1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
33 #include "stringpool.h"
40 #include "diagnostic-core.h"
41 #include "insn-attr.h"
44 #include "fold-const.h"
46 #include "stor-layout.h"
48 #include "print-tree.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
57 #include "sched-int.h"
59 #include "gimple-fold.h"
60 #include "gimple-iterator.h"
61 #include "gimple-ssa.h"
62 #include "gimple-walk.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
70 #include "tree-pass.h"
73 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #include "gstab.h" /* for N_SLINE */
78 #include "case-cfn-macros.h"
80 #include "tree-ssa-propagate.h"
82 /* This file should be included last. */
83 #include "target-def.h"
/* Provide a default for TARGET_NO_PROTOTYPE so target headers that do not
   define it still compile; 0 means prototypes are assumed present.  */
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif
/* Local helpers.  NOTE: classic double-evaluation macros -- both arguments
   are evaluated twice, so they must be side-effect free (min (i++, j) would
   increment I twice).  Kept as macros to match the rest of this file.  */
#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
92 /* Structure used to define the rs6000 stack */
93 typedef struct rs6000_stack
{
94 int reload_completed
; /* stack info won't change from here on */
95 int first_gp_reg_save
; /* first callee saved GP register used */
96 int first_fp_reg_save
; /* first callee saved FP register used */
97 int first_altivec_reg_save
; /* first callee saved AltiVec register used */
98 int lr_save_p
; /* true if the link reg needs to be saved */
99 int cr_save_p
; /* true if the CR reg needs to be saved */
100 unsigned int vrsave_mask
; /* mask of vec registers to save */
101 int push_p
; /* true if we need to allocate stack space */
102 int calls_p
; /* true if the function makes any calls */
103 int world_save_p
; /* true if we're saving *everything*:
104 r13-r31, cr, f14-f31, vrsave, v20-v31 */
105 enum rs6000_abi abi
; /* which ABI to use */
106 int gp_save_offset
; /* offset to save GP regs from initial SP */
107 int fp_save_offset
; /* offset to save FP regs from initial SP */
108 int altivec_save_offset
; /* offset to save AltiVec regs from initial SP */
109 int lr_save_offset
; /* offset to save LR from initial SP */
110 int cr_save_offset
; /* offset to save CR from initial SP */
111 int vrsave_save_offset
; /* offset to save VRSAVE from initial SP */
112 int varargs_save_offset
; /* offset to save the varargs registers */
113 int ehrd_offset
; /* offset to EH return data */
114 int ehcr_offset
; /* offset to EH CR field data */
115 int reg_size
; /* register size (4 or 8) */
116 HOST_WIDE_INT vars_size
; /* variable save area size */
117 int parm_size
; /* outgoing parameter size */
118 int save_size
; /* save area size */
119 int fixed_size
; /* fixed size of stack frame */
120 int gp_size
; /* size of saved GP registers */
121 int fp_size
; /* size of saved FP registers */
122 int altivec_size
; /* size of saved AltiVec registers */
123 int cr_size
; /* size to hold CR if not in fixed area */
124 int vrsave_size
; /* size to hold VRSAVE */
125 int altivec_padding_size
; /* size of altivec alignment padding */
126 HOST_WIDE_INT total_size
; /* total bytes allocated for stack */
130 /* A C structure for machine-specific, per-function data.
131 This is added to the cfun structure. */
132 typedef struct GTY(()) machine_function
134 /* Flags if __builtin_return_address (n) with n >= 1 was used. */
135 int ra_needs_full_frame
;
136 /* Flags if __builtin_return_address (0) was used. */
138 /* Cache lr_save_p after expansion of builtin_eh_return. */
140 /* Whether we need to save the TOC to the reserved stack location in the
141 function prologue. */
142 bool save_toc_in_prologue
;
143 /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
144 varargs save area. */
145 HOST_WIDE_INT varargs_save_offset
;
146 /* Temporary stack slot to use for SDmode copies. This slot is
147 64-bits wide and is allocated early enough so that the offset
148 does not overflow the 16-bit load/store offset field. */
149 rtx sdmode_stack_slot
;
150 /* Alternative internal arg pointer for -fsplit-stack. */
151 rtx split_stack_arg_pointer
;
152 bool split_stack_argp_used
;
153 /* Flag if r2 setup is needed with ELFv2 ABI. */
154 bool r2_setup_needed
;
155 /* The number of components we use for separate shrink-wrapping. */
157 /* The components already handled by separate shrink-wrapping, which should
158 not be considered by the prologue and epilogue. */
159 bool gpr_is_wrapped_separately
[32];
160 bool fpr_is_wrapped_separately
[32];
161 bool lr_is_wrapped_separately
;
164 /* Support targetm.vectorize.builtin_mask_for_load. */
165 static GTY(()) tree altivec_builtin_mask_for_load
;
167 /* Set to nonzero once AIX common-mode calls have been defined. */
168 static GTY(()) int common_mode_defined
;
170 /* Label number of label created for -mrelocatable, to call to so we can
171 get the address of the GOT section */
172 static int rs6000_pic_labelno
;
175 /* Counter for labels which are to be placed in .fixup. */
176 int fixuplabelno
= 0;
/* Whether to use variant of AIX ABI for PowerPC64 Linux.
   NOTE(review): the declaration under this comment was dropped by the
   extraction; upstream declares `int dot_symbols;` here -- verify.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
/* These flags are only maintained when the assembler supports
   .gnu_attribute; they feed the ABI tags emitted at end of compilation.  */
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
208 /* Value is TRUE if register/mode pair is acceptable. */
209 bool rs6000_hard_regno_mode_ok_p
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
211 /* Maximum number of registers needed for a given register class and mode. */
212 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
214 /* How many registers are needed for a given register and mode. */
215 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
217 /* Map register number to register class. */
218 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
220 static int dbg_cost_ctrl
;
222 /* Built in types. */
223 tree rs6000_builtin_types
[RS6000_BTI_MAX
];
224 tree rs6000_builtin_decls
[RS6000_BUILTIN_COUNT
];
226 /* Flag to say the TOC is initialized */
227 int toc_initialized
, need_toc_init
;
228 char toc_label_name
[10];
230 /* Cached value of rs6000_variable_issue. This is cached in
231 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
232 static short cached_can_issue_more
;
234 static GTY(()) section
*read_only_data_section
;
235 static GTY(()) section
*private_data_section
;
236 static GTY(()) section
*tls_data_section
;
237 static GTY(()) section
*tls_private_data_section
;
238 static GTY(()) section
*read_only_private_data_section
;
239 static GTY(()) section
*sdata2_section
;
240 static GTY(()) section
*toc_section
;
242 struct builtin_description
244 const HOST_WIDE_INT mask
;
245 const enum insn_code icode
;
246 const char *const name
;
247 const enum rs6000_builtins code
;
250 /* Describe the vector unit used for modes. */
251 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
252 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
254 /* Register classes for various constraints that are based on the target
256 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
258 /* Describe the alignment of a vector. */
259 int rs6000_vector_align
[NUM_MACHINE_MODES
];
261 /* Map selected modes to types for builtins. */
262 static GTY(()) tree builtin_mode_to_type
[MAX_MACHINE_MODE
][2];
264 /* What modes to automatically generate reciprocal divide estimate (fre) and
265 reciprocal sqrt (frsqrte) for. */
266 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
294 /* -mrecip options. */
297 const char *string
; /* option name */
298 unsigned int mask
; /* mask bits to set */
299 } recip_options
[] = {
300 { "all", RECIP_ALL
},
301 { "none", RECIP_NONE
},
302 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
304 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
305 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
306 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
307 | RECIP_V2DF_RSQRT
) },
308 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
309 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
312 /* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */
318 { "power9", PPC_PLATFORM_POWER9
},
319 { "power8", PPC_PLATFORM_POWER8
},
320 { "power7", PPC_PLATFORM_POWER7
},
321 { "power6x", PPC_PLATFORM_POWER6X
},
322 { "power6", PPC_PLATFORM_POWER6
},
323 { "power5+", PPC_PLATFORM_POWER5_PLUS
},
324 { "power5", PPC_PLATFORM_POWER5
},
325 { "ppc970", PPC_PLATFORM_PPC970
},
326 { "power4", PPC_PLATFORM_POWER4
},
327 { "ppca2", PPC_PLATFORM_PPCA2
},
328 { "ppc476", PPC_PLATFORM_PPC476
},
329 { "ppc464", PPC_PLATFORM_PPC464
},
330 { "ppc440", PPC_PLATFORM_PPC440
},
331 { "ppc405", PPC_PLATFORM_PPC405
},
332 { "ppc-cell-be", PPC_PLATFORM_CELL_BE
}
335 /* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */
341 } cpu_supports_info
[] = {
342 /* AT_HWCAP masks. */
343 { "4xxmac", PPC_FEATURE_HAS_4xxMAC
, 0 },
344 { "altivec", PPC_FEATURE_HAS_ALTIVEC
, 0 },
345 { "arch_2_05", PPC_FEATURE_ARCH_2_05
, 0 },
346 { "arch_2_06", PPC_FEATURE_ARCH_2_06
, 0 },
347 { "archpmu", PPC_FEATURE_PERFMON_COMPAT
, 0 },
348 { "booke", PPC_FEATURE_BOOKE
, 0 },
349 { "cellbe", PPC_FEATURE_CELL_BE
, 0 },
350 { "dfp", PPC_FEATURE_HAS_DFP
, 0 },
351 { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE
, 0 },
352 { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE
, 0 },
353 { "fpu", PPC_FEATURE_HAS_FPU
, 0 },
354 { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP
, 0 },
355 { "mmu", PPC_FEATURE_HAS_MMU
, 0 },
356 { "notb", PPC_FEATURE_NO_TB
, 0 },
357 { "pa6t", PPC_FEATURE_PA6T
, 0 },
358 { "power4", PPC_FEATURE_POWER4
, 0 },
359 { "power5", PPC_FEATURE_POWER5
, 0 },
360 { "power5+", PPC_FEATURE_POWER5_PLUS
, 0 },
361 { "power6x", PPC_FEATURE_POWER6_EXT
, 0 },
362 { "ppc32", PPC_FEATURE_32
, 0 },
363 { "ppc601", PPC_FEATURE_601_INSTR
, 0 },
364 { "ppc64", PPC_FEATURE_64
, 0 },
365 { "ppcle", PPC_FEATURE_PPC_LE
, 0 },
366 { "smt", PPC_FEATURE_SMT
, 0 },
367 { "spe", PPC_FEATURE_HAS_SPE
, 0 },
368 { "true_le", PPC_FEATURE_TRUE_LE
, 0 },
369 { "ucache", PPC_FEATURE_UNIFIED_CACHE
, 0 },
370 { "vsx", PPC_FEATURE_HAS_VSX
, 0 },
372 /* AT_HWCAP2 masks. */
373 { "arch_2_07", PPC_FEATURE2_ARCH_2_07
, 1 },
374 { "dscr", PPC_FEATURE2_HAS_DSCR
, 1 },
375 { "ebb", PPC_FEATURE2_HAS_EBB
, 1 },
376 { "htm", PPC_FEATURE2_HAS_HTM
, 1 },
377 { "htm-nosc", PPC_FEATURE2_HTM_NOSC
, 1 },
378 { "isel", PPC_FEATURE2_HAS_ISEL
, 1 },
379 { "tar", PPC_FEATURE2_HAS_TAR
, 1 },
380 { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO
, 1 },
381 { "arch_3_00", PPC_FEATURE2_ARCH_3_00
, 1 },
382 { "ieee128", PPC_FEATURE2_HAS_IEEE128
, 1 }
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.00 (power9).  */
  CLONE_MAX
};
398 /* Map compiler ISA bits into HWCAP names. */
400 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
401 const char *name
; /* name to use in __builtin_cpu_supports. */
404 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
405 { 0, "" }, /* Default options. */
406 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
407 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
408 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
409 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.00 (power9). */
413 /* Newer LIBCs explicitly export this symbol to declare that they provide
414 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
415 reference to this symbol whenever we expand a CPU builtin, so that
416 we never link against an old LIBC. */
417 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
419 /* True if we have expanded a CPU builtin. */
422 /* Pointer to function (in rs6000-c.c) that can define or undefine target
423 macros that have changed. Languages that don't support the preprocessor
424 don't link in rs6000-c.c, so we can't call it directly. */
425 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
, HOST_WIDE_INT
);
427 /* Simplfy register classes into simpler classifications. We assume
428 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
429 check for standard register classes (gpr/floating/altivec/vsx) and
430 floating/vector classes (float/altivec/vsx). */
432 enum rs6000_reg_type
{
443 /* Map register class to register type. */
444 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
446 /* First/last register type for the 'normal' register types (i.e. general
447 purpose, floating point, altivec, and VSX registers). */
448 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
450 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 real classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
471 /* Map reload register type to a register in the register class. */
472 struct reload_reg_map_type
{
473 const char *name
; /* Register class name. */
474 int reg
; /* Register in the register class. */
477 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
478 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
479 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
480 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
481 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
498 /* Register type masks based on the type, of valid addressing modes. */
499 struct rs6000_reg_addr
{
500 enum insn_code reload_load
; /* INSN to reload for loading. */
501 enum insn_code reload_store
; /* INSN to reload for storing. */
502 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
503 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
504 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
505 enum insn_code fusion_gpr_ld
; /* INSN for fusing gpr ADDIS/loads. */
506 /* INSNs for fusing addi with loads
507 or stores for each reg. class. */
508 enum insn_code fusion_addi_ld
[(int)N_RELOAD_REG
];
509 enum insn_code fusion_addi_st
[(int)N_RELOAD_REG
];
510 /* INSNs for fusing addis with loads
511 or stores for each reg. class. */
512 enum insn_code fusion_addis_ld
[(int)N_RELOAD_REG
];
513 enum insn_code fusion_addis_st
[(int)N_RELOAD_REG
];
514 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
515 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
516 bool fused_toc
; /* Mode supports TOC fusion. */
519 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
521 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
523 mode_supports_pre_incdec_p (machine_mode mode
)
525 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
529 /* Helper function to say whether a mode supports PRE_MODIFY. */
531 mode_supports_pre_modify_p (machine_mode mode
)
533 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
537 /* Given that there exists at least one variable that is set (produced)
538 by OUT_INSN and read (consumed) by IN_INSN, return true iff
539 IN_INSN represents one or more memory store operations and none of
540 the variables set by OUT_INSN is used by IN_INSN as the address of a
541 store operation. If either IN_INSN or OUT_INSN does not represent
542 a "single" RTL SET expression (as loosely defined by the
543 implementation of the single_set function) or a PARALLEL with only
544 SETs, CLOBBERs, and USEs inside, this function returns false.
546 This rs6000-specific version of store_data_bypass_p checks for
547 certain conditions that result in assertion failures (and internal
548 compiler errors) in the generic store_data_bypass_p function and
549 returns false rather than calling store_data_bypass_p if one of the
550 problematic conditions is detected. */
553 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
560 in_set
= single_set (in_insn
);
563 if (MEM_P (SET_DEST (in_set
)))
565 out_set
= single_set (out_insn
);
568 out_pat
= PATTERN (out_insn
);
569 if (GET_CODE (out_pat
) == PARALLEL
)
571 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
573 out_exp
= XVECEXP (out_pat
, 0, i
);
574 if ((GET_CODE (out_exp
) == CLOBBER
)
575 || (GET_CODE (out_exp
) == USE
))
577 else if (GET_CODE (out_exp
) != SET
)
586 in_pat
= PATTERN (in_insn
);
587 if (GET_CODE (in_pat
) != PARALLEL
)
590 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
592 in_exp
= XVECEXP (in_pat
, 0, i
);
593 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
595 else if (GET_CODE (in_exp
) != SET
)
598 if (MEM_P (SET_DEST (in_exp
)))
600 out_set
= single_set (out_insn
);
603 out_pat
= PATTERN (out_insn
);
604 if (GET_CODE (out_pat
) != PARALLEL
)
606 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
608 out_exp
= XVECEXP (out_pat
, 0, j
);
609 if ((GET_CODE (out_exp
) == CLOBBER
)
610 || (GET_CODE (out_exp
) == USE
))
612 else if (GET_CODE (out_exp
) != SET
)
619 return store_data_bypass_p (out_insn
, in_insn
);
622 /* Return true if we have D-form addressing in altivec registers. */
624 mode_supports_vmx_dform (machine_mode mode
)
626 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
629 /* Return true if we have D-form addressing in VSX registers. This addressing
630 is more limited than normal d-form addressing in that the offset must be
631 aligned on a 16-byte boundary. */
633 mode_supports_vsx_dform_quad (machine_mode mode
)
635 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
/* Target cpu costs.  One instance per supported processor; rs6000_cost
   points at the table for the processor being compiled for.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
663 /* Processor costs (relative to an add) */
665 /* Instruction size costs on 32bit processors. */
667 struct processor_costs size32_cost
= {
668 COSTS_N_INSNS (1), /* mulsi */
669 COSTS_N_INSNS (1), /* mulsi_const */
670 COSTS_N_INSNS (1), /* mulsi_const9 */
671 COSTS_N_INSNS (1), /* muldi */
672 COSTS_N_INSNS (1), /* divsi */
673 COSTS_N_INSNS (1), /* divdi */
674 COSTS_N_INSNS (1), /* fp */
675 COSTS_N_INSNS (1), /* dmul */
676 COSTS_N_INSNS (1), /* sdiv */
677 COSTS_N_INSNS (1), /* ddiv */
678 32, /* cache line size */
682 0, /* SF->DF convert */
685 /* Instruction size costs on 64bit processors. */
687 struct processor_costs size64_cost
= {
688 COSTS_N_INSNS (1), /* mulsi */
689 COSTS_N_INSNS (1), /* mulsi_const */
690 COSTS_N_INSNS (1), /* mulsi_const9 */
691 COSTS_N_INSNS (1), /* muldi */
692 COSTS_N_INSNS (1), /* divsi */
693 COSTS_N_INSNS (1), /* divdi */
694 COSTS_N_INSNS (1), /* fp */
695 COSTS_N_INSNS (1), /* dmul */
696 COSTS_N_INSNS (1), /* sdiv */
697 COSTS_N_INSNS (1), /* ddiv */
698 128, /* cache line size */
702 0, /* SF->DF convert */
705 /* Instruction costs on RS64A processors. */
707 struct processor_costs rs64a_cost
= {
708 COSTS_N_INSNS (20), /* mulsi */
709 COSTS_N_INSNS (12), /* mulsi_const */
710 COSTS_N_INSNS (8), /* mulsi_const9 */
711 COSTS_N_INSNS (34), /* muldi */
712 COSTS_N_INSNS (65), /* divsi */
713 COSTS_N_INSNS (67), /* divdi */
714 COSTS_N_INSNS (4), /* fp */
715 COSTS_N_INSNS (4), /* dmul */
716 COSTS_N_INSNS (31), /* sdiv */
717 COSTS_N_INSNS (31), /* ddiv */
718 128, /* cache line size */
722 0, /* SF->DF convert */
725 /* Instruction costs on MPCCORE processors. */
727 struct processor_costs mpccore_cost
= {
728 COSTS_N_INSNS (2), /* mulsi */
729 COSTS_N_INSNS (2), /* mulsi_const */
730 COSTS_N_INSNS (2), /* mulsi_const9 */
731 COSTS_N_INSNS (2), /* muldi */
732 COSTS_N_INSNS (6), /* divsi */
733 COSTS_N_INSNS (6), /* divdi */
734 COSTS_N_INSNS (4), /* fp */
735 COSTS_N_INSNS (5), /* dmul */
736 COSTS_N_INSNS (10), /* sdiv */
737 COSTS_N_INSNS (17), /* ddiv */
738 32, /* cache line size */
742 0, /* SF->DF convert */
745 /* Instruction costs on PPC403 processors. */
747 struct processor_costs ppc403_cost
= {
748 COSTS_N_INSNS (4), /* mulsi */
749 COSTS_N_INSNS (4), /* mulsi_const */
750 COSTS_N_INSNS (4), /* mulsi_const9 */
751 COSTS_N_INSNS (4), /* muldi */
752 COSTS_N_INSNS (33), /* divsi */
753 COSTS_N_INSNS (33), /* divdi */
754 COSTS_N_INSNS (11), /* fp */
755 COSTS_N_INSNS (11), /* dmul */
756 COSTS_N_INSNS (11), /* sdiv */
757 COSTS_N_INSNS (11), /* ddiv */
758 32, /* cache line size */
762 0, /* SF->DF convert */
765 /* Instruction costs on PPC405 processors. */
767 struct processor_costs ppc405_cost
= {
768 COSTS_N_INSNS (5), /* mulsi */
769 COSTS_N_INSNS (4), /* mulsi_const */
770 COSTS_N_INSNS (3), /* mulsi_const9 */
771 COSTS_N_INSNS (5), /* muldi */
772 COSTS_N_INSNS (35), /* divsi */
773 COSTS_N_INSNS (35), /* divdi */
774 COSTS_N_INSNS (11), /* fp */
775 COSTS_N_INSNS (11), /* dmul */
776 COSTS_N_INSNS (11), /* sdiv */
777 COSTS_N_INSNS (11), /* ddiv */
778 32, /* cache line size */
782 0, /* SF->DF convert */
785 /* Instruction costs on PPC440 processors. */
787 struct processor_costs ppc440_cost
= {
788 COSTS_N_INSNS (3), /* mulsi */
789 COSTS_N_INSNS (2), /* mulsi_const */
790 COSTS_N_INSNS (2), /* mulsi_const9 */
791 COSTS_N_INSNS (3), /* muldi */
792 COSTS_N_INSNS (34), /* divsi */
793 COSTS_N_INSNS (34), /* divdi */
794 COSTS_N_INSNS (5), /* fp */
795 COSTS_N_INSNS (5), /* dmul */
796 COSTS_N_INSNS (19), /* sdiv */
797 COSTS_N_INSNS (33), /* ddiv */
798 32, /* cache line size */
802 0, /* SF->DF convert */
805 /* Instruction costs on PPC476 processors. */
807 struct processor_costs ppc476_cost
= {
808 COSTS_N_INSNS (4), /* mulsi */
809 COSTS_N_INSNS (4), /* mulsi_const */
810 COSTS_N_INSNS (4), /* mulsi_const9 */
811 COSTS_N_INSNS (4), /* muldi */
812 COSTS_N_INSNS (11), /* divsi */
813 COSTS_N_INSNS (11), /* divdi */
814 COSTS_N_INSNS (6), /* fp */
815 COSTS_N_INSNS (6), /* dmul */
816 COSTS_N_INSNS (19), /* sdiv */
817 COSTS_N_INSNS (33), /* ddiv */
818 32, /* l1 cache line size */
822 0, /* SF->DF convert */
825 /* Instruction costs on PPC601 processors. */
827 struct processor_costs ppc601_cost
= {
828 COSTS_N_INSNS (5), /* mulsi */
829 COSTS_N_INSNS (5), /* mulsi_const */
830 COSTS_N_INSNS (5), /* mulsi_const9 */
831 COSTS_N_INSNS (5), /* muldi */
832 COSTS_N_INSNS (36), /* divsi */
833 COSTS_N_INSNS (36), /* divdi */
834 COSTS_N_INSNS (4), /* fp */
835 COSTS_N_INSNS (5), /* dmul */
836 COSTS_N_INSNS (17), /* sdiv */
837 COSTS_N_INSNS (31), /* ddiv */
838 32, /* cache line size */
842 0, /* SF->DF convert */
845 /* Instruction costs on PPC603 processors. */
847 struct processor_costs ppc603_cost
= {
848 COSTS_N_INSNS (5), /* mulsi */
849 COSTS_N_INSNS (3), /* mulsi_const */
850 COSTS_N_INSNS (2), /* mulsi_const9 */
851 COSTS_N_INSNS (5), /* muldi */
852 COSTS_N_INSNS (37), /* divsi */
853 COSTS_N_INSNS (37), /* divdi */
854 COSTS_N_INSNS (3), /* fp */
855 COSTS_N_INSNS (4), /* dmul */
856 COSTS_N_INSNS (18), /* sdiv */
857 COSTS_N_INSNS (33), /* ddiv */
858 32, /* cache line size */
862 0, /* SF->DF convert */
865 /* Instruction costs on PPC604 processors. */
867 struct processor_costs ppc604_cost
= {
868 COSTS_N_INSNS (4), /* mulsi */
869 COSTS_N_INSNS (4), /* mulsi_const */
870 COSTS_N_INSNS (4), /* mulsi_const9 */
871 COSTS_N_INSNS (4), /* muldi */
872 COSTS_N_INSNS (20), /* divsi */
873 COSTS_N_INSNS (20), /* divdi */
874 COSTS_N_INSNS (3), /* fp */
875 COSTS_N_INSNS (3), /* dmul */
876 COSTS_N_INSNS (18), /* sdiv */
877 COSTS_N_INSNS (32), /* ddiv */
878 32, /* cache line size */
882 0, /* SF->DF convert */
885 /* Instruction costs on PPC604e processors. */
887 struct processor_costs ppc604e_cost
= {
888 COSTS_N_INSNS (2), /* mulsi */
889 COSTS_N_INSNS (2), /* mulsi_const */
890 COSTS_N_INSNS (2), /* mulsi_const9 */
891 COSTS_N_INSNS (2), /* muldi */
892 COSTS_N_INSNS (20), /* divsi */
893 COSTS_N_INSNS (20), /* divdi */
894 COSTS_N_INSNS (3), /* fp */
895 COSTS_N_INSNS (3), /* dmul */
896 COSTS_N_INSNS (18), /* sdiv */
897 COSTS_N_INSNS (32), /* ddiv */
898 32, /* cache line size */
902 0, /* SF->DF convert */
905 /* Instruction costs on PPC620 processors. */
907 struct processor_costs ppc620_cost
= {
908 COSTS_N_INSNS (5), /* mulsi */
909 COSTS_N_INSNS (4), /* mulsi_const */
910 COSTS_N_INSNS (3), /* mulsi_const9 */
911 COSTS_N_INSNS (7), /* muldi */
912 COSTS_N_INSNS (21), /* divsi */
913 COSTS_N_INSNS (37), /* divdi */
914 COSTS_N_INSNS (3), /* fp */
915 COSTS_N_INSNS (3), /* dmul */
916 COSTS_N_INSNS (18), /* sdiv */
917 COSTS_N_INSNS (32), /* ddiv */
918 128, /* cache line size */
922 0, /* SF->DF convert */
925 /* Instruction costs on PPC630 processors. */
927 struct processor_costs ppc630_cost
= {
928 COSTS_N_INSNS (5), /* mulsi */
929 COSTS_N_INSNS (4), /* mulsi_const */
930 COSTS_N_INSNS (3), /* mulsi_const9 */
931 COSTS_N_INSNS (7), /* muldi */
932 COSTS_N_INSNS (21), /* divsi */
933 COSTS_N_INSNS (37), /* divdi */
934 COSTS_N_INSNS (3), /* fp */
935 COSTS_N_INSNS (3), /* dmul */
936 COSTS_N_INSNS (17), /* sdiv */
937 COSTS_N_INSNS (21), /* ddiv */
938 128, /* cache line size */
942 0, /* SF->DF convert */
945 /* Instruction costs on Cell processor. */
946 /* COSTS_N_INSNS (1) ~ one add. */
948 struct processor_costs ppccell_cost
= {
949 COSTS_N_INSNS (9/2)+2, /* mulsi */
950 COSTS_N_INSNS (6/2), /* mulsi_const */
951 COSTS_N_INSNS (6/2), /* mulsi_const9 */
952 COSTS_N_INSNS (15/2)+2, /* muldi */
953 COSTS_N_INSNS (38/2), /* divsi */
954 COSTS_N_INSNS (70/2), /* divdi */
955 COSTS_N_INSNS (10/2), /* fp */
956 COSTS_N_INSNS (10/2), /* dmul */
957 COSTS_N_INSNS (74/2), /* sdiv */
958 COSTS_N_INSNS (74/2), /* ddiv */
959 128, /* cache line size */
963 0, /* SF->DF convert */
966 /* Instruction costs on PPC750 and PPC7400 processors. */
968 struct processor_costs ppc750_cost
= {
969 COSTS_N_INSNS (5), /* mulsi */
970 COSTS_N_INSNS (3), /* mulsi_const */
971 COSTS_N_INSNS (2), /* mulsi_const9 */
972 COSTS_N_INSNS (5), /* muldi */
973 COSTS_N_INSNS (17), /* divsi */
974 COSTS_N_INSNS (17), /* divdi */
975 COSTS_N_INSNS (3), /* fp */
976 COSTS_N_INSNS (3), /* dmul */
977 COSTS_N_INSNS (17), /* sdiv */
978 COSTS_N_INSNS (31), /* ddiv */
979 32, /* cache line size */
983 0, /* SF->DF convert */
986 /* Instruction costs on PPC7450 processors. */
988 struct processor_costs ppc7450_cost
= {
989 COSTS_N_INSNS (4), /* mulsi */
990 COSTS_N_INSNS (3), /* mulsi_const */
991 COSTS_N_INSNS (3), /* mulsi_const9 */
992 COSTS_N_INSNS (4), /* muldi */
993 COSTS_N_INSNS (23), /* divsi */
994 COSTS_N_INSNS (23), /* divdi */
995 COSTS_N_INSNS (5), /* fp */
996 COSTS_N_INSNS (5), /* dmul */
997 COSTS_N_INSNS (21), /* sdiv */
998 COSTS_N_INSNS (35), /* ddiv */
999 32, /* cache line size */
1001 1024, /* l2 cache */
1003 0, /* SF->DF convert */
1006 /* Instruction costs on PPC8540 processors. */
1008 struct processor_costs ppc8540_cost
= {
1009 COSTS_N_INSNS (4), /* mulsi */
1010 COSTS_N_INSNS (4), /* mulsi_const */
1011 COSTS_N_INSNS (4), /* mulsi_const9 */
1012 COSTS_N_INSNS (4), /* muldi */
1013 COSTS_N_INSNS (19), /* divsi */
1014 COSTS_N_INSNS (19), /* divdi */
1015 COSTS_N_INSNS (4), /* fp */
1016 COSTS_N_INSNS (4), /* dmul */
1017 COSTS_N_INSNS (29), /* sdiv */
1018 COSTS_N_INSNS (29), /* ddiv */
1019 32, /* cache line size */
1022 1, /* prefetch streams */
1023 0, /* SF->DF convert */
1026 /* Instruction costs on E300C2 and E300C3 cores. */
1028 struct processor_costs ppce300c2c3_cost
= {
1029 COSTS_N_INSNS (4), /* mulsi */
1030 COSTS_N_INSNS (4), /* mulsi_const */
1031 COSTS_N_INSNS (4), /* mulsi_const9 */
1032 COSTS_N_INSNS (4), /* muldi */
1033 COSTS_N_INSNS (19), /* divsi */
1034 COSTS_N_INSNS (19), /* divdi */
1035 COSTS_N_INSNS (3), /* fp */
1036 COSTS_N_INSNS (4), /* dmul */
1037 COSTS_N_INSNS (18), /* sdiv */
1038 COSTS_N_INSNS (33), /* ddiv */
1042 1, /* prefetch streams */
1043 0, /* SF->DF convert */
1046 /* Instruction costs on PPCE500MC processors. */
1048 struct processor_costs ppce500mc_cost
= {
1049 COSTS_N_INSNS (4), /* mulsi */
1050 COSTS_N_INSNS (4), /* mulsi_const */
1051 COSTS_N_INSNS (4), /* mulsi_const9 */
1052 COSTS_N_INSNS (4), /* muldi */
1053 COSTS_N_INSNS (14), /* divsi */
1054 COSTS_N_INSNS (14), /* divdi */
1055 COSTS_N_INSNS (8), /* fp */
1056 COSTS_N_INSNS (10), /* dmul */
1057 COSTS_N_INSNS (36), /* sdiv */
1058 COSTS_N_INSNS (66), /* ddiv */
1059 64, /* cache line size */
1062 1, /* prefetch streams */
1063 0, /* SF->DF convert */
1066 /* Instruction costs on PPCE500MC64 processors. */
1068 struct processor_costs ppce500mc64_cost
= {
1069 COSTS_N_INSNS (4), /* mulsi */
1070 COSTS_N_INSNS (4), /* mulsi_const */
1071 COSTS_N_INSNS (4), /* mulsi_const9 */
1072 COSTS_N_INSNS (4), /* muldi */
1073 COSTS_N_INSNS (14), /* divsi */
1074 COSTS_N_INSNS (14), /* divdi */
1075 COSTS_N_INSNS (4), /* fp */
1076 COSTS_N_INSNS (10), /* dmul */
1077 COSTS_N_INSNS (36), /* sdiv */
1078 COSTS_N_INSNS (66), /* ddiv */
1079 64, /* cache line size */
1082 1, /* prefetch streams */
1083 0, /* SF->DF convert */
1086 /* Instruction costs on PPCE5500 processors. */
1088 struct processor_costs ppce5500_cost
= {
1089 COSTS_N_INSNS (5), /* mulsi */
1090 COSTS_N_INSNS (5), /* mulsi_const */
1091 COSTS_N_INSNS (4), /* mulsi_const9 */
1092 COSTS_N_INSNS (5), /* muldi */
1093 COSTS_N_INSNS (14), /* divsi */
1094 COSTS_N_INSNS (14), /* divdi */
1095 COSTS_N_INSNS (7), /* fp */
1096 COSTS_N_INSNS (10), /* dmul */
1097 COSTS_N_INSNS (36), /* sdiv */
1098 COSTS_N_INSNS (66), /* ddiv */
1099 64, /* cache line size */
1102 1, /* prefetch streams */
1103 0, /* SF->DF convert */
1106 /* Instruction costs on PPCE6500 processors. */
1108 struct processor_costs ppce6500_cost
= {
1109 COSTS_N_INSNS (5), /* mulsi */
1110 COSTS_N_INSNS (5), /* mulsi_const */
1111 COSTS_N_INSNS (4), /* mulsi_const9 */
1112 COSTS_N_INSNS (5), /* muldi */
1113 COSTS_N_INSNS (14), /* divsi */
1114 COSTS_N_INSNS (14), /* divdi */
1115 COSTS_N_INSNS (7), /* fp */
1116 COSTS_N_INSNS (10), /* dmul */
1117 COSTS_N_INSNS (36), /* sdiv */
1118 COSTS_N_INSNS (66), /* ddiv */
1119 64, /* cache line size */
1122 1, /* prefetch streams */
1123 0, /* SF->DF convert */
1126 /* Instruction costs on AppliedMicro Titan processors. */
1128 struct processor_costs titan_cost
= {
1129 COSTS_N_INSNS (5), /* mulsi */
1130 COSTS_N_INSNS (5), /* mulsi_const */
1131 COSTS_N_INSNS (5), /* mulsi_const9 */
1132 COSTS_N_INSNS (5), /* muldi */
1133 COSTS_N_INSNS (18), /* divsi */
1134 COSTS_N_INSNS (18), /* divdi */
1135 COSTS_N_INSNS (10), /* fp */
1136 COSTS_N_INSNS (10), /* dmul */
1137 COSTS_N_INSNS (46), /* sdiv */
1138 COSTS_N_INSNS (72), /* ddiv */
1139 32, /* cache line size */
1142 1, /* prefetch streams */
1143 0, /* SF->DF convert */
1146 /* Instruction costs on POWER4 and POWER5 processors. */
1148 struct processor_costs power4_cost
= {
1149 COSTS_N_INSNS (3), /* mulsi */
1150 COSTS_N_INSNS (2), /* mulsi_const */
1151 COSTS_N_INSNS (2), /* mulsi_const9 */
1152 COSTS_N_INSNS (4), /* muldi */
1153 COSTS_N_INSNS (18), /* divsi */
1154 COSTS_N_INSNS (34), /* divdi */
1155 COSTS_N_INSNS (3), /* fp */
1156 COSTS_N_INSNS (3), /* dmul */
1157 COSTS_N_INSNS (17), /* sdiv */
1158 COSTS_N_INSNS (17), /* ddiv */
1159 128, /* cache line size */
1161 1024, /* l2 cache */
1162 8, /* prefetch streams */
1163 0, /* SF->DF convert */
1166 /* Instruction costs on POWER6 processors. */
1168 struct processor_costs power6_cost
= {
1169 COSTS_N_INSNS (8), /* mulsi */
1170 COSTS_N_INSNS (8), /* mulsi_const */
1171 COSTS_N_INSNS (8), /* mulsi_const9 */
1172 COSTS_N_INSNS (8), /* muldi */
1173 COSTS_N_INSNS (22), /* divsi */
1174 COSTS_N_INSNS (28), /* divdi */
1175 COSTS_N_INSNS (3), /* fp */
1176 COSTS_N_INSNS (3), /* dmul */
1177 COSTS_N_INSNS (13), /* sdiv */
1178 COSTS_N_INSNS (16), /* ddiv */
1179 128, /* cache line size */
1181 2048, /* l2 cache */
1182 16, /* prefetch streams */
1183 0, /* SF->DF convert */
1186 /* Instruction costs on POWER7 processors. */
1188 struct processor_costs power7_cost
= {
1189 COSTS_N_INSNS (2), /* mulsi */
1190 COSTS_N_INSNS (2), /* mulsi_const */
1191 COSTS_N_INSNS (2), /* mulsi_const9 */
1192 COSTS_N_INSNS (2), /* muldi */
1193 COSTS_N_INSNS (18), /* divsi */
1194 COSTS_N_INSNS (34), /* divdi */
1195 COSTS_N_INSNS (3), /* fp */
1196 COSTS_N_INSNS (3), /* dmul */
1197 COSTS_N_INSNS (13), /* sdiv */
1198 COSTS_N_INSNS (16), /* ddiv */
1199 128, /* cache line size */
1202 12, /* prefetch streams */
1203 COSTS_N_INSNS (3), /* SF->DF convert */
1206 /* Instruction costs on POWER8 processors. */
1208 struct processor_costs power8_cost
= {
1209 COSTS_N_INSNS (3), /* mulsi */
1210 COSTS_N_INSNS (3), /* mulsi_const */
1211 COSTS_N_INSNS (3), /* mulsi_const9 */
1212 COSTS_N_INSNS (3), /* muldi */
1213 COSTS_N_INSNS (19), /* divsi */
1214 COSTS_N_INSNS (35), /* divdi */
1215 COSTS_N_INSNS (3), /* fp */
1216 COSTS_N_INSNS (3), /* dmul */
1217 COSTS_N_INSNS (14), /* sdiv */
1218 COSTS_N_INSNS (17), /* ddiv */
1219 128, /* cache line size */
1222 12, /* prefetch streams */
1223 COSTS_N_INSNS (3), /* SF->DF convert */
1226 /* Instruction costs on POWER9 processors. */
1228 struct processor_costs power9_cost
= {
1229 COSTS_N_INSNS (3), /* mulsi */
1230 COSTS_N_INSNS (3), /* mulsi_const */
1231 COSTS_N_INSNS (3), /* mulsi_const9 */
1232 COSTS_N_INSNS (3), /* muldi */
1233 COSTS_N_INSNS (8), /* divsi */
1234 COSTS_N_INSNS (12), /* divdi */
1235 COSTS_N_INSNS (3), /* fp */
1236 COSTS_N_INSNS (3), /* dmul */
1237 COSTS_N_INSNS (13), /* sdiv */
1238 COSTS_N_INSNS (18), /* ddiv */
1239 128, /* cache line size */
1242 8, /* prefetch streams */
1243 COSTS_N_INSNS (3), /* SF->DF convert */
1246 /* Instruction costs on POWER A2 processors. */
1248 struct processor_costs ppca2_cost
= {
1249 COSTS_N_INSNS (16), /* mulsi */
1250 COSTS_N_INSNS (16), /* mulsi_const */
1251 COSTS_N_INSNS (16), /* mulsi_const9 */
1252 COSTS_N_INSNS (16), /* muldi */
1253 COSTS_N_INSNS (22), /* divsi */
1254 COSTS_N_INSNS (28), /* divdi */
1255 COSTS_N_INSNS (3), /* fp */
1256 COSTS_N_INSNS (3), /* dmul */
1257 COSTS_N_INSNS (59), /* sdiv */
1258 COSTS_N_INSNS (72), /* ddiv */
1261 2048, /* l2 cache */
1262 16, /* prefetch streams */
1263 0, /* SF->DF convert */
1267 /* Table that classifies rs6000 builtin functions (pure, const, etc.). */
1268 #undef RS6000_BUILTIN_0
1269 #undef RS6000_BUILTIN_1
1270 #undef RS6000_BUILTIN_2
1271 #undef RS6000_BUILTIN_3
1272 #undef RS6000_BUILTIN_A
1273 #undef RS6000_BUILTIN_D
1274 #undef RS6000_BUILTIN_H
1275 #undef RS6000_BUILTIN_P
1276 #undef RS6000_BUILTIN_Q
1277 #undef RS6000_BUILTIN_X
1279 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
1280 { NAME, ICODE, MASK, ATTR },
1282 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
1283 { NAME, ICODE, MASK, ATTR },
1285 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
1286 { NAME, ICODE, MASK, ATTR },
1288 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
1289 { NAME, ICODE, MASK, ATTR },
1291 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
1292 { NAME, ICODE, MASK, ATTR },
1294 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
1295 { NAME, ICODE, MASK, ATTR },
1297 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
1298 { NAME, ICODE, MASK, ATTR },
1300 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
1301 { NAME, ICODE, MASK, ATTR },
1303 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
1304 { NAME, ICODE, MASK, ATTR },
1306 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
1307 { NAME, ICODE, MASK, ATTR },
1309 struct rs6000_builtin_info_type
{
1311 const enum insn_code icode
;
1312 const HOST_WIDE_INT mask
;
1313 const unsigned attr
;
1316 static const struct rs6000_builtin_info_type rs6000_builtin_info
[] =
1318 #include "rs6000-builtin.def"
1321 #undef RS6000_BUILTIN_0
1322 #undef RS6000_BUILTIN_1
1323 #undef RS6000_BUILTIN_2
1324 #undef RS6000_BUILTIN_3
1325 #undef RS6000_BUILTIN_A
1326 #undef RS6000_BUILTIN_D
1327 #undef RS6000_BUILTIN_H
1328 #undef RS6000_BUILTIN_P
1329 #undef RS6000_BUILTIN_Q
1330 #undef RS6000_BUILTIN_X
1332 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1333 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1336 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1337 static struct machine_function
* rs6000_init_machine_status (void);
1338 static int rs6000_ra_ever_killed (void);
1339 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1340 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1341 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1342 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1343 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1344 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1345 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1346 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1348 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1350 static bool is_microcoded_insn (rtx_insn
*);
1351 static bool is_nonpipeline_insn (rtx_insn
*);
1352 static bool is_cracked_insn (rtx_insn
*);
1353 static bool is_load_insn (rtx
, rtx
*);
1354 static bool is_store_insn (rtx
, rtx
*);
1355 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1356 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1357 static bool insn_must_be_first_in_group (rtx_insn
*);
1358 static bool insn_must_be_last_in_group (rtx_insn
*);
1359 static void altivec_init_builtins (void);
1360 static tree
builtin_function_type (machine_mode
, machine_mode
,
1361 machine_mode
, machine_mode
,
1362 enum rs6000_builtins
, const char *name
);
1363 static void rs6000_common_init_builtins (void);
1364 static void paired_init_builtins (void);
1365 static rtx
paired_expand_predicate_builtin (enum insn_code
, tree
, rtx
);
1366 static void htm_init_builtins (void);
1367 static int rs6000_emit_int_cmove (rtx
, rtx
, rtx
, rtx
);
1368 static rs6000_stack_t
*rs6000_stack_info (void);
1369 static void is_altivec_return_reg (rtx
, void *);
1370 int easy_vector_constant (rtx
, machine_mode
);
1371 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1372 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1373 static rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS
*, const_tree
,
1376 static void macho_branch_islands (void);
1378 static rtx
rs6000_legitimize_reload_address (rtx
, machine_mode
, int, int,
1380 static rtx
rs6000_debug_legitimize_reload_address (rtx
, machine_mode
, int,
1382 static bool rs6000_mode_dependent_address (const_rtx
);
1383 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1384 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1386 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1389 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1390 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1392 static bool rs6000_secondary_memory_needed (enum reg_class
, enum reg_class
,
1394 static bool rs6000_debug_secondary_memory_needed (enum reg_class
,
1397 static bool rs6000_cannot_change_mode_class (machine_mode
,
1400 static bool rs6000_debug_cannot_change_mode_class (machine_mode
,
1403 static bool rs6000_save_toc_in_prologue_p (void);
1404 static rtx
rs6000_internal_arg_pointer (void);
1406 rtx (*rs6000_legitimize_reload_address_ptr
) (rtx
, machine_mode
, int, int,
1408 = rs6000_legitimize_reload_address
;
1410 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1411 = rs6000_mode_dependent_address
;
1413 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1415 = rs6000_secondary_reload_class
;
1417 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1418 = rs6000_preferred_reload_class
;
1420 bool (*rs6000_secondary_memory_needed_ptr
) (enum reg_class
, enum reg_class
,
1422 = rs6000_secondary_memory_needed
;
1424 bool (*rs6000_cannot_change_mode_class_ptr
) (machine_mode
,
1427 = rs6000_cannot_change_mode_class
;
1429 const int INSN_NOT_AVAILABLE
= -1;
1431 static void rs6000_print_isa_options (FILE *, int, const char *,
1433 static void rs6000_print_builtin_options (FILE *, int, const char *,
1435 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1437 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1438 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1439 enum rs6000_reg_type
,
1441 secondary_reload_info
*,
1443 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1444 static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused
));
1445 static tree
rs6000_fold_builtin (tree
, int, tree
*, bool);
1447 /* Hash table stuff for keeping track of TOC entries. */
1449 struct GTY((for_user
)) toc_hash_struct
1451 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1452 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1454 machine_mode key_mode
;
1458 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1460 static hashval_t
hash (toc_hash_struct
*);
1461 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1464 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1466 /* Hash table to keep track of the argument types for builtin functions. */
1468 struct GTY((for_user
)) builtin_hash_struct
1471 machine_mode mode
[4]; /* return value + 3 arguments. */
1472 unsigned char uns_p
[4]; /* and whether the types are unsigned. */
1475 struct builtin_hasher
: ggc_ptr_hash
<builtin_hash_struct
>
1477 static hashval_t
hash (builtin_hash_struct
*);
1478 static bool equal (builtin_hash_struct
*, builtin_hash_struct
*);
1481 static GTY (()) hash_table
<builtin_hasher
> *builtin_hash_table
;
1484 /* Default register names. */
1485 char rs6000_reg_names
[][8] =
1487 "0", "1", "2", "3", "4", "5", "6", "7",
1488 "8", "9", "10", "11", "12", "13", "14", "15",
1489 "16", "17", "18", "19", "20", "21", "22", "23",
1490 "24", "25", "26", "27", "28", "29", "30", "31",
1491 "0", "1", "2", "3", "4", "5", "6", "7",
1492 "8", "9", "10", "11", "12", "13", "14", "15",
1493 "16", "17", "18", "19", "20", "21", "22", "23",
1494 "24", "25", "26", "27", "28", "29", "30", "31",
1495 "mq", "lr", "ctr","ap",
1496 "0", "1", "2", "3", "4", "5", "6", "7",
1498 /* AltiVec registers. */
1499 "0", "1", "2", "3", "4", "5", "6", "7",
1500 "8", "9", "10", "11", "12", "13", "14", "15",
1501 "16", "17", "18", "19", "20", "21", "22", "23",
1502 "24", "25", "26", "27", "28", "29", "30", "31",
1504 /* Soft frame pointer. */
1506 /* HTM SPR registers. */
1507 "tfhar", "tfiar", "texasr"
1510 #ifdef TARGET_REGNAMES
1511 static const char alt_reg_names
[][8] =
1513 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1514 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1515 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1516 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1517 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1518 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1519 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1520 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1521 "mq", "lr", "ctr", "ap",
1522 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1524 /* AltiVec registers. */
1525 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1526 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1527 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1528 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1530 /* Soft frame pointer. */
1532 /* HTM SPR registers. */
1533 "tfhar", "tfiar", "texasr"
1537 /* Table of valid machine attributes. */
1539 static const struct attribute_spec rs6000_attribute_table
[] =
1541 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1542 affects_type_identity } */
1543 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute
,
1545 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute
,
1547 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute
,
1549 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute
,
1551 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute
,
1553 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1554 SUBTARGET_ATTRIBUTE_TABLE
,
1556 { NULL
, 0, 0, false, false, false, NULL
, false }
1559 #ifndef TARGET_PROFILE_KERNEL
1560 #define TARGET_PROFILE_KERNEL 0
1563 /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
1564 #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
1566 /* Initialize the GCC target structure. */
1567 #undef TARGET_ATTRIBUTE_TABLE
1568 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1569 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1570 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1571 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1572 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1574 #undef TARGET_ASM_ALIGNED_DI_OP
1575 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1577 /* Default unaligned ops are only provided for ELF. Find the ops needed
1578 for non-ELF systems. */
1579 #ifndef OBJECT_FORMAT_ELF
1581 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1583 #undef TARGET_ASM_UNALIGNED_HI_OP
1584 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1585 #undef TARGET_ASM_UNALIGNED_SI_OP
1586 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1587 #undef TARGET_ASM_UNALIGNED_DI_OP
1588 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1591 #undef TARGET_ASM_UNALIGNED_HI_OP
1592 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1593 #undef TARGET_ASM_UNALIGNED_SI_OP
1594 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1595 #undef TARGET_ASM_UNALIGNED_DI_OP
1596 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1597 #undef TARGET_ASM_ALIGNED_DI_OP
1598 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1602 /* This hook deals with fixups for relocatable code and DI-mode objects
1604 #undef TARGET_ASM_INTEGER
1605 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1607 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1608 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1609 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1612 #undef TARGET_SET_UP_BY_PROLOGUE
1613 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1615 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1616 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1617 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1618 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1619 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1620 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1621 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1622 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1623 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1624 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1625 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1626 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1628 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1629 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1631 #undef TARGET_INTERNAL_ARG_POINTER
1632 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1634 #undef TARGET_HAVE_TLS
1635 #define TARGET_HAVE_TLS HAVE_AS_TLS
1637 #undef TARGET_CANNOT_FORCE_CONST_MEM
1638 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1640 #undef TARGET_DELEGITIMIZE_ADDRESS
1641 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1643 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1644 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1646 #undef TARGET_LEGITIMATE_COMBINED_INSN
1647 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1649 #undef TARGET_ASM_FUNCTION_PROLOGUE
1650 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1651 #undef TARGET_ASM_FUNCTION_EPILOGUE
1652 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1654 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1655 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1657 #undef TARGET_LEGITIMIZE_ADDRESS
1658 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1660 #undef TARGET_SCHED_VARIABLE_ISSUE
1661 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1663 #undef TARGET_SCHED_ISSUE_RATE
1664 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1665 #undef TARGET_SCHED_ADJUST_COST
1666 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1667 #undef TARGET_SCHED_ADJUST_PRIORITY
1668 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1669 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1670 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1671 #undef TARGET_SCHED_INIT
1672 #define TARGET_SCHED_INIT rs6000_sched_init
1673 #undef TARGET_SCHED_FINISH
1674 #define TARGET_SCHED_FINISH rs6000_sched_finish
1675 #undef TARGET_SCHED_REORDER
1676 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1677 #undef TARGET_SCHED_REORDER2
1678 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1680 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1681 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1686 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1687 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1688 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1689 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1690 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1691 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1692 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1693 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1695 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1696 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1698 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1699 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1700 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1701 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1702 rs6000_builtin_support_vector_misalignment
1703 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1704 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1705 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1706 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1707 rs6000_builtin_vectorization_cost
1708 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1709 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1710 rs6000_preferred_simd_mode
1711 #undef TARGET_VECTORIZE_INIT_COST
1712 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1713 #undef TARGET_VECTORIZE_ADD_STMT_COST
1714 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1715 #undef TARGET_VECTORIZE_FINISH_COST
1716 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1717 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1718 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1720 #undef TARGET_INIT_BUILTINS
1721 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1722 #undef TARGET_BUILTIN_DECL
1723 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1725 #undef TARGET_FOLD_BUILTIN
1726 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1727 #undef TARGET_GIMPLE_FOLD_BUILTIN
1728 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1730 #undef TARGET_EXPAND_BUILTIN
1731 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1733 #undef TARGET_MANGLE_TYPE
1734 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1736 #undef TARGET_INIT_LIBFUNCS
1737 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1740 #undef TARGET_BINDS_LOCAL_P
1741 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1744 #undef TARGET_MS_BITFIELD_LAYOUT_P
1745 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1747 #undef TARGET_ASM_OUTPUT_MI_THUNK
1748 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1750 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1751 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1753 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1754 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1756 #undef TARGET_REGISTER_MOVE_COST
1757 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1758 #undef TARGET_MEMORY_MOVE_COST
1759 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1760 #undef TARGET_CANNOT_COPY_INSN_P
1761 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1762 #undef TARGET_RTX_COSTS
1763 #define TARGET_RTX_COSTS rs6000_rtx_costs
1764 #undef TARGET_ADDRESS_COST
1765 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1767 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1768 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1770 #undef TARGET_PROMOTE_FUNCTION_MODE
1771 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1773 #undef TARGET_RETURN_IN_MEMORY
1774 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1776 #undef TARGET_RETURN_IN_MSB
1777 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1779 #undef TARGET_SETUP_INCOMING_VARARGS
1780 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1782 /* Always strict argument naming on rs6000. */
1783 #undef TARGET_STRICT_ARGUMENT_NAMING
1784 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1785 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1786 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1787 #undef TARGET_SPLIT_COMPLEX_ARG
1788 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1789 #undef TARGET_MUST_PASS_IN_STACK
1790 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1791 #undef TARGET_PASS_BY_REFERENCE
1792 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1793 #undef TARGET_ARG_PARTIAL_BYTES
1794 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1795 #undef TARGET_FUNCTION_ARG_ADVANCE
1796 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1797 #undef TARGET_FUNCTION_ARG
1798 #define TARGET_FUNCTION_ARG rs6000_function_arg
1799 #undef TARGET_FUNCTION_ARG_BOUNDARY
1800 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1802 #undef TARGET_BUILD_BUILTIN_VA_LIST
1803 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1805 #undef TARGET_EXPAND_BUILTIN_VA_START
1806 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1808 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1809 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1811 #undef TARGET_EH_RETURN_FILTER_MODE
1812 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1814 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1815 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1817 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1818 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1820 #undef TARGET_FLOATN_MODE
1821 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1823 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1824 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1826 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1827 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1829 #undef TARGET_MD_ASM_ADJUST
1830 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1832 #undef TARGET_OPTION_OVERRIDE
1833 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1835 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1836 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1837 rs6000_builtin_vectorized_function
1839 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1840 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1841 rs6000_builtin_md_vectorized_function
1843 #undef TARGET_STACK_PROTECT_GUARD
1844 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1847 #undef TARGET_STACK_PROTECT_FAIL
1848 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1852 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1853 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1856 /* Use a 32-bit anchor range. This leads to sequences like:
1858 addis tmp,anchor,high
1861 where tmp itself acts as an anchor, and can be shared between
1862 accesses to the same 64k page. */
1863 #undef TARGET_MIN_ANCHOR_OFFSET
1864 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1865 #undef TARGET_MAX_ANCHOR_OFFSET
1866 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1867 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1868 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1869 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1870 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1872 #undef TARGET_BUILTIN_RECIPROCAL
1873 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1875 #undef TARGET_EXPAND_TO_RTL_HOOK
1876 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1878 #undef TARGET_INSTANTIATE_DECLS
1879 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1881 #undef TARGET_SECONDARY_RELOAD
1882 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1884 #undef TARGET_LEGITIMATE_ADDRESS_P
1885 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1887 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1888 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1891 #define TARGET_LRA_P rs6000_lra_p
1893 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1894 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1896 #undef TARGET_CAN_ELIMINATE
1897 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1899 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1900 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1902 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1903 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1905 #undef TARGET_TRAMPOLINE_INIT
1906 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1908 #undef TARGET_FUNCTION_VALUE
1909 #define TARGET_FUNCTION_VALUE rs6000_function_value
1911 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1912 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1914 #undef TARGET_OPTION_SAVE
1915 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1917 #undef TARGET_OPTION_RESTORE
1918 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1920 #undef TARGET_OPTION_PRINT
1921 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1923 #undef TARGET_CAN_INLINE_P
1924 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1926 #undef TARGET_SET_CURRENT_FUNCTION
1927 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1929 #undef TARGET_LEGITIMATE_CONSTANT_P
1930 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1932 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1933 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1935 #undef TARGET_CAN_USE_DOLOOP_P
1936 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1938 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1939 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1941 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1942 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1943 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1944 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1945 #undef TARGET_UNWIND_WORD_MODE
1946 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1948 #undef TARGET_OFFLOAD_OPTIONS
1949 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1951 #undef TARGET_C_MODE_FOR_SUFFIX
1952 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1954 #undef TARGET_INVALID_BINARY_OP
1955 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1957 #undef TARGET_OPTAB_SUPPORTED_P
1958 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1960 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1961 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1963 #undef TARGET_COMPARE_VERSION_PRIORITY
1964 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1966 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1967 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1968 rs6000_generate_version_dispatcher_body
1970 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1971 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1972 rs6000_get_function_versions_dispatcher
1974 #undef TARGET_OPTION_FUNCTION_VERSIONS
1975 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1979 /* Processor table. */
1982 const char *const name
; /* Canonical processor name. */
1983 const enum processor_type processor
; /* Processor type enum value. */
1984 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1987 static struct rs6000_ptt
const processor_target_table
[] =
1989 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1990 #include "rs6000-cpus.def"
1994 /* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is not found in processor_target_table.
   NOTE(review): this extract has dropped the function's return type,
   local declarations, and return statements; the remaining code tokens
   are kept verbatim.  */
1998 rs6000_cpu_name_lookup (const char *name
)
/* Linear, case-sensitive scan over the processor table.  */
2004 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
2005 if (! strcmp (name
, processor_target_table
[i
].name
))
2013 /* Return number of consecutive hard regs needed starting at reg REGNO
2014 to hold something of mode MODE.
2015 This is ordinarily the length in words of a value of mode MODE
2016 but can be less for certain modes in special long registers.
2018 POWER and PowerPC GPRs hold 32 bits worth;
2019 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
2022 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
2024 unsigned HOST_WIDE_INT reg_size
;
2026 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2027 128-bit floating point that can go in vector registers, which has VSX
2028 memory addressing. */
2029 if (FP_REGNO_P (regno
))
2030 reg_size
= (VECTOR_MEM_VSX_P (mode
) || FLOAT128_VECTOR_P (mode
)
2031 ? UNITS_PER_VSX_WORD
2032 : UNITS_PER_FP_WORD
);
2034 else if (ALTIVEC_REGNO_P (regno
))
2035 reg_size
= UNITS_PER_ALTIVEC_WORD
;
2038 reg_size
= UNITS_PER_WORD
;
2040 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
2043 /* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE, else 0.  Dispatches on the register class of REGNO (GPR, FPR,
   Altivec/VSX, CR, CA) and checks that the whole multi-register span the
   value needs stays inside that class.
   NOTE(review): this extract has dropped lines (function return type,
   braces, and several `return' statements); code tokens below are kept
   verbatim.  */
2046 rs6000_hard_regno_mode_ok (int regno
, machine_mode mode
)
/* Index of the last hard register the value would occupy.  */
2048 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
/* For complex modes, validity is decided on the component mode.  */
2050 if (COMPLEX_MODE_P (mode
))
2051 mode
= GET_MODE_INNER (mode
);
2053 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2054 register combinations, and use PTImode where we need to deal with quad
2055 word memory operations. Don't allow quad words in the argument or frame
2056 pointer registers, just registers 0..31. */
2057 if (mode
== PTImode
)
2058 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
2059 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
2060 && ((regno
& 1) == 0));
2062 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2063 implementations. Don't allow an item to be split between a FP register
2064 and an Altivec register. Allow TImode in all VSX registers if the user
   asked for it.  */
2066 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
2067 && (VECTOR_MEM_VSX_P (mode
)
2068 || FLOAT128_VECTOR_P (mode
)
2069 || reg_addr
[mode
].scalar_in_vmx_p
2070 || (TARGET_VSX_TIMODE
&& mode
== TImode
)
2071 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
2073 if (FP_REGNO_P (regno
))
2074 return FP_REGNO_P (last_regno
);
2076 if (ALTIVEC_REGNO_P (regno
))
/* Scalars may be allowed in the upper (Altivec) VSX registers only when
   scalar_in_vmx_p says so; full 16-byte vectors are always fine.  */
2078 if (GET_MODE_SIZE (mode
) != 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
2081 return ALTIVEC_REGNO_P (last_regno
);
2085 /* The GPRs can hold any mode, but values bigger than one register
2086 cannot go past R31. */
2087 if (INT_REGNO_P (regno
))
2088 return INT_REGNO_P (last_regno
);
2090 /* The float registers (except for VSX vector modes) can only hold floating
2091 modes and DImode. */
2092 if (FP_REGNO_P (regno
))
2094 if (FLOAT128_VECTOR_P (mode
))
/* TDmode (decimal 128-bit) needs an even/odd FPR pair.  */
2097 if (SCALAR_FLOAT_MODE_P (mode
)
2098 && (mode
!= TDmode
|| (regno
% 2) == 0)
2099 && FP_REGNO_P (last_regno
))
2102 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2104 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
/* ISA 3.0 small-integer support: SImode, and with P9 vector also
   HImode/QImode, can live in FPRs.  */
2107 if (TARGET_VSX_SMALL_INTEGER
)
2112 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
2117 if (PAIRED_SIMD_REGNO_P (regno
) && TARGET_PAIRED_FLOAT
2118 && PAIRED_VECTOR_MODE (mode
))
2124 /* The CR register can only hold CC modes. */
2125 if (CR_REGNO_P (regno
))
2126 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* The carry bit (CA) only holds word-sized integers.  */
2128 if (CA_REGNO_P (regno
))
2129 return mode
== Pmode
|| mode
== SImode
;
2131 /* AltiVec modes only in AltiVec registers. */
2132 if (ALTIVEC_REGNO_P (regno
))
2133 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
2134 || mode
== V1TImode
);
2136 /* We cannot put non-VSX TImode or PTImode anywhere except general register
2137 and it must be able to fit within the register set. */
2139 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
2142 /* Print interesting facts about registers. */
/* For each hard register in [FIRST_REGNO, LAST_REGNO], dump to stderr the
   modes it can hold (with register counts), whether it is call-used or
   fixed, and its register class.  Used by -mdebug=reg.
   NOTE(review): braces, `len' line-wrap bookkeeping, and `comma'
   re-assignments have been partially dropped by the extraction; code
   tokens below are kept verbatim.  */
2144 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2148 for (r
= first_regno
; r
<= last_regno
; ++r
)
2150 const char *comma
= "";
/* Single register: print "name:"; range: print "nameN:".  */
2153 if (first_regno
== last_regno
)
2154 fprintf (stderr
, "%s:\t", reg_name
);
2156 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
/* List every mode this register can hold.  */
2159 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2160 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2164 fprintf (stderr
, ",\n\t");
/* Multi-register modes are printed as "MODE/nregs".  */
2169 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2170 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2171 rs6000_hard_regno_nregs
[m
][r
]);
2173 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2178 if (call_used_regs
[r
])
2182 fprintf (stderr
, ",\n\t");
2187 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2195 fprintf (stderr
, ",\n\t");
2200 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2206 fprintf (stderr
, ",\n\t");
2210 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2211 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2216 fprintf (stderr
, ",\n\t");
2220 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
/* Map an rs6000_vector enum value to a short human-readable name for the
   -mdebug=reg dump.
   NOTE(review): the function header (return type), the `switch' opener,
   and the final output statement are missing from this extract; the case
   arms below are kept verbatim.  */
2225 rs6000_debug_vector_unit (enum rs6000_vector v
)
2231 case VECTOR_NONE
: ret
= "none"; break;
2232 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2233 case VECTOR_VSX
: ret
= "vsx"; break;
2234 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2235 case VECTOR_PAIRED
: ret
= "paired"; break;
2236 case VECTOR_OTHER
: ret
= "other"; break;
2237 default: ret
= "unknown"; break;
2243 /* Inner function printing just the address mask for a particular reload
   register class.  Returns a pointer to a formatted string; each flag in
   MASK contributes one character.  When KEEP_SPACES is true, unset flags
   emit a placeholder so columns line up across rows.
   NOTE(review): the statements that actually append characters to the
   result buffer are missing from this extract; only the flag tests
   remain, kept verbatim.  */
2245 DEBUG_FUNCTION
char *
2246 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2251 if ((mask
& RELOAD_REG_VALID
) != 0)
2253 else if (keep_spaces
)
2256 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2258 else if (keep_spaces
)
2261 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2263 else if (keep_spaces
)
/* QUAD_OFFSET implies OFFSET, so it is tested first.  */
2266 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2268 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2270 else if (keep_spaces
)
2273 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2275 else if (keep_spaces
)
2278 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2280 else if (keep_spaces
)
2283 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2285 else if (keep_spaces
)
2293 /* Print the address masks in a human readable fashion. */
/* Dump one line to stderr for machine mode M: the per-reload-class
   address masks, reload handler availability, upper-register use, fusion
   support, and vector unit info.  Used by -mdebug=reg.
   NOTE(review): braces, `load'/`store' character assignments, and column
   bookkeeping lines are partially missing from this extract; code tokens
   below are kept verbatim.  */
2295 rs6000_debug_print_mode (ssize_t m
)
2301 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2302 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2303 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2304 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
/* Show whether secondary reload handlers exist for this mode.  */
2306 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2307 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2308 fprintf (stderr
, " Reload=%c%c",
2309 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2310 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2312 spaces
+= sizeof (" Reload=sl") - 1;
2314 if (reg_addr
[m
].scalar_in_vmx_p
)
2316 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2320 spaces
+= sizeof (" Upper=y") - 1;
2322 fuse_extra_p
= ((reg_addr
[m
].fusion_gpr_ld
!= CODE_FOR_nothing
)
2323 || reg_addr
[m
].fused_toc
);
2326 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2328 if (rc
!= RELOAD_REG_ANY
)
/* NOTE(review): the first two terms of this condition both test
   fusion_addi_ld[rc]; the second occurrence is redundant (or was meant
   to test a different field) -- confirm against the full source.  */
2330 if (reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
2331 || reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
2332 || reg_addr
[m
].fusion_addi_st
[rc
] != CODE_FOR_nothing
2333 || reg_addr
[m
].fusion_addis_ld
[rc
] != CODE_FOR_nothing
2334 || reg_addr
[m
].fusion_addis_st
[rc
] != CODE_FOR_nothing
)
2336 fuse_extra_p
= true;
2345 fprintf (stderr
, "%*s Fuse:", spaces
, "");
2348 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2350 if (rc
!= RELOAD_REG_ANY
)
/* addis (upper-immediate) fusion takes precedence over addi fusion
   when choosing the character to print.  */
2354 if (reg_addr
[m
].fusion_addis_ld
[rc
] != CODE_FOR_nothing
)
2356 else if (reg_addr
[m
].fusion_addi_ld
[rc
] != CODE_FOR_nothing
)
2361 if (reg_addr
[m
].fusion_addis_st
[rc
] != CODE_FOR_nothing
)
2363 else if (reg_addr
[m
].fusion_addi_st
[rc
] != CODE_FOR_nothing
)
2368 if (load
== '-' && store
== '-')
2372 fprintf (stderr
, "%*s%c=%c%c", (spaces
+ 1), "",
2373 reload_reg_map
[rc
].name
[0], load
, store
);
2379 if (reg_addr
[m
].fusion_gpr_ld
!= CODE_FOR_nothing
)
2381 fprintf (stderr
, "%*sP8gpr", (spaces
+ 1), "");
2385 spaces
+= sizeof (" P8gpr") - 1;
2387 if (reg_addr
[m
].fused_toc
)
2389 fprintf (stderr
, "%*sToc", (spaces
+ 1), "");
2393 spaces
+= sizeof (" Toc") - 1;
2396 spaces
+= sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2398 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2399 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2401 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2403 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2404 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2407 fputs ("\n", stderr
);
2410 #define DEBUG_FMT_ID "%-32s= "
2411 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2412 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2413 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2415 /* Print various interesting information with -mdebug=reg. */
2417 rs6000_debug_reg_global (void)
2419 static const char *const tf
[2] = { "false", "true" };
2420 const char *nl
= (const char *)0;
2423 char costly_num
[20];
2425 char flags_buffer
[40];
2426 const char *costly_str
;
2427 const char *nop_str
;
2428 const char *trace_str
;
2429 const char *abi_str
;
2430 const char *cmodel_str
;
2431 struct cl_target_option cl_opts
;
2433 /* Modes we want tieable information on. */
2434 static const machine_mode print_tieable_modes
[] = {
2470 /* Virtual regs we are interested in. */
2471 const static struct {
2472 int regno
; /* register number. */
2473 const char *name
; /* register name. */
2474 } virtual_regs
[] = {
2475 { STACK_POINTER_REGNUM
, "stack pointer:" },
2476 { TOC_REGNUM
, "toc: " },
2477 { STATIC_CHAIN_REGNUM
, "static chain: " },
2478 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2479 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2480 { ARG_POINTER_REGNUM
, "arg pointer: " },
2481 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2482 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2483 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2484 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2485 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2486 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2487 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2488 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2489 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2490 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2493 fputs ("\nHard register information:\n", stderr
);
2494 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2495 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2496 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2499 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2500 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2501 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2502 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2503 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2504 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2506 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2507 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2508 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2512 "d reg_class = %s\n"
2513 "f reg_class = %s\n"
2514 "v reg_class = %s\n"
2515 "wa reg_class = %s\n"
2516 "wb reg_class = %s\n"
2517 "wd reg_class = %s\n"
2518 "we reg_class = %s\n"
2519 "wf reg_class = %s\n"
2520 "wg reg_class = %s\n"
2521 "wh reg_class = %s\n"
2522 "wi reg_class = %s\n"
2523 "wj reg_class = %s\n"
2524 "wk reg_class = %s\n"
2525 "wl reg_class = %s\n"
2526 "wm reg_class = %s\n"
2527 "wo reg_class = %s\n"
2528 "wp reg_class = %s\n"
2529 "wq reg_class = %s\n"
2530 "wr reg_class = %s\n"
2531 "ws reg_class = %s\n"
2532 "wt reg_class = %s\n"
2533 "wu reg_class = %s\n"
2534 "wv reg_class = %s\n"
2535 "ww reg_class = %s\n"
2536 "wx reg_class = %s\n"
2537 "wy reg_class = %s\n"
2538 "wz reg_class = %s\n"
2539 "wA reg_class = %s\n"
2540 "wH reg_class = %s\n"
2541 "wI reg_class = %s\n"
2542 "wJ reg_class = %s\n"
2543 "wK reg_class = %s\n"
2545 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2546 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_f
]],
2547 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2548 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2549 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wb
]],
2550 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wd
]],
2551 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2552 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wf
]],
2553 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wg
]],
2554 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wh
]],
2555 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wi
]],
2556 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wj
]],
2557 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wk
]],
2558 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wl
]],
2559 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wm
]],
2560 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wo
]],
2561 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wp
]],
2562 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wq
]],
2563 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2564 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_ws
]],
2565 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wt
]],
2566 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wu
]],
2567 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wv
]],
2568 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_ww
]],
2569 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2570 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wy
]],
2571 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wz
]],
2572 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]],
2573 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wH
]],
2574 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wI
]],
2575 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wJ
]],
2576 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wK
]]);
2579 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2580 rs6000_debug_print_mode (m
);
2582 fputs ("\n", stderr
);
2584 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2586 machine_mode mode1
= print_tieable_modes
[m1
];
2587 bool first_time
= true;
2589 nl
= (const char *)0;
2590 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2592 machine_mode mode2
= print_tieable_modes
[m2
];
2593 if (mode1
!= mode2
&& MODES_TIEABLE_P (mode1
, mode2
))
2597 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2602 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2607 fputs ("\n", stderr
);
2613 if (rs6000_recip_control
)
2615 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2617 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2618 if (rs6000_recip_bits
[m
])
2621 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2623 (RS6000_RECIP_AUTO_RE_P (m
)
2625 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2626 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2628 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2631 fputs ("\n", stderr
);
2634 if (rs6000_cpu_index
>= 0)
2636 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2638 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2640 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2641 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2644 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2646 if (rs6000_tune_index
>= 0)
2648 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2650 = processor_target_table
[rs6000_tune_index
].target_enable
;
2652 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2653 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2656 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2658 cl_target_option_save (&cl_opts
, &global_options
);
2659 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2662 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2663 rs6000_isa_flags_explicit
);
2665 rs6000_print_builtin_options (stderr
, 0, "rs6000_builtin_mask",
2666 rs6000_builtin_mask
);
2668 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2670 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2671 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2673 switch (rs6000_sched_costly_dep
)
2675 case max_dep_latency
:
2676 costly_str
= "max_dep_latency";
2680 costly_str
= "no_dep_costly";
2683 case all_deps_costly
:
2684 costly_str
= "all_deps_costly";
2687 case true_store_to_load_dep_costly
:
2688 costly_str
= "true_store_to_load_dep_costly";
2691 case store_to_load_dep_costly
:
2692 costly_str
= "store_to_load_dep_costly";
2696 costly_str
= costly_num
;
2697 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2701 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2703 switch (rs6000_sched_insert_nops
)
2705 case sched_finish_regroup_exact
:
2706 nop_str
= "sched_finish_regroup_exact";
2709 case sched_finish_pad_groups
:
2710 nop_str
= "sched_finish_pad_groups";
2713 case sched_finish_none
:
2714 nop_str
= "sched_finish_none";
2719 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2723 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2725 switch (rs6000_sdata
)
2732 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2736 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2740 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2745 switch (rs6000_traceback
)
2747 case traceback_default
: trace_str
= "default"; break;
2748 case traceback_none
: trace_str
= "none"; break;
2749 case traceback_part
: trace_str
= "part"; break;
2750 case traceback_full
: trace_str
= "full"; break;
2751 default: trace_str
= "unknown"; break;
2754 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2756 switch (rs6000_current_cmodel
)
2758 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2759 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2760 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2761 default: cmodel_str
= "unknown"; break;
2764 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2766 switch (rs6000_current_abi
)
2768 case ABI_NONE
: abi_str
= "none"; break;
2769 case ABI_AIX
: abi_str
= "aix"; break;
2770 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2771 case ABI_V4
: abi_str
= "V4"; break;
2772 case ABI_DARWIN
: abi_str
= "darwin"; break;
2773 default: abi_str
= "unknown"; break;
2776 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2778 if (rs6000_altivec_abi
)
2779 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2781 if (rs6000_darwin64_abi
)
2782 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2784 fprintf (stderr
, DEBUG_FMT_S
, "single_float",
2785 (TARGET_SINGLE_FLOAT
? "true" : "false"));
2787 fprintf (stderr
, DEBUG_FMT_S
, "double_float",
2788 (TARGET_DOUBLE_FLOAT
? "true" : "false"));
2790 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2791 (TARGET_SOFT_FLOAT
? "true" : "false"));
2793 if (TARGET_LINK_STACK
)
2794 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2796 fprintf (stderr
, DEBUG_FMT_S
, "lra", TARGET_LRA
? "true" : "false");
2798 if (TARGET_P8_FUSION
)
2802 strcpy (options
, (TARGET_P9_FUSION
) ? "power9" : "power8");
2803 if (TARGET_TOC_FUSION
)
2804 strcat (options
, ", toc");
2806 if (TARGET_P8_FUSION_SIGN
)
2807 strcat (options
, ", sign");
2809 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2812 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2813 TARGET_SECURE_PLT
? "secure" : "bss");
2814 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2815 aix_struct_return
? "aix" : "sysv");
2816 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2817 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2818 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2819 tf
[!!rs6000_align_branch_targets
]);
2820 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2821 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2822 rs6000_long_double_type_size
);
2823 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2824 (int)rs6000_sched_restricted_insns_priority
);
2825 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2827 fprintf (stderr
, DEBUG_FMT_D
, "Number of rs6000 builtins",
2828 (int)RS6000_BUILTIN_COUNT
);
2830 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2831 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2834 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2835 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2837 if (TARGET_DIRECT_MOVE_128
)
2838 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2839 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2843 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2844 legitimate address support to figure out the appropriate addressing to
2848 rs6000_setup_reg_addr_masks (void)
2850 ssize_t rc
, reg
, m
, nregs
;
2851 addr_mask_type any_addr_mask
, addr_mask
;
2853 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2855 machine_mode m2
= (machine_mode
) m
;
2856 bool complex_p
= false;
2857 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2860 if (COMPLEX_MODE_P (m2
))
2863 m2
= GET_MODE_INNER (m2
);
2866 msize
= GET_MODE_SIZE (m2
);
2868 /* SDmode is special in that we want to access it only via REG+REG
2869 addressing on power7 and above, since we want to use the LFIWZX and
2870 STFIWZX instructions to load it. */
2871 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2874 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2877 reg
= reload_reg_map
[rc
].reg
;
2879 /* Can mode values go in the GPR/FPR/Altivec registers? */
2880 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2882 bool small_int_vsx_p
= (small_int_p
2883 && (rc
== RELOAD_REG_FPR
2884 || rc
== RELOAD_REG_VMX
));
2886 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2887 addr_mask
|= RELOAD_REG_VALID
;
2889 /* Indicate if the mode takes more than 1 physical register. If
2890 it takes a single register, indicate it can do REG+REG
2891 addressing. Small integers in VSX registers can only do
2892 REG+REG addressing. */
2893 if (small_int_vsx_p
)
2894 addr_mask
|= RELOAD_REG_INDEXED
;
2895 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2896 addr_mask
|= RELOAD_REG_MULTIPLE
;
2898 addr_mask
|= RELOAD_REG_INDEXED
;
2900 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2901 addressing. If we allow scalars into Altivec registers,
2902 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2905 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2907 && !VECTOR_MODE_P (m2
)
2908 && !FLOAT128_VECTOR_P (m2
)
2911 && (m2
!= DFmode
|| !TARGET_UPPER_REGS_DF
)
2912 && (m2
!= SFmode
|| !TARGET_UPPER_REGS_SF
))
2914 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2916 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2917 we don't allow PRE_MODIFY for some multi-register
2922 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2926 if (TARGET_POWERPC64
)
2927 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2933 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2939 /* GPR and FPR registers can do REG+OFFSET addressing, except
2940 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2941 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2942 if ((addr_mask
!= 0) && !indexed_only_p
2944 && (rc
== RELOAD_REG_GPR
2945 || ((msize
== 8 || m2
== SFmode
)
2946 && (rc
== RELOAD_REG_FPR
2947 || (rc
== RELOAD_REG_VMX
2948 && TARGET_P9_DFORM_SCALAR
)))))
2949 addr_mask
|= RELOAD_REG_OFFSET
;
2951 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2952 instructions are enabled. The offset for 128-bit VSX registers is
2953 only 12-bits. While GPRs can handle the full offset range, VSX
2954 registers can only handle the restricted range. */
2955 else if ((addr_mask
!= 0) && !indexed_only_p
2956 && msize
== 16 && TARGET_P9_DFORM_VECTOR
2957 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2958 || (m2
== TImode
&& TARGET_VSX_TIMODE
)))
2960 addr_mask
|= RELOAD_REG_OFFSET
;
2961 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2962 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2965 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2966 addressing on 128-bit types. */
2967 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2968 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2969 addr_mask
|= RELOAD_REG_AND_M16
;
2971 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2972 any_addr_mask
|= addr_mask
;
2975 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2980 /* Initialize the various global tables that are based on register size. */
2982 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2988 /* Precalculate REGNO_REG_CLASS. */
2989 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2990 for (r
= 1; r
< 32; ++r
)
2991 rs6000_regno_regclass
[r
] = BASE_REGS
;
2993 for (r
= 32; r
< 64; ++r
)
2994 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2996 for (r
= 64; r
< FIRST_PSEUDO_REGISTER
; ++r
)
2997 rs6000_regno_regclass
[r
] = NO_REGS
;
2999 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
3000 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
3002 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
3003 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
3004 rs6000_regno_regclass
[r
] = CR_REGS
;
3006 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
3007 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
3008 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
3009 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
3010 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
3011 rs6000_regno_regclass
[TFHAR_REGNO
] = SPR_REGS
;
3012 rs6000_regno_regclass
[TFIAR_REGNO
] = SPR_REGS
;
3013 rs6000_regno_regclass
[TEXASR_REGNO
] = SPR_REGS
;
3014 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
3015 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
3017 /* Precalculate register class to simpler reload register class. We don't
3018 need all of the register classes that are combinations of different
3019 classes, just the simple ones that have constraint letters. */
3020 for (c
= 0; c
< N_REG_CLASSES
; c
++)
3021 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
3023 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
3024 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
3025 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
3026 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
3027 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
3028 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
3029 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
3030 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
3031 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
3032 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
3036 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
3037 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
3041 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
3042 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
3045 /* Precalculate the valid memory formats as well as the vector information,
3046 this must be set up before the rs6000_hard_regno_nregs_internal calls
3048 gcc_assert ((int)VECTOR_NONE
== 0);
3049 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
3050 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_unit
));
3052 gcc_assert ((int)CODE_FOR_nothing
== 0);
3053 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
3055 gcc_assert ((int)NO_REGS
== 0);
3056 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
3058 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
3059 believes it can use native alignment or still uses 128-bit alignment. */
3060 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
3071 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3072 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3073 if (TARGET_FLOAT128_TYPE
)
3075 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
3076 rs6000_vector_align
[KFmode
] = 128;
3078 if (FLOAT128_IEEE_P (TFmode
))
3080 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
3081 rs6000_vector_align
[TFmode
] = 128;
3085 /* V2DF mode, VSX only. */
3088 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
3089 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
3090 rs6000_vector_align
[V2DFmode
] = align64
;
3093 /* V4SF mode, either VSX or Altivec. */
3096 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
3097 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
3098 rs6000_vector_align
[V4SFmode
] = align32
;
3100 else if (TARGET_ALTIVEC
)
3102 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
3103 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
3104 rs6000_vector_align
[V4SFmode
] = align32
;
3107 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3111 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
3112 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
3113 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
3114 rs6000_vector_align
[V4SImode
] = align32
;
3115 rs6000_vector_align
[V8HImode
] = align32
;
3116 rs6000_vector_align
[V16QImode
] = align32
;
3120 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
3121 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
3122 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
3126 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
3127 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
3128 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
3132 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
3133 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3136 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
3137 rs6000_vector_unit
[V2DImode
]
3138 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3139 rs6000_vector_align
[V2DImode
] = align64
;
3141 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
3142 rs6000_vector_unit
[V1TImode
]
3143 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
3144 rs6000_vector_align
[V1TImode
] = 128;
3147 /* DFmode, see if we want to use the VSX unit. Memory is handled
3148 differently, so don't set rs6000_vector_mem. */
3149 if (TARGET_VSX
&& TARGET_VSX_SCALAR_DOUBLE
)
3151 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
3152 rs6000_vector_align
[DFmode
] = 64;
3155 /* SFmode, see if we want to use the VSX unit. */
3156 if (TARGET_P8_VECTOR
&& TARGET_VSX_SCALAR_FLOAT
)
3158 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
3159 rs6000_vector_align
[SFmode
] = 32;
3162 /* Allow TImode in VSX register and set the VSX memory macros. */
3163 if (TARGET_VSX
&& TARGET_VSX_TIMODE
)
3165 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
3166 rs6000_vector_align
[TImode
] = align64
;
3169 /* TODO add paired floating point vector support. */
3171 /* Register class constraints for the constraints that depend on compile
3172 switches. When the VSX code was added, different constraints were added
3173 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3174 of the VSX registers are used. The register classes for scalar floating
3175 point types is set, based on whether we allow that type into the upper
3176 (Altivec) registers. GCC has register classes to target the Altivec
3177 registers for load/store operations, to select using a VSX memory
3178 operation instead of the traditional floating point operation. The
3181 d - Register class to use with traditional DFmode instructions.
3182 f - Register class to use with traditional SFmode instructions.
3183 v - Altivec register.
3184 wa - Any VSX register.
3185 wc - Reserved to represent individual CR bits (used in LLVM).
3186 wd - Preferred register class for V2DFmode.
3187 wf - Preferred register class for V4SFmode.
3188 wg - Float register for power6x move insns.
3189 wh - FP register for direct move instructions.
3190 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3191 wj - FP or VSX register to hold 64-bit integers for direct moves.
3192 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3193 wl - Float register if we can do 32-bit signed int loads.
3194 wm - VSX register for ISA 2.07 direct move operations.
3195 wn - always NO_REGS.
3196 wr - GPR if 64-bit mode is permitted.
3197 ws - Register class to do ISA 2.06 DF operations.
3198 wt - VSX register for TImode in VSX registers.
3199 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3200 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3201 ww - Register class to do SF conversions in with VSX operations.
3202 wx - Float register if we can do 32-bit int stores.
3203 wy - Register class to do ISA 2.07 SF operations.
3204 wz - Float register if we can do 32-bit unsigned int loads.
3205 wH - Altivec register if SImode is allowed in VSX registers.
3206 wI - VSX register if SImode is allowed in VSX registers.
3207 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3208 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3210 if (TARGET_HARD_FLOAT
)
3211 rs6000_constraints
[RS6000_CONSTRAINT_f
] = FLOAT_REGS
; /* SFmode */
3213 if (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
3214 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
; /* DFmode */
3218 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
3219 rs6000_constraints
[RS6000_CONSTRAINT_wd
] = VSX_REGS
; /* V2DFmode */
3220 rs6000_constraints
[RS6000_CONSTRAINT_wf
] = VSX_REGS
; /* V4SFmode */
3222 if (TARGET_VSX_TIMODE
)
3223 rs6000_constraints
[RS6000_CONSTRAINT_wt
] = VSX_REGS
; /* TImode */
3225 if (TARGET_UPPER_REGS_DF
) /* DFmode */
3227 rs6000_constraints
[RS6000_CONSTRAINT_ws
] = VSX_REGS
;
3228 rs6000_constraints
[RS6000_CONSTRAINT_wv
] = ALTIVEC_REGS
;
3231 rs6000_constraints
[RS6000_CONSTRAINT_ws
] = FLOAT_REGS
;
3233 if (TARGET_UPPER_REGS_DI
) /* DImode */
3234 rs6000_constraints
[RS6000_CONSTRAINT_wi
] = VSX_REGS
;
3236 rs6000_constraints
[RS6000_CONSTRAINT_wi
] = FLOAT_REGS
;
3239 /* Add conditional constraints based on various options, to allow us to
3240 collapse multiple insn patterns. */
3242 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
3244 if (TARGET_MFPGPR
) /* DFmode */
3245 rs6000_constraints
[RS6000_CONSTRAINT_wg
] = FLOAT_REGS
;
3248 rs6000_constraints
[RS6000_CONSTRAINT_wl
] = FLOAT_REGS
; /* DImode */
3250 if (TARGET_DIRECT_MOVE
)
3252 rs6000_constraints
[RS6000_CONSTRAINT_wh
] = FLOAT_REGS
;
3253 rs6000_constraints
[RS6000_CONSTRAINT_wj
] /* DImode */
3254 = rs6000_constraints
[RS6000_CONSTRAINT_wi
];
3255 rs6000_constraints
[RS6000_CONSTRAINT_wk
] /* DFmode */
3256 = rs6000_constraints
[RS6000_CONSTRAINT_ws
];
3257 rs6000_constraints
[RS6000_CONSTRAINT_wm
] = VSX_REGS
;
3260 if (TARGET_POWERPC64
)
3262 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
3263 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
3266 if (TARGET_P8_VECTOR
&& TARGET_UPPER_REGS_SF
) /* SFmode */
3268 rs6000_constraints
[RS6000_CONSTRAINT_wu
] = ALTIVEC_REGS
;
3269 rs6000_constraints
[RS6000_CONSTRAINT_wy
] = VSX_REGS
;
3270 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = VSX_REGS
;
3272 else if (TARGET_P8_VECTOR
)
3274 rs6000_constraints
[RS6000_CONSTRAINT_wy
] = FLOAT_REGS
;
3275 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = FLOAT_REGS
;
3277 else if (TARGET_VSX
)
3278 rs6000_constraints
[RS6000_CONSTRAINT_ww
] = FLOAT_REGS
;
3281 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
3284 rs6000_constraints
[RS6000_CONSTRAINT_wz
] = FLOAT_REGS
; /* DImode */
3286 if (TARGET_FLOAT128_TYPE
)
3288 rs6000_constraints
[RS6000_CONSTRAINT_wq
] = VSX_REGS
; /* KFmode */
3289 if (FLOAT128_IEEE_P (TFmode
))
3290 rs6000_constraints
[RS6000_CONSTRAINT_wp
] = VSX_REGS
; /* TFmode */
3293 /* Support for new D-form instructions. */
3294 if (TARGET_P9_DFORM_SCALAR
)
3295 rs6000_constraints
[RS6000_CONSTRAINT_wb
] = ALTIVEC_REGS
;
3297 /* Support for ISA 3.0 (power9) vectors. */
3298 if (TARGET_P9_VECTOR
)
3299 rs6000_constraints
[RS6000_CONSTRAINT_wo
] = VSX_REGS
;
3301 /* Support for new direct moves (ISA 3.0 + 64bit). */
3302 if (TARGET_DIRECT_MOVE_128
)
3303 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
3305 /* Support small integers in VSX registers. */
3306 if (TARGET_VSX_SMALL_INTEGER
)
3308 rs6000_constraints
[RS6000_CONSTRAINT_wH
] = ALTIVEC_REGS
;
3309 rs6000_constraints
[RS6000_CONSTRAINT_wI
] = FLOAT_REGS
;
3310 if (TARGET_P9_VECTOR
)
3312 rs6000_constraints
[RS6000_CONSTRAINT_wJ
] = FLOAT_REGS
;
3313 rs6000_constraints
[RS6000_CONSTRAINT_wK
] = ALTIVEC_REGS
;
3317 /* Set up the reload helper and direct move functions. */
3318 if (TARGET_VSX
|| TARGET_ALTIVEC
)
3322 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
3323 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
3324 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
3325 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
3326 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
3327 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
3328 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
3329 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3330 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3331 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3332 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3333 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3334 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3335 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3336 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3337 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3338 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3339 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3340 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3341 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3343 if (FLOAT128_VECTOR_P (KFmode
))
3345 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3346 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3349 if (FLOAT128_VECTOR_P (TFmode
))
3351 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3352 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3355 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3357 if (TARGET_NO_SDMODE_STACK
)
3359 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3360 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3363 if (TARGET_VSX_TIMODE
)
3365 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3366 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3369 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3371 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3372 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3373 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3374 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3375 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3376 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3377 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3378 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3379 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3381 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3382 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3383 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3384 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3385 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3386 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3387 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3388 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3389 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3391 if (FLOAT128_VECTOR_P (KFmode
))
3393 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3394 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3397 if (FLOAT128_VECTOR_P (TFmode
))
3399 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3400 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3406 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3407 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3408 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3409 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3410 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3411 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3412 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3413 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3414 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3415 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3416 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3417 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3418 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3419 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3420 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3421 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3422 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3423 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3424 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3425 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3427 if (FLOAT128_VECTOR_P (KFmode
))
3429 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3430 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3433 if (FLOAT128_IEEE_P (TFmode
))
3435 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3436 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3439 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3441 if (TARGET_NO_SDMODE_STACK
)
3443 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3444 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3447 if (TARGET_VSX_TIMODE
)
3449 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3450 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3453 if (TARGET_DIRECT_MOVE
)
3455 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3456 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3457 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3461 if (TARGET_UPPER_REGS_DF
)
3462 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3464 if (TARGET_UPPER_REGS_DI
)
3465 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3467 if (TARGET_UPPER_REGS_SF
)
3468 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3470 if (TARGET_VSX_SMALL_INTEGER
)
3472 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3473 if (TARGET_P9_VECTOR
)
3475 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3476 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3481 /* Setup the fusion operations. */
3482 if (TARGET_P8_FUSION
)
3484 reg_addr
[QImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_qi
;
3485 reg_addr
[HImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_hi
;
3486 reg_addr
[SImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_si
;
3488 reg_addr
[DImode
].fusion_gpr_ld
= CODE_FOR_fusion_gpr_load_di
;
3491 if (TARGET_P9_FUSION
)
3494 enum machine_mode mode
; /* mode of the fused type. */
3495 enum machine_mode pmode
; /* pointer mode. */
3496 enum rs6000_reload_reg_type rtype
; /* register type. */
3497 enum insn_code load
; /* load insn. */
3498 enum insn_code store
; /* store insn. */
3501 static const struct fuse_insns addis_insns
[] = {
3502 { SFmode
, DImode
, RELOAD_REG_FPR
,
3503 CODE_FOR_fusion_vsx_di_sf_load
,
3504 CODE_FOR_fusion_vsx_di_sf_store
},
3506 { SFmode
, SImode
, RELOAD_REG_FPR
,
3507 CODE_FOR_fusion_vsx_si_sf_load
,
3508 CODE_FOR_fusion_vsx_si_sf_store
},
3510 { DFmode
, DImode
, RELOAD_REG_FPR
,
3511 CODE_FOR_fusion_vsx_di_df_load
,
3512 CODE_FOR_fusion_vsx_di_df_store
},
3514 { DFmode
, SImode
, RELOAD_REG_FPR
,
3515 CODE_FOR_fusion_vsx_si_df_load
,
3516 CODE_FOR_fusion_vsx_si_df_store
},
3518 { DImode
, DImode
, RELOAD_REG_FPR
,
3519 CODE_FOR_fusion_vsx_di_di_load
,
3520 CODE_FOR_fusion_vsx_di_di_store
},
3522 { DImode
, SImode
, RELOAD_REG_FPR
,
3523 CODE_FOR_fusion_vsx_si_di_load
,
3524 CODE_FOR_fusion_vsx_si_di_store
},
3526 { QImode
, DImode
, RELOAD_REG_GPR
,
3527 CODE_FOR_fusion_gpr_di_qi_load
,
3528 CODE_FOR_fusion_gpr_di_qi_store
},
3530 { QImode
, SImode
, RELOAD_REG_GPR
,
3531 CODE_FOR_fusion_gpr_si_qi_load
,
3532 CODE_FOR_fusion_gpr_si_qi_store
},
3534 { HImode
, DImode
, RELOAD_REG_GPR
,
3535 CODE_FOR_fusion_gpr_di_hi_load
,
3536 CODE_FOR_fusion_gpr_di_hi_store
},
3538 { HImode
, SImode
, RELOAD_REG_GPR
,
3539 CODE_FOR_fusion_gpr_si_hi_load
,
3540 CODE_FOR_fusion_gpr_si_hi_store
},
3542 { SImode
, DImode
, RELOAD_REG_GPR
,
3543 CODE_FOR_fusion_gpr_di_si_load
,
3544 CODE_FOR_fusion_gpr_di_si_store
},
3546 { SImode
, SImode
, RELOAD_REG_GPR
,
3547 CODE_FOR_fusion_gpr_si_si_load
,
3548 CODE_FOR_fusion_gpr_si_si_store
},
3550 { SFmode
, DImode
, RELOAD_REG_GPR
,
3551 CODE_FOR_fusion_gpr_di_sf_load
,
3552 CODE_FOR_fusion_gpr_di_sf_store
},
3554 { SFmode
, SImode
, RELOAD_REG_GPR
,
3555 CODE_FOR_fusion_gpr_si_sf_load
,
3556 CODE_FOR_fusion_gpr_si_sf_store
},
3558 { DImode
, DImode
, RELOAD_REG_GPR
,
3559 CODE_FOR_fusion_gpr_di_di_load
,
3560 CODE_FOR_fusion_gpr_di_di_store
},
3562 { DFmode
, DImode
, RELOAD_REG_GPR
,
3563 CODE_FOR_fusion_gpr_di_df_load
,
3564 CODE_FOR_fusion_gpr_di_df_store
},
3567 machine_mode cur_pmode
= Pmode
;
3570 for (i
= 0; i
< ARRAY_SIZE (addis_insns
); i
++)
3572 machine_mode xmode
= addis_insns
[i
].mode
;
3573 enum rs6000_reload_reg_type rtype
= addis_insns
[i
].rtype
;
3575 if (addis_insns
[i
].pmode
!= cur_pmode
)
3578 if (rtype
== RELOAD_REG_FPR
&& !TARGET_HARD_FLOAT
)
3581 reg_addr
[xmode
].fusion_addis_ld
[rtype
] = addis_insns
[i
].load
;
3582 reg_addr
[xmode
].fusion_addis_st
[rtype
] = addis_insns
[i
].store
;
3584 if (rtype
== RELOAD_REG_FPR
&& TARGET_P9_DFORM_SCALAR
)
3586 reg_addr
[xmode
].fusion_addis_ld
[RELOAD_REG_VMX
]
3587 = addis_insns
[i
].load
;
3588 reg_addr
[xmode
].fusion_addis_st
[RELOAD_REG_VMX
]
3589 = addis_insns
[i
].store
;
3594 /* Note which types we support fusing TOC setup plus memory insn. We only do
3595 fused TOCs for medium/large code models. */
3596 if (TARGET_P8_FUSION
&& TARGET_TOC_FUSION
&& TARGET_POWERPC64
3597 && (TARGET_CMODEL
!= CMODEL_SMALL
))
3599 reg_addr
[QImode
].fused_toc
= true;
3600 reg_addr
[HImode
].fused_toc
= true;
3601 reg_addr
[SImode
].fused_toc
= true;
3602 reg_addr
[DImode
].fused_toc
= true;
3603 if (TARGET_HARD_FLOAT
)
3605 if (TARGET_SINGLE_FLOAT
)
3606 reg_addr
[SFmode
].fused_toc
= true;
3607 if (TARGET_DOUBLE_FLOAT
)
3608 reg_addr
[DFmode
].fused_toc
= true;
3612 /* Precalculate HARD_REGNO_NREGS. */
3613 for (r
= 0; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3614 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3615 rs6000_hard_regno_nregs
[m
][r
]
3616 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
)m
);
3618 /* Precalculate HARD_REGNO_MODE_OK. */
3619 for (r
= 0; r
< FIRST_PSEUDO_REGISTER
; ++r
)
3620 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3621 if (rs6000_hard_regno_mode_ok (r
, (machine_mode
)m
))
3622 rs6000_hard_regno_mode_ok_p
[m
][r
] = true;
3624 /* Precalculate CLASS_MAX_NREGS sizes. */
3625 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3629 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3630 reg_size
= UNITS_PER_VSX_WORD
;
3632 else if (c
== ALTIVEC_REGS
)
3633 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3635 else if (c
== FLOAT_REGS
)
3636 reg_size
= UNITS_PER_FP_WORD
;
3639 reg_size
= UNITS_PER_WORD
;
3641 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3643 machine_mode m2
= (machine_mode
)m
;
3644 int reg_size2
= reg_size
;
3646 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3648 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3649 reg_size2
= UNITS_PER_FP_WORD
;
3651 rs6000_class_max_nregs
[m
][c
]
3652 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3656 /* Calculate which modes to automatically generate code to use the
3657 reciprocal divide and square root instructions. In the future, possibly
3658 automatically generate the instructions even if the user did not specify
3659 -mrecip. The older machines double precision reciprocal sqrt estimate is
3660 not accurate enough. */
3661 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3663 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3665 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3666 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3667 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3668 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3669 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3671 if (TARGET_FRSQRTES
)
3672 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3674 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3675 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3676 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3677 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3678 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3680 if (rs6000_recip_control
)
3682 if (!flag_finite_math_only
)
3683 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3684 if (flag_trapping_math
)
3685 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3686 if (!flag_reciprocal_math
)
3687 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3688 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3690 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3691 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3692 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3694 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3695 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3696 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3698 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3699 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3700 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3702 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3703 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3704 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3706 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3707 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3708 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3710 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3711 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3712 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3714 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3715 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3716 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3718 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3719 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3720 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3724 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3725 legitimate address support to figure out the appropriate addressing to
3727 rs6000_setup_reg_addr_masks ();
3729 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3731 if (TARGET_DEBUG_REG
)
3732 rs6000_debug_reg_global ();
3734 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3736 "SImode variable mult cost = %d\n"
3737 "SImode constant mult cost = %d\n"
3738 "SImode short constant mult cost = %d\n"
3739 "DImode multipliciation cost = %d\n"
3740 "SImode division cost = %d\n"
3741 "DImode division cost = %d\n"
3742 "Simple fp operation cost = %d\n"
3743 "DFmode multiplication cost = %d\n"
3744 "SFmode division cost = %d\n"
3745 "DFmode division cost = %d\n"
3746 "cache line size = %d\n"
3747 "l1 cache size = %d\n"
3748 "l2 cache size = %d\n"
3749 "simultaneous prefetches = %d\n"
3752 rs6000_cost
->mulsi_const
,
3753 rs6000_cost
->mulsi_const9
,
3761 rs6000_cost
->cache_line_size
,
3762 rs6000_cost
->l1_cache_size
,
3763 rs6000_cost
->l2_cache_size
,
3764 rs6000_cost
->simultaneous_prefetches
);
3769 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
/* NOTE(review): this chunk lost physical lines during extraction (the fused
   original line numbers are not contiguous), so several statements and
   conditions below are truncated.  Only comments have been added.  */
3772 darwin_rs6000_override_options (void)
3774 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3776 rs6000_altivec_abi
= 1;
3777 TARGET_ALTIVEC_VRSAVE
= 1;
3778 rs6000_current_abi
= ABI_DARWIN
;
/* One-byte bool when targeting the Darwin ABI; the rest of this condition
   is missing from this chunk -- TODO confirm against upstream source.  */
3780 if (DEFAULT_ABI
== ABI_DARWIN
3782 darwin_one_byte_bool
= 1;
/* -m64 requires the 64-bit PowerPC instruction set: force the flag on and
   warn the user rather than erroring out.  */
3784 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3786 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3787 warning (0, "-m64 requires PowerPC64 architecture, enabling");
/* The guard for the next two assignments is not visible in this chunk;
   presumably they apply to kernel/kext builds -- verify upstream.  */
3791 rs6000_default_long_calls
= 1;
3792 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3795 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
/* Default AltiVec on unless building a kernel/kext or the user explicitly
   set -maltivec / -mno-altivec (tracked in rs6000_isa_flags_explicit).  */
3797 if (!flag_mkernel
&& !flag_apple_kext
3799 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3800 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3802 /* Unless the user (not the configurer) has explicitly overridden
3803 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3804 G4 unless targeting the kernel. */
/* Leading terms of this condition are missing from this chunk; the visible
   part checks the macOS deployment target and explicit option flags.  */
3807 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3808 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3809 && ! global_options_set
.x_rs6000_cpu_index
)
3811 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3816 /* If not otherwise specified by a target, make 'long double' equivalent to
3819 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3820 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3823 /* Return the builtin mask of the various options used that could affect which
3824 builtins were used. In the past we used target_flags, but we've run out of
3825 bits, and some options like PAIRED are no longer in target_flags. */
/* Each currently-enabled target option (TARGET_xxx) contributes its
   RS6000_BTM_xxx bit, so builtin expansion can test one mask instead of many
   macros.  The return-type line is not visible in this chunk (presumably
   HOST_WIDE_INT -- TODO confirm upstream).  */
3828 rs6000_builtin_mask_calculate (void)
3830 return (((TARGET_ALTIVEC
) ? RS6000_BTM_ALTIVEC
: 0)
3831 | ((TARGET_CMPB
) ? RS6000_BTM_CMPB
: 0)
3832 | ((TARGET_VSX
) ? RS6000_BTM_VSX
: 0)
3833 | ((TARGET_PAIRED_FLOAT
) ? RS6000_BTM_PAIRED
: 0)
/* Reciprocal estimate instructions (fre/fres/frsqrte/frsqrtes).  */
3834 | ((TARGET_FRE
) ? RS6000_BTM_FRE
: 0)
3835 | ((TARGET_FRES
) ? RS6000_BTM_FRES
: 0)
3836 | ((TARGET_FRSQRTE
) ? RS6000_BTM_FRSQRTE
: 0)
3837 | ((TARGET_FRSQRTES
) ? RS6000_BTM_FRSQRTES
: 0)
3838 | ((TARGET_POPCNTD
) ? RS6000_BTM_POPCNTD
: 0)
/* Cell is keyed off the processor being tuned/compiled for, not an
   option mask like the others.  */
3839 | ((rs6000_cpu
== PROCESSOR_CELL
) ? RS6000_BTM_CELL
: 0)
3840 | ((TARGET_P8_VECTOR
) ? RS6000_BTM_P8_VECTOR
: 0)
3841 | ((TARGET_P9_VECTOR
) ? RS6000_BTM_P9_VECTOR
: 0)
3842 | ((TARGET_P9_MISC
) ? RS6000_BTM_P9_MISC
: 0)
3843 | ((TARGET_MODULO
) ? RS6000_BTM_MODULO
: 0)
3844 | ((TARGET_64BIT
) ? RS6000_BTM_64BIT
: 0)
3845 | ((TARGET_CRYPTO
) ? RS6000_BTM_CRYPTO
: 0)
3846 | ((TARGET_HTM
) ? RS6000_BTM_HTM
: 0)
3847 | ((TARGET_DFP
) ? RS6000_BTM_DFP
: 0)
3848 | ((TARGET_HARD_FLOAT
) ? RS6000_BTM_HARD_FLOAT
: 0)
3849 | ((TARGET_LONG_DOUBLE_128
) ? RS6000_BTM_LDBL128
: 0)
3850 | ((TARGET_FLOAT128_TYPE
) ? RS6000_BTM_FLOAT128
: 0));
3853 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3854 to clobber the XER[CA] bit because clobbering that bit without telling
3855 the compiler worked just fine with versions of GCC before GCC 5, and
3856 breaking a lot of older code in ways that are hard to track down is
3857 not such a great idea. */
/* Outputs/inputs/constraints are deliberately unused (commented-out
   parameter names); only the clobber lists are modified.  The trailing
   return statement is not visible in this chunk -- the hook presumably
   returns NULL (no extra insns); TODO confirm upstream.  */
3860 rs6000_md_asm_adjust (vec
<rtx
> &/*outputs*/, vec
<rtx
> &/*inputs*/,
3861 vec
<const char *> &/*constraints*/,
3862 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
/* Record the carry bit (XER[CA]) as clobbered by every asm statement:
   add it to the rtl clobber vector and to the hard-register set.  */
3864 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3865 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3869 /* Override command line options.
3871 Combine build-specific configuration information with options
3872 specified on the command line to set various state variables which
3873 influence code generation, optimization, and expansion of built-in
3874 functions. Assure that command-line configuration preferences are
3875 compatible with each other and with the build configuration; issue
3876 warnings while adjusting configuration or error messages while
3877 rejecting configuration.
3879 Upon entry to this function:
3881 This function is called once at the beginning of
3882 compilation, and then again at the start and end of compiling
3883 each section of code that has a different configuration, as
3884 indicated, for example, by adding the
3886 __attribute__((__target__("cpu=power9")))
3888 qualifier to a function definition or, for example, by bracketing
3891 #pragma GCC target("altivec")
3895 #pragma GCC reset_options
3897 directives. Parameter global_init_p is true for the initial
3898 invocation, which initializes global variables, and false for all
3899 subsequent invocations.
3902 Various global state information is assumed to be valid. This
3903 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3904 default CPU specified at build configure time, TARGET_DEFAULT,
3905 representing the default set of option flags for the default
3906 target, and global_options_set.x_rs6000_isa_flags, representing
3907 which options were requested on the command line.
3909 Upon return from this function:
3911 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3912 was set by name on the command line. Additionally, if certain
3913 attributes are automatically enabled or disabled by this function
3914 in order to assure compatibility between options and
3915 configuration, the flags associated with those attributes are
3916 also set. By setting these "explicit bits", we avoid the risk
3917 that other code might accidentally overwrite these particular
3918 attributes with "default values".
3920 The various bits of rs6000_isa_flags are set to indicate the
3921 target options that have been selected for the most current
3922 compilation efforts. This has the effect of also turning on the
3923 associated TARGET_XXX values since these are macros which are
3924 generally defined to test the corresponding bit of the
3925 rs6000_isa_flags variable.
3927 The variable rs6000_builtin_mask is set to represent the target
3928 options for the most current compilation efforts, consistent with
3929 the current contents of rs6000_isa_flags. This variable controls
3930 expansion of built-in functions.
3932 Various other global variables and fields of global structures
3933 (over 50 in all) are initialized to reflect the desired options
3934 for the most current compilation efforts. */
3937 rs6000_option_override_internal (bool global_init_p
)
3940 bool have_cpu
= false;
3942 /* The default cpu requested at configure time, if any. */
3943 const char *implicit_cpu
= OPTION_TARGET_CPU_DEFAULT
;
3945 HOST_WIDE_INT set_masks
;
3946 HOST_WIDE_INT ignore_masks
;
3949 struct cl_target_option
*main_target_opt
3950 = ((global_init_p
|| target_option_default_node
== NULL
)
3951 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3953 /* Print defaults. */
3954 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3955 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3957 /* Remember the explicit arguments. */
3959 rs6000_isa_flags_explicit
= global_options_set
.x_rs6000_isa_flags
;
3961 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3962 library functions, so warn about it. The flag may be useful for
3963 performance studies from time to time though, so don't disable it
3965 if (global_options_set
.x_rs6000_alignment_flags
3966 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3967 && DEFAULT_ABI
== ABI_DARWIN
3969 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3970 " it is incompatible with the installed C and C++ libraries");
3972 /* Numerous experiments show that IRA based loop pressure
3973 calculation works better for RTL loop invariant motion on targets
3974 with enough (>= 32) registers. It is an expensive optimization.
3975 So it is on only for peak performance. */
3976 if (optimize
>= 3 && global_init_p
3977 && !global_options_set
.x_flag_ira_loop_pressure
)
3978 flag_ira_loop_pressure
= 1;
3980 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3981 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3982 options were already specified. */
3983 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3984 && !global_options_set
.x_flag_asynchronous_unwind_tables
)
3985 flag_asynchronous_unwind_tables
= 1;
3987 /* Set the pointer size. */
3990 rs6000_pmode
= (int)DImode
;
3991 rs6000_pointer_size
= 64;
3995 rs6000_pmode
= (int)SImode
;
3996 rs6000_pointer_size
= 32;
3999 /* Some OSs don't support saving the high part of 64-bit registers on context
4000 switch. Other OSs don't support saving Altivec registers. On those OSs,
4001 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4002 if the user wants either, the user must explicitly specify them and we
4003 won't interfere with the user's specification. */
4005 set_masks
= POWERPC_MASKS
;
4006 #ifdef OS_MISSING_POWERPC64
4007 if (OS_MISSING_POWERPC64
)
4008 set_masks
&= ~OPTION_MASK_POWERPC64
;
4010 #ifdef OS_MISSING_ALTIVEC
4011 if (OS_MISSING_ALTIVEC
)
4012 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
4013 | OTHER_VSX_VECTOR_MASKS
);
4016 /* Don't override by the processor default if given explicitly. */
4017 set_masks
&= ~rs6000_isa_flags_explicit
;
4019 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4020 the cpu in a target attribute or pragma, but did not specify a tuning
4021 option, use the cpu for the tuning option rather than the option specified
4022 with -mtune on the command line. Process a '--with-cpu' configuration
4023 request as an implicit --cpu. */
4024 if (rs6000_cpu_index
>= 0)
4026 cpu_index
= rs6000_cpu_index
;
4029 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
4031 rs6000_cpu_index
= cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
4034 else if (implicit_cpu
)
4036 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (implicit_cpu
);
4041 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4042 const char *default_cpu
= ((!TARGET_POWERPC64
)
4044 : ((BYTES_BIG_ENDIAN
)
4048 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
4052 gcc_assert (cpu_index
>= 0);
4056 #ifndef HAVE_AS_POWER9
4057 if (processor_target_table
[rs6000_cpu_index
].processor
4058 == PROCESSOR_POWER9
)
4061 warning (0, "will not generate power9 instructions because "
4062 "assembler lacks power9 support");
4065 #ifndef HAVE_AS_POWER8
4066 if (processor_target_table
[rs6000_cpu_index
].processor
4067 == PROCESSOR_POWER8
)
4070 warning (0, "will not generate power8 instructions because "
4071 "assembler lacks power8 support");
4074 #ifndef HAVE_AS_POPCNTD
4075 if (processor_target_table
[rs6000_cpu_index
].processor
4076 == PROCESSOR_POWER7
)
4079 warning (0, "will not generate power7 instructions because "
4080 "assembler lacks power7 support");
4084 if (processor_target_table
[rs6000_cpu_index
].processor
4085 == PROCESSOR_POWER6
)
4088 warning (0, "will not generate power6 instructions because "
4089 "assembler lacks power6 support");
4092 #ifndef HAVE_AS_POPCNTB
4093 if (processor_target_table
[rs6000_cpu_index
].processor
4094 == PROCESSOR_POWER5
)
4097 warning (0, "will not generate power5 instructions because "
4098 "assembler lacks power5 support");
4104 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4105 const char *default_cpu
= (!TARGET_POWERPC64
4111 rs6000_cpu_index
= cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
4115 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4116 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4117 with those from the cpu, except for options that were explicitly set. If
4118 we don't have a cpu, do not override the target bits set in
4122 rs6000_isa_flags
&= ~set_masks
;
4123 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
4128 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4129 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4130 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4131 to using rs6000_isa_flags, we need to do the initialization here.
4133 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4134 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4135 HOST_WIDE_INT flags
= ((TARGET_DEFAULT
) ? TARGET_DEFAULT
4136 : processor_target_table
[cpu_index
].target_enable
);
4137 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
4140 if (rs6000_tune_index
>= 0)
4141 tune_index
= rs6000_tune_index
;
4143 rs6000_tune_index
= tune_index
= cpu_index
;
4147 enum processor_type tune_proc
4148 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
4151 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
4152 if (processor_target_table
[i
].processor
== tune_proc
)
4154 rs6000_tune_index
= tune_index
= i
;
4159 gcc_assert (tune_index
>= 0);
4160 rs6000_cpu
= processor_target_table
[tune_index
].processor
;
4162 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
4163 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
4164 || rs6000_cpu
== PROCESSOR_PPCE5500
)
4167 error ("AltiVec not supported in this target");
4170 /* If we are optimizing big endian systems for space, use the load/store
4171 multiple and string instructions. */
4172 if (BYTES_BIG_ENDIAN
&& optimize_size
)
4173 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& (OPTION_MASK_MULTIPLE
4174 | OPTION_MASK_STRING
);
4176 /* Don't allow -mmultiple or -mstring on little endian systems
4177 unless the cpu is a 750, because the hardware doesn't support the
4178 instructions used in little endian mode, and causes an alignment
4179 trap. The 750 does not cause an alignment trap (except when the
4180 target is unaligned). */
4182 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
)
4184 if (TARGET_MULTIPLE
)
4186 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
4187 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
4188 warning (0, "-mmultiple is not supported on little endian systems");
4193 rs6000_isa_flags
&= ~OPTION_MASK_STRING
;
4194 if ((rs6000_isa_flags_explicit
& OPTION_MASK_STRING
) != 0)
4195 warning (0, "-mstring is not supported on little endian systems");
4199 /* If little-endian, default to -mstrict-align on older processors.
4200 Testing for htm matches power8 and later. */
4201 if (!BYTES_BIG_ENDIAN
4202 && !(processor_target_table
[tune_index
].target_enable
& OPTION_MASK_HTM
))
4203 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
4205 /* -maltivec={le,be} implies -maltivec. */
4206 if (rs6000_altivec_element_order
!= 0)
4207 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
4209 /* Disallow -maltivec=le in big endian mode for now. This is not
4210 known to be useful for anyone. */
4211 if (BYTES_BIG_ENDIAN
&& rs6000_altivec_element_order
== 1)
4213 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4214 rs6000_altivec_element_order
= 0;
4217 /* Add some warnings for VSX. */
4220 const char *msg
= NULL
;
4221 if (!TARGET_HARD_FLOAT
|| !TARGET_SINGLE_FLOAT
|| !TARGET_DOUBLE_FLOAT
)
4223 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4224 msg
= N_("-mvsx requires hardware floating point");
4227 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
4228 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4231 else if (TARGET_PAIRED_FLOAT
)
4232 msg
= N_("-mvsx and -mpaired are incompatible");
4233 else if (TARGET_AVOID_XFORM
> 0)
4234 msg
= N_("-mvsx needs indexed addressing");
4235 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
4236 & OPTION_MASK_ALTIVEC
))
4238 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4239 msg
= N_("-mvsx and -mno-altivec are incompatible");
4241 msg
= N_("-mno-altivec disables vsx");
4247 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
4248 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4252 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4253 the -mcpu setting to enable options that conflict. */
4254 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
4255 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
4256 | OPTION_MASK_ALTIVEC
4257 | OPTION_MASK_VSX
)) != 0)
4258 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
4259 | OPTION_MASK_DIRECT_MOVE
)
4260 & ~rs6000_isa_flags_explicit
);
4262 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4263 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
4265 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4266 off all of the options that depend on those flags. */
4267 ignore_masks
= rs6000_disable_incompatible_switches ();
4269 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4270 unless the user explicitly used the -mno-<option> to disable the code. */
4271 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_DFORM_SCALAR
4272 || TARGET_P9_DFORM_VECTOR
|| TARGET_P9_DFORM_BOTH
> 0)
4273 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
4274 else if (TARGET_P9_MINMAX
)
4278 if (cpu_index
== PROCESSOR_POWER9
)
4280 /* legacy behavior: allow -mcpu=power9 with certain
4281 capabilities explicitly disabled. */
4282 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
4283 /* However, reject this automatic fix if certain
4284 capabilities required for TARGET_P9_MINMAX support
4285 have been explicitly disabled. */
4286 if (((OPTION_MASK_VSX
| OPTION_MASK_UPPER_REGS_SF
4287 | OPTION_MASK_UPPER_REGS_DF
) & rs6000_isa_flags
)
4288 != (OPTION_MASK_VSX
| OPTION_MASK_UPPER_REGS_SF
4289 | OPTION_MASK_UPPER_REGS_DF
))
4290 error ("-mpower9-minmax incompatible with explicitly disabled options");
4293 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4294 "<xxx> less than power9");
4296 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
4297 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
4298 & rs6000_isa_flags_explicit
))
4299 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4300 were explicitly cleared. */
4301 error ("-mpower9-minmax incompatible with explicitly disabled options");
4303 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
4305 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
4306 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
4307 else if (TARGET_VSX
)
4308 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
4309 else if (TARGET_POPCNTD
)
4310 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
4311 else if (TARGET_DFP
)
4312 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
4313 else if (TARGET_CMPB
)
4314 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
4315 else if (TARGET_FPRND
)
4316 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
4317 else if (TARGET_POPCNTB
)
4318 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
4319 else if (TARGET_ALTIVEC
)
4320 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
4322 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
4324 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
4325 error ("-mcrypto requires -maltivec");
4326 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
4329 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
4331 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4332 error ("-mdirect-move requires -mvsx");
4333 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
4336 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
4338 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4339 error ("-mpower8-vector requires -maltivec");
4340 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4343 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
4345 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4346 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
4347 error ("-mpower8-vector requires -mvsx");
4348 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
4350 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
4351 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
4352 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4356 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4358 rs6000_isa_flags
|= OPTION_MASK_VSX
;
4359 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
4363 if (TARGET_VSX_TIMODE
&& !TARGET_VSX
)
4365 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
)
4366 error ("-mvsx-timode requires -mvsx");
4367 rs6000_isa_flags
&= ~OPTION_MASK_VSX_TIMODE
;
4370 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
4372 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
4373 error ("-mhard-dfp requires -mhard-float");
4374 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
4377 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4378 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4379 set the individual option. */
4380 if (TARGET_UPPER_REGS
> 0)
4383 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
))
4385 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_DF
;
4386 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DF
;
4389 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
))
4391 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_DI
;
4392 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DI
;
4394 if (TARGET_P8_VECTOR
4395 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
))
4397 rs6000_isa_flags
|= OPTION_MASK_UPPER_REGS_SF
;
4398 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_SF
;
4401 else if (TARGET_UPPER_REGS
== 0)
4404 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
))
4406 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DF
;
4407 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DF
;
4410 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
))
4412 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DI
;
4413 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_DI
;
4415 if (TARGET_P8_VECTOR
4416 && !(rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
))
4418 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_SF
;
4419 rs6000_isa_flags_explicit
|= OPTION_MASK_UPPER_REGS_SF
;
4423 if (TARGET_UPPER_REGS_DF
&& !TARGET_VSX
)
4425 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
)
4426 error ("-mupper-regs-df requires -mvsx");
4427 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DF
;
4430 if (TARGET_UPPER_REGS_DI
&& !TARGET_VSX
)
4432 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DI
)
4433 error ("-mupper-regs-di requires -mvsx");
4434 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_DI
;
4437 if (TARGET_UPPER_REGS_SF
&& !TARGET_P8_VECTOR
)
4439 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
)
4440 error ("-mupper-regs-sf requires -mpower8-vector");
4441 rs6000_isa_flags
&= ~OPTION_MASK_UPPER_REGS_SF
;
4444 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
4445 silently turn off quad memory mode. */
4446 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
4448 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4449 warning (0, N_("-mquad-memory requires 64-bit mode"));
4451 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
4452 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4454 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
4455 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
4458 /* Non-atomic quad memory load/store are disabled for little endian, since
4459 the words are reversed, but atomic operations can still be done by
4460 swapping the words. */
4461 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
4463 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4464 warning (0, N_("-mquad-memory is not available in little endian mode"));
4466 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
4469 /* Assume if the user asked for normal quad memory instructions, they want
4470 the atomic versions as well, unless they explicitly told us not to use quad
4471 word atomic instructions. */
4472 if (TARGET_QUAD_MEMORY
4473 && !TARGET_QUAD_MEMORY_ATOMIC
4474 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4475 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4477 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4478 generating power8 instructions. */
4479 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4480 rs6000_isa_flags
|= (processor_target_table
[tune_index
].target_enable
4481 & OPTION_MASK_P8_FUSION
);
4483 /* Setting additional fusion flags turns on base fusion. */
4484 if (!TARGET_P8_FUSION
&& (TARGET_P8_FUSION_SIGN
|| TARGET_TOC_FUSION
))
4486 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4488 if (TARGET_P8_FUSION_SIGN
)
4489 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4491 if (TARGET_TOC_FUSION
)
4492 error ("-mtoc-fusion requires -mpower8-fusion");
4494 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4497 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4500 /* Power9 fusion is a superset over power8 fusion. */
4501 if (TARGET_P9_FUSION
&& !TARGET_P8_FUSION
)
4503 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4505 /* We prefer to not mention undocumented options in
4506 error messages. However, if users have managed to select
4507 power9-fusion without selecting power8-fusion, they
4508 already know about undocumented flags. */
4509 error ("-mpower9-fusion requires -mpower8-fusion");
4510 rs6000_isa_flags
&= ~OPTION_MASK_P9_FUSION
;
4513 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4516 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4517 generating power9 instructions. */
4518 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_FUSION
))
4519 rs6000_isa_flags
|= (processor_target_table
[tune_index
].target_enable
4520 & OPTION_MASK_P9_FUSION
);
4522 /* Power8 does not fuse sign extended loads with the addis. If we are
4523 optimizing at high levels for speed, convert a sign extended load into a
4524 zero extending load, and an explicit sign extension. */
4525 if (TARGET_P8_FUSION
4526 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4527 && optimize_function_for_speed_p (cfun
)
4529 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4531 /* TOC fusion requires 64-bit and medium/large code model. */
4532 if (TARGET_TOC_FUSION
&& !TARGET_POWERPC64
)
4534 rs6000_isa_flags
&= ~OPTION_MASK_TOC_FUSION
;
4535 if ((rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
) != 0)
4536 warning (0, N_("-mtoc-fusion requires 64-bit"));
4539 if (TARGET_TOC_FUSION
&& (TARGET_CMODEL
== CMODEL_SMALL
))
4541 rs6000_isa_flags
&= ~OPTION_MASK_TOC_FUSION
;
4542 if ((rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
) != 0)
4543 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4546 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4548 if (TARGET_P8_FUSION
&& !TARGET_TOC_FUSION
&& TARGET_POWERPC64
4549 && (TARGET_CMODEL
!= CMODEL_SMALL
)
4550 && !(rs6000_isa_flags_explicit
& OPTION_MASK_TOC_FUSION
))
4551 rs6000_isa_flags
|= OPTION_MASK_TOC_FUSION
;
4553 /* ISA 3.0 vector instructions include ISA 2.07. */
4554 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4556 /* We prefer to not mention undocumented options in
4557 error messages. However, if users have managed to select
4558 power9-vector without selecting power8-vector, they
4559 already know about undocumented flags. */
4560 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4561 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4562 error ("-mpower9-vector requires -mpower8-vector");
4563 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4565 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4566 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4567 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4571 /* OPTION_MASK_P9_VECTOR is explicit and
4572 OPTION_MASK_P8_VECTOR is not explicit. */
4573 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4574 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4578 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4579 -mpower9-dform-vector. */
4580 if (TARGET_P9_DFORM_BOTH
> 0)
4582 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
))
4583 rs6000_isa_flags
|= OPTION_MASK_P9_DFORM_VECTOR
;
4585 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
))
4586 rs6000_isa_flags
|= OPTION_MASK_P9_DFORM_SCALAR
;
4588 else if (TARGET_P9_DFORM_BOTH
== 0)
4590 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
))
4591 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_VECTOR
;
4593 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
))
4594 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4597 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4598 if ((TARGET_P9_DFORM_SCALAR
|| TARGET_P9_DFORM_VECTOR
) && !TARGET_P9_VECTOR
)
4600 /* We prefer to not mention undocumented options in
4601 error messages. However, if users have managed to select
4602 power9-dform without selecting power9-vector, they
4603 already know about undocumented flags. */
4604 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
)
4605 && (rs6000_isa_flags_explicit
& (OPTION_MASK_P9_DFORM_SCALAR
4606 | OPTION_MASK_P9_DFORM_VECTOR
)))
4607 error ("-mpower9-dform requires -mpower9-vector");
4608 else if (rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
)
4611 ~(OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4612 rs6000_isa_flags_explicit
|=
4613 (OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4617 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4618 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4620 rs6000_isa_flags
|= OPTION_MASK_P9_VECTOR
;
4621 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4625 if ((TARGET_P9_DFORM_SCALAR
|| TARGET_P9_DFORM_VECTOR
)
4626 && !TARGET_DIRECT_MOVE
)
4628 /* We prefer to not mention undocumented options in
4629 error messages. However, if users have managed to select
4630 power9-dform without selecting direct-move, they
4631 already know about undocumented flags. */
4632 if ((rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
4633 && ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_VECTOR
) ||
4634 (rs6000_isa_flags_explicit
& OPTION_MASK_P9_DFORM_SCALAR
) ||
4635 (TARGET_P9_DFORM_BOTH
== 1)))
4636 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4637 " require -mdirect-move");
4638 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
) == 0)
4640 rs6000_isa_flags
|= OPTION_MASK_DIRECT_MOVE
;
4641 rs6000_isa_flags_explicit
|= OPTION_MASK_DIRECT_MOVE
;
4646 ~(OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4647 rs6000_isa_flags_explicit
|=
4648 (OPTION_MASK_P9_DFORM_SCALAR
| OPTION_MASK_P9_DFORM_VECTOR
);
4652 if (TARGET_P9_DFORM_SCALAR
&& !TARGET_UPPER_REGS_DF
)
4654 /* We prefer to not mention undocumented options in
4655 error messages. However, if users have managed to select
4656 power9-dform without selecting upper-regs-df, they
4657 already know about undocumented flags. */
4658 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_DF
)
4659 error ("-mpower9-dform requires -mupper-regs-df");
4660 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4663 if (TARGET_P9_DFORM_SCALAR
&& !TARGET_UPPER_REGS_SF
)
4665 if (rs6000_isa_flags_explicit
& OPTION_MASK_UPPER_REGS_SF
)
4666 error ("-mpower9-dform requires -mupper-regs-sf");
4667 rs6000_isa_flags
&= ~OPTION_MASK_P9_DFORM_SCALAR
;
4670 /* Enable LRA by default. */
4671 if ((rs6000_isa_flags_explicit
& OPTION_MASK_LRA
) == 0)
4672 rs6000_isa_flags
|= OPTION_MASK_LRA
;
4674 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4675 but do show up with -mno-lra. Given -mlra will become the default once
4676 PR 69847 is fixed, turn off the options with problems by default if
4677 -mno-lra was used, and warn if the user explicitly asked for the option.
4679 Enable -mpower9-dform-vector by default if LRA and other power9 options.
4680 Enable -mvsx-timode by default if LRA and VSX. */
4683 if (TARGET_VSX_TIMODE
)
4685 if ((rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
) != 0)
4686 warning (0, "-mvsx-timode might need -mlra");
4689 rs6000_isa_flags
&= ~OPTION_MASK_VSX_TIMODE
;
4695 if (TARGET_VSX
&& !TARGET_VSX_TIMODE
4696 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_TIMODE
) == 0)
4697 rs6000_isa_flags
|= OPTION_MASK_VSX_TIMODE
;
4700 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4701 support. If we only have ISA 2.06 support, and the user did not specify
4702 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4703 but we don't enable the full vectorization support */
4704 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4705 TARGET_ALLOW_MOVMISALIGN
= 1;
4707 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4709 if (TARGET_ALLOW_MOVMISALIGN
> 0
4710 && global_options_set
.x_TARGET_ALLOW_MOVMISALIGN
)
4711 error ("-mallow-movmisalign requires -mvsx");
4713 TARGET_ALLOW_MOVMISALIGN
= 0;
4716 /* Determine when unaligned vector accesses are permitted, and when
4717 they are preferred over masked Altivec loads. Note that if
4718 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4719 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4721 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4725 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4726 error ("-mefficient-unaligned-vsx requires -mvsx");
4728 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4731 else if (!TARGET_ALLOW_MOVMISALIGN
)
4733 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4734 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4736 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4740 /* Check whether we should allow small integers into VSX registers. We
4741 require direct move to prevent the register allocator from having to move
4742 variables through memory to do moves. SImode can be used on ISA 2.07,
4743 while HImode and QImode require ISA 3.0. */
4744 if (TARGET_VSX_SMALL_INTEGER
4745 && (!TARGET_DIRECT_MOVE
|| !TARGET_P8_VECTOR
|| !TARGET_UPPER_REGS_DI
))
4747 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX_SMALL_INTEGER
)
4748 error ("-mvsx-small-integer requires -mpower8-vector, "
4749 "-mupper-regs-di, and -mdirect-move");
4751 rs6000_isa_flags
&= ~OPTION_MASK_VSX_SMALL_INTEGER
;
4754 /* Set long double size before the IEEE 128-bit tests. */
4755 if (!global_options_set
.x_rs6000_long_double_type_size
)
4757 if (main_target_opt
!= NULL
4758 && (main_target_opt
->x_rs6000_long_double_type_size
4759 != RS6000_DEFAULT_LONG_DOUBLE_SIZE
))
4760 error ("target attribute or pragma changes long double size");
4762 rs6000_long_double_type_size
= RS6000_DEFAULT_LONG_DOUBLE_SIZE
;
4765 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4766 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4767 pick up this default. */
4768 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4769 if (!global_options_set
.x_rs6000_ieeequad
)
4770 rs6000_ieeequad
= 1;
4773 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4774 systems, but don't enable the __float128 keyword. */
4775 if (TARGET_VSX
&& TARGET_LONG_DOUBLE_128
4776 && (TARGET_FLOAT128_ENABLE_TYPE
|| TARGET_IEEEQUAD
)
4777 && ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_TYPE
) == 0))
4778 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_TYPE
;
4780 /* IEEE 128-bit floating point requires VSX support. */
4783 if (TARGET_FLOAT128_KEYWORD
)
4785 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4786 error ("-mfloat128 requires VSX support");
4788 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4789 | OPTION_MASK_FLOAT128_KEYWORD
4790 | OPTION_MASK_FLOAT128_HW
);
4793 else if (TARGET_FLOAT128_TYPE
)
4795 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_TYPE
) != 0)
4796 error ("-mfloat128-type requires VSX support");
4798 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4799 | OPTION_MASK_FLOAT128_KEYWORD
4800 | OPTION_MASK_FLOAT128_HW
);
4804 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4805 128-bit floating point support to be enabled. */
4806 if (!TARGET_FLOAT128_TYPE
)
4808 if (TARGET_FLOAT128_KEYWORD
)
4810 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4812 error ("-mfloat128 requires -mfloat128-type");
4813 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4814 | OPTION_MASK_FLOAT128_KEYWORD
4815 | OPTION_MASK_FLOAT128_HW
);
4818 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_TYPE
;
4821 if (TARGET_FLOAT128_HW
)
4823 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4825 error ("-mfloat128-hardware requires -mfloat128-type");
4826 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4829 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_TYPE
4830 | OPTION_MASK_FLOAT128_KEYWORD
4831 | OPTION_MASK_FLOAT128_HW
);
4835 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4836 -mfloat128-hardware by default. However, don't enable the __float128
4837 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4838 -mfloat128 option as well if it was not already set. */
4839 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
4840 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4841 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4842 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4844 if (TARGET_FLOAT128_HW
4845 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4847 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4848 error ("-mfloat128-hardware requires full ISA 3.0 support");
4850 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4853 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4855 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4856 error ("-mfloat128-hardware requires -m64");
4858 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4861 if (TARGET_FLOAT128_HW
&& !TARGET_FLOAT128_KEYWORD
4862 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0
4863 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4864 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4866 /* Print the options after updating the defaults. */
4867 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4868 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4870 /* E500mc does "better" if we inline more aggressively. Respect the
4871 user's opinion, though. */
4872 if (rs6000_block_move_inline_limit
== 0
4873 && (rs6000_cpu
== PROCESSOR_PPCE500MC
4874 || rs6000_cpu
== PROCESSOR_PPCE500MC64
4875 || rs6000_cpu
== PROCESSOR_PPCE5500
4876 || rs6000_cpu
== PROCESSOR_PPCE6500
))
4877 rs6000_block_move_inline_limit
= 128;
4879 /* store_one_arg depends on expand_block_move to handle at least the
4880 size of reg_parm_stack_space. */
4881 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4882 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4886 /* If the appropriate debug option is enabled, replace the target hooks
4887 with debug versions that call the real version and then prints
4888 debugging information. */
4889 if (TARGET_DEBUG_COST
)
4891 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4892 targetm
.address_cost
= rs6000_debug_address_cost
;
4893 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4896 if (TARGET_DEBUG_ADDR
)
4898 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4899 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4900 rs6000_secondary_reload_class_ptr
4901 = rs6000_debug_secondary_reload_class
;
4902 rs6000_secondary_memory_needed_ptr
4903 = rs6000_debug_secondary_memory_needed
;
4904 rs6000_cannot_change_mode_class_ptr
4905 = rs6000_debug_cannot_change_mode_class
;
4906 rs6000_preferred_reload_class_ptr
4907 = rs6000_debug_preferred_reload_class
;
4908 rs6000_legitimize_reload_address_ptr
4909 = rs6000_debug_legitimize_reload_address
;
4910 rs6000_mode_dependent_address_ptr
4911 = rs6000_debug_mode_dependent_address
;
4914 if (rs6000_veclibabi_name
)
4916 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4917 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4920 error ("unknown vectorization library ABI type (%s) for "
4921 "-mveclibabi= switch", rs6000_veclibabi_name
);
4927 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4928 target attribute or pragma which automatically enables both options,
4929 unless the altivec ABI was set. This is set by default for 64-bit, but
4931 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4932 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
4933 | OPTION_MASK_FLOAT128_TYPE
4934 | OPTION_MASK_FLOAT128_KEYWORD
)
4935 & ~rs6000_isa_flags_explicit
);
4937 /* Enable Altivec ABI for AIX -maltivec. */
4938 if (TARGET_XCOFF
&& (TARGET_ALTIVEC
|| TARGET_VSX
))
4940 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4941 error ("target attribute or pragma changes AltiVec ABI");
4943 rs6000_altivec_abi
= 1;
4946 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4947 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4948 be explicitly overridden in either case. */
4951 if (!global_options_set
.x_rs6000_altivec_abi
4952 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4954 if (main_target_opt
!= NULL
&&
4955 !main_target_opt
->x_rs6000_altivec_abi
)
4956 error ("target attribute or pragma changes AltiVec ABI");
4958 rs6000_altivec_abi
= 1;
4962 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4963 So far, the only darwin64 targets are also MACH-O. */
4965 && DEFAULT_ABI
== ABI_DARWIN
4968 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4969 error ("target attribute or pragma changes darwin64 ABI");
4972 rs6000_darwin64_abi
= 1;
4973 /* Default to natural alignment, for better performance. */
4974 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4978 /* Place FP constants in the constant pool instead of TOC
4979 if section anchors enabled. */
4980 if (flag_section_anchors
4981 && !global_options_set
.x_TARGET_NO_FP_IN_TOC
)
4982 TARGET_NO_FP_IN_TOC
= 1;
4984 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4985 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4987 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4988 SUBTARGET_OVERRIDE_OPTIONS
;
4990 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4991 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4993 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4994 SUB3TARGET_OVERRIDE_OPTIONS
;
4997 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4998 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
5000 /* For the E500 family of cores, reset the single/double FP flags to let us
5001 check that they remain constant across attributes or pragmas. Also,
5002 clear a possible request for string instructions, not supported and which
5003 we might have silently queried above for -Os.
5005 For other families, clear ISEL in case it was set implicitly.
5010 case PROCESSOR_PPC8540
:
5011 case PROCESSOR_PPC8548
:
5012 case PROCESSOR_PPCE500MC
:
5013 case PROCESSOR_PPCE500MC64
:
5014 case PROCESSOR_PPCE5500
:
5015 case PROCESSOR_PPCE6500
:
5017 rs6000_single_float
= 0;
5018 rs6000_double_float
= 0;
5020 rs6000_isa_flags
&= ~OPTION_MASK_STRING
;
5026 if (have_cpu
&& !(rs6000_isa_flags_explicit
& OPTION_MASK_ISEL
))
5027 rs6000_isa_flags
&= ~OPTION_MASK_ISEL
;
5032 if (main_target_opt
)
5034 if (main_target_opt
->x_rs6000_single_float
!= rs6000_single_float
)
5035 error ("target attribute or pragma changes single precision floating "
5037 if (main_target_opt
->x_rs6000_double_float
!= rs6000_double_float
)
5038 error ("target attribute or pragma changes double precision floating "
5042 rs6000_always_hint
= (rs6000_cpu
!= PROCESSOR_POWER4
5043 && rs6000_cpu
!= PROCESSOR_POWER5
5044 && rs6000_cpu
!= PROCESSOR_POWER6
5045 && rs6000_cpu
!= PROCESSOR_POWER7
5046 && rs6000_cpu
!= PROCESSOR_POWER8
5047 && rs6000_cpu
!= PROCESSOR_POWER9
5048 && rs6000_cpu
!= PROCESSOR_PPCA2
5049 && rs6000_cpu
!= PROCESSOR_CELL
5050 && rs6000_cpu
!= PROCESSOR_PPC476
);
5051 rs6000_sched_groups
= (rs6000_cpu
== PROCESSOR_POWER4
5052 || rs6000_cpu
== PROCESSOR_POWER5
5053 || rs6000_cpu
== PROCESSOR_POWER7
5054 || rs6000_cpu
== PROCESSOR_POWER8
);
5055 rs6000_align_branch_targets
= (rs6000_cpu
== PROCESSOR_POWER4
5056 || rs6000_cpu
== PROCESSOR_POWER5
5057 || rs6000_cpu
== PROCESSOR_POWER6
5058 || rs6000_cpu
== PROCESSOR_POWER7
5059 || rs6000_cpu
== PROCESSOR_POWER8
5060 || rs6000_cpu
== PROCESSOR_POWER9
5061 || rs6000_cpu
== PROCESSOR_PPCE500MC
5062 || rs6000_cpu
== PROCESSOR_PPCE500MC64
5063 || rs6000_cpu
== PROCESSOR_PPCE5500
5064 || rs6000_cpu
== PROCESSOR_PPCE6500
);
5066 /* Allow debug switches to override the above settings. These are set to -1
5067 in rs6000.opt to indicate the user hasn't directly set the switch. */
5068 if (TARGET_ALWAYS_HINT
>= 0)
5069 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
5071 if (TARGET_SCHED_GROUPS
>= 0)
5072 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
5074 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
5075 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
5077 rs6000_sched_restricted_insns_priority
5078 = (rs6000_sched_groups
? 1 : 0);
5080 /* Handle -msched-costly-dep option. */
5081 rs6000_sched_costly_dep
5082 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
5084 if (rs6000_sched_costly_dep_str
)
5086 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
5087 rs6000_sched_costly_dep
= no_dep_costly
;
5088 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
5089 rs6000_sched_costly_dep
= all_deps_costly
;
5090 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
5091 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
5092 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
5093 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
5095 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
5096 atoi (rs6000_sched_costly_dep_str
));
5099 /* Handle -minsert-sched-nops option. */
5100 rs6000_sched_insert_nops
5101 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
5103 if (rs6000_sched_insert_nops_str
)
5105 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
5106 rs6000_sched_insert_nops
= sched_finish_none
;
5107 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
5108 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
5109 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
5110 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
5112 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
5113 atoi (rs6000_sched_insert_nops_str
));
5116 /* Handle stack protector */
5117 if (!global_options_set
.x_rs6000_stack_protector_guard
)
5118 #ifdef TARGET_THREAD_SSP_OFFSET
5119 rs6000_stack_protector_guard
= SSP_TLS
;
5121 rs6000_stack_protector_guard
= SSP_GLOBAL
;
5124 #ifdef TARGET_THREAD_SSP_OFFSET
5125 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
5126 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
5129 if (global_options_set
.x_rs6000_stack_protector_guard_offset_str
)
5132 const char *str
= rs6000_stack_protector_guard_offset_str
;
5135 long offset
= strtol (str
, &endp
, 0);
5136 if (!*str
|| *endp
|| errno
)
5137 error ("%qs is not a valid number "
5138 "in -mstack-protector-guard-offset=", str
);
5140 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
5141 || (TARGET_64BIT
&& (offset
& 3)))
5142 error ("%qs is not a valid offset "
5143 "in -mstack-protector-guard-offset=", str
);
5145 rs6000_stack_protector_guard_offset
= offset
;
5148 if (global_options_set
.x_rs6000_stack_protector_guard_reg_str
)
5150 const char *str
= rs6000_stack_protector_guard_reg_str
;
5151 int reg
= decode_reg_name (str
);
5153 if (!IN_RANGE (reg
, 1, 31))
5154 error ("%qs is not a valid base register "
5155 "in -mstack-protector-guard-reg=", str
);
5157 rs6000_stack_protector_guard_reg
= reg
;
5160 if (rs6000_stack_protector_guard
== SSP_TLS
5161 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
5162 error ("-mstack-protector-guard=tls needs a valid base register");
5166 #ifdef TARGET_REGNAMES
5167 /* If the user desires alternate register names, copy in the
5168 alternate names now. */
5169 if (TARGET_REGNAMES
)
5170 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
5173 /* Set aix_struct_return last, after the ABI is determined.
5174 If -maix-struct-return or -msvr4-struct-return was explicitly
5175 used, don't override with the ABI default. */
5176 if (!global_options_set
.x_aix_struct_return
)
5177 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
5180 /* IBM XL compiler defaults to unsigned bitfields. */
5181 if (TARGET_XL_COMPAT
)
5182 flag_signed_bitfields
= 0;
5185 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
5186 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
5188 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
5190 /* We can only guarantee the availability of DI pseudo-ops when
5191 assembling for 64-bit targets. */
5194 targetm
.asm_out
.aligned_op
.di
= NULL
;
5195 targetm
.asm_out
.unaligned_op
.di
= NULL
;
5199 /* Set branch target alignment, if not optimizing for size. */
5202 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
5203 aligned 8byte to avoid misprediction by the branch predictor. */
5204 if (rs6000_cpu
== PROCESSOR_TITAN
5205 || rs6000_cpu
== PROCESSOR_CELL
)
5207 if (align_functions
<= 0)
5208 align_functions
= 8;
5209 if (align_jumps
<= 0)
5211 if (align_loops
<= 0)
5214 if (rs6000_align_branch_targets
)
5216 if (align_functions
<= 0)
5217 align_functions
= 16;
5218 if (align_jumps
<= 0)
5220 if (align_loops
<= 0)
5222 can_override_loop_align
= 1;
5226 if (align_jumps_max_skip
<= 0)
5227 align_jumps_max_skip
= 15;
5228 if (align_loops_max_skip
<= 0)
5229 align_loops_max_skip
= 15;
5232 /* Arrange to save and restore machine status around nested functions. */
5233 init_machine_status
= rs6000_init_machine_status
;
5235 /* We should always be splitting complex arguments, but we can't break
5236 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5237 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
5238 targetm
.calls
.split_complex_arg
= NULL
;
5240 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5241 if (DEFAULT_ABI
== ABI_AIX
)
5242 targetm
.calls
.custom_function_descriptors
= 0;
5245 /* Initialize rs6000_cost with the appropriate target costs. */
5247 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
5251 case PROCESSOR_RS64A
:
5252 rs6000_cost
= &rs64a_cost
;
5255 case PROCESSOR_MPCCORE
:
5256 rs6000_cost
= &mpccore_cost
;
5259 case PROCESSOR_PPC403
:
5260 rs6000_cost
= &ppc403_cost
;
5263 case PROCESSOR_PPC405
:
5264 rs6000_cost
= &ppc405_cost
;
5267 case PROCESSOR_PPC440
:
5268 rs6000_cost
= &ppc440_cost
;
5271 case PROCESSOR_PPC476
:
5272 rs6000_cost
= &ppc476_cost
;
5275 case PROCESSOR_PPC601
:
5276 rs6000_cost
= &ppc601_cost
;
5279 case PROCESSOR_PPC603
:
5280 rs6000_cost
= &ppc603_cost
;
5283 case PROCESSOR_PPC604
:
5284 rs6000_cost
= &ppc604_cost
;
5287 case PROCESSOR_PPC604e
:
5288 rs6000_cost
= &ppc604e_cost
;
5291 case PROCESSOR_PPC620
:
5292 rs6000_cost
= &ppc620_cost
;
5295 case PROCESSOR_PPC630
:
5296 rs6000_cost
= &ppc630_cost
;
5299 case PROCESSOR_CELL
:
5300 rs6000_cost
= &ppccell_cost
;
5303 case PROCESSOR_PPC750
:
5304 case PROCESSOR_PPC7400
:
5305 rs6000_cost
= &ppc750_cost
;
5308 case PROCESSOR_PPC7450
:
5309 rs6000_cost
= &ppc7450_cost
;
5312 case PROCESSOR_PPC8540
:
5313 case PROCESSOR_PPC8548
:
5314 rs6000_cost
= &ppc8540_cost
;
5317 case PROCESSOR_PPCE300C2
:
5318 case PROCESSOR_PPCE300C3
:
5319 rs6000_cost
= &ppce300c2c3_cost
;
5322 case PROCESSOR_PPCE500MC
:
5323 rs6000_cost
= &ppce500mc_cost
;
5326 case PROCESSOR_PPCE500MC64
:
5327 rs6000_cost
= &ppce500mc64_cost
;
5330 case PROCESSOR_PPCE5500
:
5331 rs6000_cost
= &ppce5500_cost
;
5334 case PROCESSOR_PPCE6500
:
5335 rs6000_cost
= &ppce6500_cost
;
5338 case PROCESSOR_TITAN
:
5339 rs6000_cost
= &titan_cost
;
5342 case PROCESSOR_POWER4
:
5343 case PROCESSOR_POWER5
:
5344 rs6000_cost
= &power4_cost
;
5347 case PROCESSOR_POWER6
:
5348 rs6000_cost
= &power6_cost
;
5351 case PROCESSOR_POWER7
:
5352 rs6000_cost
= &power7_cost
;
5355 case PROCESSOR_POWER8
:
5356 rs6000_cost
= &power8_cost
;
5359 case PROCESSOR_POWER9
:
5360 rs6000_cost
= &power9_cost
;
5363 case PROCESSOR_PPCA2
:
5364 rs6000_cost
= &ppca2_cost
;
5373 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
5374 rs6000_cost
->simultaneous_prefetches
,
5375 global_options
.x_param_values
,
5376 global_options_set
.x_param_values
);
5377 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, rs6000_cost
->l1_cache_size
,
5378 global_options
.x_param_values
,
5379 global_options_set
.x_param_values
);
5380 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
5381 rs6000_cost
->cache_line_size
,
5382 global_options
.x_param_values
,
5383 global_options_set
.x_param_values
);
5384 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, rs6000_cost
->l2_cache_size
,
5385 global_options
.x_param_values
,
5386 global_options_set
.x_param_values
);
5388 /* Increase loop peeling limits based on performance analysis. */
5389 maybe_set_param_value (PARAM_MAX_PEELED_INSNS
, 400,
5390 global_options
.x_param_values
,
5391 global_options_set
.x_param_values
);
5392 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS
, 400,
5393 global_options
.x_param_values
,
5394 global_options_set
.x_param_values
);
5396 /* Use the 'model' -fsched-pressure algorithm by default. */
5397 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
,
5398 SCHED_PRESSURE_MODEL
,
5399 global_options
.x_param_values
,
5400 global_options_set
.x_param_values
);
5402 /* If using typedef char *va_list, signal that
5403 __builtin_va_start (&ap, 0) can be optimized to
5404 ap = __builtin_next_arg (0). */
5405 if (DEFAULT_ABI
!= ABI_V4
)
5406 targetm
.expand_builtin_va_start
= NULL
;
5409 /* Set up single/double float flags.
5410 If TARGET_HARD_FLOAT is set, but neither single or double is set,
5411 then set both flags. */
5412 if (TARGET_HARD_FLOAT
&& rs6000_single_float
== 0 && rs6000_double_float
== 0)
5413 rs6000_single_float
= rs6000_double_float
= 1;
5415 /* If not explicitly specified via option, decide whether to generate indexed
5416 load/store instructions. A value of -1 indicates that the
5417 initial value of this variable has not been overwritten. During
5418 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5419 if (TARGET_AVOID_XFORM
== -1)
5420 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5421 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5422 need indexed accesses and the type used is the scalar type of the element
5423 being loaded or stored. */
5424 TARGET_AVOID_XFORM
= (rs6000_cpu
== PROCESSOR_POWER6
&& TARGET_CMPB
5425 && !TARGET_ALTIVEC
);
5427 /* Set the -mrecip options. */
5428 if (rs6000_recip_name
)
5430 char *p
= ASTRDUP (rs6000_recip_name
);
5432 unsigned int mask
, i
;
5435 while ((q
= strtok (p
, ",")) != NULL
)
5446 if (!strcmp (q
, "default"))
5447 mask
= ((TARGET_RECIP_PRECISION
)
5448 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
5451 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
5452 if (!strcmp (q
, recip_options
[i
].string
))
5454 mask
= recip_options
[i
].mask
;
5458 if (i
== ARRAY_SIZE (recip_options
))
5460 error ("unknown option for -mrecip=%s", q
);
5468 rs6000_recip_control
&= ~mask
;
5470 rs6000_recip_control
|= mask
;
5474 /* Set the builtin mask of the various options used that could affect which
5475 builtins were used. In the past we used target_flags, but we've run out
5476 of bits, and some options like PAIRED are no longer in target_flags. */
5477 rs6000_builtin_mask
= rs6000_builtin_mask_calculate ();
5478 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
5479 rs6000_print_builtin_options (stderr
, 0, "builtin mask",
5480 rs6000_builtin_mask
);
5482 /* Initialize all of the registers. */
5483 rs6000_init_hard_regno_mode_ok (global_init_p
);
5485 /* Save the initial options in case the user does function specific options */
5487 target_option_default_node
= target_option_current_node
5488 = build_target_option_node (&global_options
);
5490 /* If not explicitly specified via option, decide whether to generate the
5491 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5492 if (TARGET_LINK_STACK
== -1)
5493 SET_TARGET_LINK_STACK (rs6000_cpu
== PROCESSOR_PPC476
&& flag_pic
);
5498 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5499 define the target cpu type. */
5502 rs6000_option_override (void)
5504 (void) rs6000_option_override_internal (true);
5508 /* Implement targetm.vectorize.builtin_mask_for_load. */
5510 rs6000_builtin_mask_for_load (void)
5512 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5513 if ((TARGET_ALTIVEC
&& !TARGET_VSX
)
5514 || (TARGET_VSX
&& !TARGET_EFFICIENT_UNALIGNED_VSX
))
5515 return altivec_builtin_mask_for_load
;
5520 /* Implement LOOP_ALIGN. */
5522 rs6000_loop_align (rtx label
)
5527 /* Don't override loop alignment if -falign-loops was specified. */
5528 if (!can_override_loop_align
)
5529 return align_loops_log
;
5531 bb
= BLOCK_FOR_INSN (label
);
5532 ninsns
= num_loop_insns(bb
->loop_father
);
5534 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5535 if (ninsns
> 4 && ninsns
<= 8
5536 && (rs6000_cpu
== PROCESSOR_POWER4
5537 || rs6000_cpu
== PROCESSOR_POWER5
5538 || rs6000_cpu
== PROCESSOR_POWER6
5539 || rs6000_cpu
== PROCESSOR_POWER7
5540 || rs6000_cpu
== PROCESSOR_POWER8
5541 || rs6000_cpu
== PROCESSOR_POWER9
))
5544 return align_loops_log
;
5547 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5549 rs6000_loop_align_max_skip (rtx_insn
*label
)
5551 return (1 << rs6000_loop_align (label
)) - 1;
5554 /* Return true iff, data reference of TYPE can reach vector alignment (16)
5555 after applying N number of iterations. This routine does not determine
5556 how may iterations are required to reach desired alignment. */
/* NOTE(review): extraction dropped this function's return type, body
   braces and every return statement; only the parameter list and the
   two alignment-flag comparisons below survive.  Recover the missing
   lines from upstream history before relying on this definition.  */
5559 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
/* Case 1: natural (type) alignment is in force.  */
5566 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
/* Case 2: power alignment is in force; its consequent was dropped.  */
5569 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
5579 /* Assuming that all other types are naturally aligned. CHECKME! */
5584 /* Return true if the vector misalignment factor is supported by the
5587 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
5594 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5597 /* Return if movmisalign pattern is not supported for this mode. */
5598 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
5601 if (misalignment
== -1)
5603 /* Misalignment factor is unknown at compile time but we know
5604 it's word aligned. */
5605 if (rs6000_vector_alignment_reachable (type
, is_packed
))
5607 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
5609 if (element_size
== 64 || element_size
== 32)
5616 /* VSX supports word-aligned vector. */
5617 if (misalignment
% 4 == 0)
5623 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5625 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5626 tree vectype
, int misalign
)
5631 switch (type_of_cost
)
5641 case cond_branch_not_taken
:
5650 case vec_promote_demote
:
5656 case cond_branch_taken
:
5659 case unaligned_load
:
5660 if (TARGET_P9_VECTOR
)
5663 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5666 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5668 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5670 /* Double word aligned. */
5678 /* Double word aligned. */
5682 /* Unknown misalignment. */
5695 /* Misaligned loads are not supported. */
5700 case unaligned_store
:
5701 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5704 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5706 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5708 /* Double word aligned. */
5716 /* Double word aligned. */
5720 /* Unknown misalignment. */
5733 /* Misaligned stores are not supported. */
5739 /* This is a rough approximation assuming non-constant elements
5740 constructed into a vector via element insertion. FIXME:
5741 vec_construct is not granular enough for uniformly good
5742 decisions. If the initialization is a splat, this is
5743 cheaper than we estimate. Improve this someday. */
5744 elem_type
= TREE_TYPE (vectype
);
5745 /* 32-bit vectors loaded into registers are stored as double
5746 precision, so we need 2 permutes, 2 converts, and 1 merge
5747 to construct a vector of short floats from them. */
5748 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5749 && TYPE_PRECISION (elem_type
) == 32)
5751 /* On POWER9, integer vector types are built up in GPRs and then
5752 use a direct move (2 cycles). For POWER8 this is even worse,
5753 as we need two direct moves and a merge, and the direct moves
5755 else if (INTEGRAL_TYPE_P (elem_type
))
5757 if (TARGET_P9_VECTOR
)
5758 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5760 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 11;
5763 /* V2DFmode doesn't need a direct move. */
5771 /* Implement targetm.vectorize.preferred_simd_mode. */
/* NOTE(review): extraction dropped the return type, body braces, and
   the per-mode switch statements that map scalar modes to vector
   modes; only the feature-flag guards below remain.  Recover the
   missing lines from upstream history before relying on this.  */
5774 rs6000_preferred_simd_mode (machine_mode mode
)
/* Vector modes are only preferred when a vector unit is enabled.  */
5783 if (TARGET_ALTIVEC
|| TARGET_VSX
)
/* Paired-float guard; its consequent was dropped in extraction.  */
5800 if (TARGET_PAIRED_FLOAT
/* Per-loop bookkeeping for the vectorizer cost hooks: the loop being
   costed (NULL for non-loop vectorization) and one accumulated cost
   per vect_cost_model_location (prologue, body, epilogue).  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;
  unsigned cost[3];
} rs6000_cost_data;
5812 /* Test for likely overcommitment of vector hardware resources. If a
5813 loop iteration is relatively large, and too large a percentage of
5814 instructions in the loop are vectorized, the cost model may not
5815 adequately reflect delays from unavailable vector resources.
5816 Penalize the loop body cost for this case. */
5819 rs6000_density_test (rs6000_cost_data
*data
)
5821 const int DENSITY_PCT_THRESHOLD
= 85;
5822 const int DENSITY_SIZE_THRESHOLD
= 70;
5823 const int DENSITY_PENALTY
= 10;
5824 struct loop
*loop
= data
->loop_info
;
5825 basic_block
*bbs
= get_loop_body (loop
);
5826 int nbbs
= loop
->num_nodes
;
5827 int vec_cost
= data
->cost
[vect_body
], not_vec_cost
= 0;
5830 for (i
= 0; i
< nbbs
; i
++)
5832 basic_block bb
= bbs
[i
];
5833 gimple_stmt_iterator gsi
;
5835 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5837 gimple
*stmt
= gsi_stmt (gsi
);
5838 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5840 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5841 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5847 density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5849 if (density_pct
> DENSITY_PCT_THRESHOLD
5850 && vec_cost
+ not_vec_cost
> DENSITY_SIZE_THRESHOLD
)
5852 data
->cost
[vect_body
] = vec_cost
* (100 + DENSITY_PENALTY
) / 100;
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_NOTE
, vect_location
,
5855 "density %d%%, cost %d exceeds threshold, penalizing "
5856 "loop body cost by %d%%", density_pct
,
5857 vec_cost
+ not_vec_cost
, DENSITY_PENALTY
);
5861 /* Implement targetm.vectorize.init_cost. */
5863 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5864 instruction is needed by the vectorization. */
5865 static bool rs6000_vect_nonmem
;
5868 rs6000_init_cost (struct loop
*loop_info
)
5870 rs6000_cost_data
*data
= XNEW (struct _rs6000_cost_data
);
5871 data
->loop_info
= loop_info
;
5872 data
->cost
[vect_prologue
] = 0;
5873 data
->cost
[vect_body
] = 0;
5874 data
->cost
[vect_epilogue
] = 0;
5875 rs6000_vect_nonmem
= false;
5879 /* Implement targetm.vectorize.add_stmt_cost. */
5882 rs6000_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
5883 struct _stmt_vec_info
*stmt_info
, int misalign
,
5884 enum vect_cost_model_location where
)
5886 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
5887 unsigned retval
= 0;
5889 if (flag_vect_cost_model
)
5891 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
5892 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5894 /* Statements in an inner loop relative to the loop being
5895 vectorized are weighted more heavily. The value here is
5896 arbitrary and could potentially be improved with analysis. */
5897 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
5898 count
*= 50; /* FIXME. */
5900 retval
= (unsigned) (count
* stmt_cost
);
5901 cost_data
->cost
[where
] += retval
;
5903 /* Check whether we're doing something other than just a copy loop.
5904 Not all such loops may be profitably vectorized; see
5905 rs6000_finish_cost. */
5906 if ((kind
== vec_to_scalar
|| kind
== vec_perm
5907 || kind
== vec_promote_demote
|| kind
== vec_construct
5908 || kind
== scalar_to_vec
)
5909 || (where
== vect_body
&& kind
== vector_stmt
))
5910 rs6000_vect_nonmem
= true;
5916 /* Implement targetm.vectorize.finish_cost. */
5919 rs6000_finish_cost (void *data
, unsigned *prologue_cost
,
5920 unsigned *body_cost
, unsigned *epilogue_cost
)
5922 rs6000_cost_data
*cost_data
= (rs6000_cost_data
*) data
;
5924 if (cost_data
->loop_info
)
5925 rs6000_density_test (cost_data
);
5927 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5928 that require versioning for any reason. The vectorization is at
5929 best a wash inside the loop, and the versioning checks make
5930 profitability highly unlikely and potentially quite harmful. */
5931 if (cost_data
->loop_info
)
5933 loop_vec_info vec_info
= loop_vec_info_for_loop (cost_data
->loop_info
);
5934 if (!rs6000_vect_nonmem
5935 && LOOP_VINFO_VECT_FACTOR (vec_info
) == 2
5936 && LOOP_REQUIRES_VERSIONING (vec_info
))
5937 cost_data
->cost
[vect_body
] += 10000;
5940 *prologue_cost
= cost_data
->cost
[vect_prologue
];
5941 *body_cost
= cost_data
->cost
[vect_body
];
5942 *epilogue_cost
= cost_data
->cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Release the record
   allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
5953 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5954 library with vectorized intrinsics. */
5957 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
5961 const char *suffix
= NULL
;
5962 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
5965 machine_mode el_mode
, in_mode
;
5968 /* Libmass is suitable for unsafe math only as it does not correctly support
5969 parts of IEEE with the required precision such as denormals. Only support
5970 it if we have VSX to use the simd d2 or f4 functions.
5971 XXX: Add variable length support. */
5972 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
5975 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5976 n
= TYPE_VECTOR_SUBPARTS (type_out
);
5977 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5978 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5979 if (el_mode
!= in_mode
6015 if (el_mode
== DFmode
&& n
== 2)
6017 bdecl
= mathfn_built_in (double_type_node
, fn
);
6018 suffix
= "d2"; /* pow -> powd2 */
6020 else if (el_mode
== SFmode
&& n
== 4)
6022 bdecl
= mathfn_built_in (float_type_node
, fn
);
6023 suffix
= "4"; /* powf -> powf4 */
6035 gcc_assert (suffix
!= NULL
);
6036 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
6040 strcpy (name
, bname
+ sizeof ("__builtin_") - 1);
6041 strcat (name
, suffix
);
6044 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
6045 else if (n_args
== 2)
6046 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
6050 /* Build a function declaration for the vectorized function. */
6051 new_fndecl
= build_decl (BUILTINS_LOCATION
,
6052 FUNCTION_DECL
, get_identifier (name
), fntype
);
6053 TREE_PUBLIC (new_fndecl
) = 1;
6054 DECL_EXTERNAL (new_fndecl
) = 1;
6055 DECL_IS_NOVOPS (new_fndecl
) = 1;
6056 TREE_READONLY (new_fndecl
) = 1;
6061 /* Returns a function decl for a vectorized version of the builtin function
6062 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6063 if it is not available. */
6066 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
6069 machine_mode in_mode
, out_mode
;
6072 if (TARGET_DEBUG_BUILTIN
)
6073 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6074 combined_fn_name (combined_fn (fn
)),
6075 GET_MODE_NAME (TYPE_MODE (type_out
)),
6076 GET_MODE_NAME (TYPE_MODE (type_in
)));
6078 if (TREE_CODE (type_out
) != VECTOR_TYPE
6079 || TREE_CODE (type_in
) != VECTOR_TYPE
6080 || !TARGET_VECTORIZE_BUILTINS
)
6083 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6084 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
6085 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6086 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6091 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6092 && out_mode
== DFmode
&& out_n
== 2
6093 && in_mode
== DFmode
&& in_n
== 2)
6094 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNDP
];
6095 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6096 && out_mode
== SFmode
&& out_n
== 4
6097 && in_mode
== SFmode
&& in_n
== 4)
6098 return rs6000_builtin_decls
[VSX_BUILTIN_CPSGNSP
];
6099 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6100 && out_mode
== SFmode
&& out_n
== 4
6101 && in_mode
== SFmode
&& in_n
== 4)
6102 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_COPYSIGN_V4SF
];
6105 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6106 && out_mode
== DFmode
&& out_n
== 2
6107 && in_mode
== DFmode
&& in_n
== 2)
6108 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIP
];
6109 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6110 && out_mode
== SFmode
&& out_n
== 4
6111 && in_mode
== SFmode
&& in_n
== 4)
6112 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIP
];
6113 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6114 && out_mode
== SFmode
&& out_n
== 4
6115 && in_mode
== SFmode
&& in_n
== 4)
6116 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIP
];
6119 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6120 && out_mode
== DFmode
&& out_n
== 2
6121 && in_mode
== DFmode
&& in_n
== 2)
6122 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIM
];
6123 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6124 && out_mode
== SFmode
&& out_n
== 4
6125 && in_mode
== SFmode
&& in_n
== 4)
6126 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIM
];
6127 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6128 && out_mode
== SFmode
&& out_n
== 4
6129 && in_mode
== SFmode
&& in_n
== 4)
6130 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIM
];
6133 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6134 && out_mode
== DFmode
&& out_n
== 2
6135 && in_mode
== DFmode
&& in_n
== 2)
6136 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDDP
];
6137 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6138 && out_mode
== SFmode
&& out_n
== 4
6139 && in_mode
== SFmode
&& in_n
== 4)
6140 return rs6000_builtin_decls
[VSX_BUILTIN_XVMADDSP
];
6141 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6142 && out_mode
== SFmode
&& out_n
== 4
6143 && in_mode
== SFmode
&& in_n
== 4)
6144 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VMADDFP
];
6147 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6148 && out_mode
== DFmode
&& out_n
== 2
6149 && in_mode
== DFmode
&& in_n
== 2)
6150 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIZ
];
6151 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6152 && out_mode
== SFmode
&& out_n
== 4
6153 && in_mode
== SFmode
&& in_n
== 4)
6154 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIZ
];
6155 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
6156 && out_mode
== SFmode
&& out_n
== 4
6157 && in_mode
== SFmode
&& in_n
== 4)
6158 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRFIZ
];
6161 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6162 && flag_unsafe_math_optimizations
6163 && out_mode
== DFmode
&& out_n
== 2
6164 && in_mode
== DFmode
&& in_n
== 2)
6165 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPI
];
6166 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6167 && flag_unsafe_math_optimizations
6168 && out_mode
== SFmode
&& out_n
== 4
6169 && in_mode
== SFmode
&& in_n
== 4)
6170 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPI
];
6173 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6174 && !flag_trapping_math
6175 && out_mode
== DFmode
&& out_n
== 2
6176 && in_mode
== DFmode
&& in_n
== 2)
6177 return rs6000_builtin_decls
[VSX_BUILTIN_XVRDPIC
];
6178 if (VECTOR_UNIT_VSX_P (V4SFmode
)
6179 && !flag_trapping_math
6180 && out_mode
== SFmode
&& out_n
== 4
6181 && in_mode
== SFmode
&& in_n
== 4)
6182 return rs6000_builtin_decls
[VSX_BUILTIN_XVRSPIC
];
6188 /* Generate calls to libmass if appropriate. */
6189 if (rs6000_veclib_handler
)
6190 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
6195 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6198 rs6000_builtin_md_vectorized_function (tree fndecl
, tree type_out
,
6201 machine_mode in_mode
, out_mode
;
6204 if (TARGET_DEBUG_BUILTIN
)
6205 fprintf (stderr
, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6206 IDENTIFIER_POINTER (DECL_NAME (fndecl
)),
6207 GET_MODE_NAME (TYPE_MODE (type_out
)),
6208 GET_MODE_NAME (TYPE_MODE (type_in
)));
6210 if (TREE_CODE (type_out
) != VECTOR_TYPE
6211 || TREE_CODE (type_in
) != VECTOR_TYPE
6212 || !TARGET_VECTORIZE_BUILTINS
)
6215 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
6216 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
6217 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
6218 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
6220 enum rs6000_builtins fn
6221 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
6224 case RS6000_BUILTIN_RSQRTF
:
6225 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
6226 && out_mode
== SFmode
&& out_n
== 4
6227 && in_mode
== SFmode
&& in_n
== 4)
6228 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRSQRTFP
];
6230 case RS6000_BUILTIN_RSQRT
:
6231 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6232 && out_mode
== DFmode
&& out_n
== 2
6233 && in_mode
== DFmode
&& in_n
== 2)
6234 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
6236 case RS6000_BUILTIN_RECIPF
:
6237 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
)
6238 && out_mode
== SFmode
&& out_n
== 4
6239 && in_mode
== SFmode
&& in_n
== 4)
6240 return rs6000_builtin_decls
[ALTIVEC_BUILTIN_VRECIPFP
];
6242 case RS6000_BUILTIN_RECIP
:
6243 if (VECTOR_UNIT_VSX_P (V2DFmode
)
6244 && out_mode
== DFmode
&& out_n
== 2
6245 && in_mode
== DFmode
&& in_n
== 2)
6246 return rs6000_builtin_decls
[VSX_BUILTIN_RECIP_V2DF
];
6254 /* Default CPU string for rs6000*_file_start functions. */
6255 static const char *rs6000_default_cpu
;
6257 /* Do anything needed at the start of the asm file. */
6260 rs6000_file_start (void)
6263 const char *start
= buffer
;
6264 FILE *file
= asm_out_file
;
6266 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
6268 default_file_start ();
6270 if (flag_verbose_asm
)
6272 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
6274 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
6276 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
6280 if (global_options_set
.x_rs6000_cpu_index
)
6282 fprintf (file
, "%s -mcpu=%s", start
,
6283 processor_target_table
[rs6000_cpu_index
].name
);
6287 if (global_options_set
.x_rs6000_tune_index
)
6289 fprintf (file
, "%s -mtune=%s", start
,
6290 processor_target_table
[rs6000_tune_index
].name
);
6294 if (PPC405_ERRATUM77
)
6296 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
6300 #ifdef USING_ELFOS_H
6301 switch (rs6000_sdata
)
6303 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
6304 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
6305 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
6306 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
6309 if (rs6000_sdata
&& g_switch_value
)
6311 fprintf (file
, "%s -G %d", start
,
6321 #ifdef USING_ELFOS_H
6322 if (!(rs6000_default_cpu
&& rs6000_default_cpu
[0])
6323 && !global_options_set
.x_rs6000_cpu_index
)
6325 fputs ("\t.machine ", asm_out_file
);
6326 if ((rs6000_isa_flags
& OPTION_MASK_MODULO
) != 0)
6327 fputs ("power9\n", asm_out_file
);
6328 else if ((rs6000_isa_flags
& OPTION_MASK_DIRECT_MOVE
) != 0)
6329 fputs ("power8\n", asm_out_file
);
6330 else if ((rs6000_isa_flags
& OPTION_MASK_POPCNTD
) != 0)
6331 fputs ("power7\n", asm_out_file
);
6332 else if ((rs6000_isa_flags
& OPTION_MASK_CMPB
) != 0)
6333 fputs ("power6\n", asm_out_file
);
6334 else if ((rs6000_isa_flags
& OPTION_MASK_POPCNTB
) != 0)
6335 fputs ("power5\n", asm_out_file
);
6336 else if ((rs6000_isa_flags
& OPTION_MASK_MFCRF
) != 0)
6337 fputs ("power4\n", asm_out_file
);
6338 else if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) != 0)
6339 fputs ("ppc64\n", asm_out_file
);
6341 fputs ("ppc\n", asm_out_file
);
6345 if (DEFAULT_ABI
== ABI_ELFv2
)
6346 fprintf (file
, "\t.abiversion 2\n");
6350 /* Return nonzero if this function is known to have a null epilogue. */
6353 direct_return (void)
6355 if (reload_completed
)
6357 rs6000_stack_t
*info
= rs6000_stack_info ();
6359 if (info
->first_gp_reg_save
== 32
6360 && info
->first_fp_reg_save
== 64
6361 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
6362 && ! info
->lr_save_p
6363 && ! info
->cr_save_p
6364 && info
->vrsave_size
== 0
6372 /* Return the number of instructions it takes to form a constant in an
6373 integer register. */
6376 num_insns_constant_wide (HOST_WIDE_INT value
)
6378 /* signed constant loadable with addi */
6379 if (((unsigned HOST_WIDE_INT
) value
+ 0x8000) < 0x10000)
6382 /* constant loadable with addis */
6383 else if ((value
& 0xffff) == 0
6384 && (value
>> 31 == -1 || value
>> 31 == 0))
6387 else if (TARGET_POWERPC64
)
6389 HOST_WIDE_INT low
= ((value
& 0xffffffff) ^ 0x80000000) - 0x80000000;
6390 HOST_WIDE_INT high
= value
>> 31;
6392 if (high
== 0 || high
== -1)
6398 return num_insns_constant_wide (high
) + 1;
6400 return num_insns_constant_wide (low
) + 1;
6402 return (num_insns_constant_wide (high
)
6403 + num_insns_constant_wide (low
) + 1);
6411 num_insns_constant (rtx op
, machine_mode mode
)
6413 HOST_WIDE_INT low
, high
;
6415 switch (GET_CODE (op
))
6418 if ((INTVAL (op
) >> 31) != 0 && (INTVAL (op
) >> 31) != -1
6419 && rs6000_is_valid_and_mask (op
, mode
))
6422 return num_insns_constant_wide (INTVAL (op
));
6424 case CONST_WIDE_INT
:
6427 int ins
= CONST_WIDE_INT_NUNITS (op
) - 1;
6428 for (i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6429 ins
+= num_insns_constant_wide (CONST_WIDE_INT_ELT (op
, i
));
6434 if (mode
== SFmode
|| mode
== SDmode
)
6438 if (DECIMAL_FLOAT_MODE_P (mode
))
6439 REAL_VALUE_TO_TARGET_DECIMAL32
6440 (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6442 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6443 return num_insns_constant_wide ((HOST_WIDE_INT
) l
);
6447 if (DECIMAL_FLOAT_MODE_P (mode
))
6448 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6450 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op
), l
);
6451 high
= l
[WORDS_BIG_ENDIAN
== 0];
6452 low
= l
[WORDS_BIG_ENDIAN
!= 0];
6455 return (num_insns_constant_wide (low
)
6456 + num_insns_constant_wide (high
));
6459 if ((high
== 0 && low
>= 0)
6460 || (high
== -1 && low
< 0))
6461 return num_insns_constant_wide (low
);
6463 else if (rs6000_is_valid_and_mask (op
, mode
))
6467 return num_insns_constant_wide (high
) + 1;
6470 return (num_insns_constant_wide (high
)
6471 + num_insns_constant_wide (low
) + 1);
6479 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6480 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6481 corresponding element of the vector, but for V4SFmode and V2SFmode,
6482 the corresponding "float" is interpreted as an SImode integer. */
6485 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6489 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6490 gcc_assert (GET_MODE (op
) != V2DImode
6491 && GET_MODE (op
) != V2DFmode
);
6493 tmp
= CONST_VECTOR_ELT (op
, elt
);
6494 if (GET_MODE (op
) == V4SFmode
6495 || GET_MODE (op
) == V2SFmode
)
6496 tmp
= gen_lowpart (SImode
, tmp
);
6497 return INTVAL (tmp
);
6500 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6501 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6502 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6503 all items are set to the same value and contain COPIES replicas of the
6504 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6505 operand and the others are set to the value of the operand's msb. */
6508 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6510 machine_mode mode
= GET_MODE (op
);
6511 machine_mode inner
= GET_MODE_INNER (mode
);
6519 HOST_WIDE_INT splat_val
;
6520 HOST_WIDE_INT msb_val
;
6522 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6525 nunits
= GET_MODE_NUNITS (mode
);
6526 bitsize
= GET_MODE_BITSIZE (inner
);
6527 mask
= GET_MODE_MASK (inner
);
6529 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6531 msb_val
= val
>= 0 ? 0 : -1;
6533 /* Construct the value to be splatted, if possible. If not, return 0. */
6534 for (i
= 2; i
<= copies
; i
*= 2)
6536 HOST_WIDE_INT small_val
;
6538 small_val
= splat_val
>> bitsize
;
6540 if (splat_val
!= ((HOST_WIDE_INT
)
6541 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6542 | (small_val
& mask
)))
6544 splat_val
= small_val
;
6547 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6548 if (EASY_VECTOR_15 (splat_val
))
6551 /* Also check if we can splat, and then add the result to itself. Do so if
6552 the value is positive, of if the splat instruction is using OP's mode;
6553 for splat_val < 0, the splat and the add should use the same mode. */
6554 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6555 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6558 /* Also check if are loading up the most significant bit which can be done by
6559 loading up -1 and shifting the value left by -1. */
6560 else if (EASY_VECTOR_MSB (splat_val
, inner
))
6566 /* Check if VAL is present in every STEP-th element, and the
6567 other elements are filled with its most significant bit. */
6568 for (i
= 1; i
< nunits
; ++i
)
6570 HOST_WIDE_INT desired_val
;
6571 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6572 if ((i
& (step
- 1)) == 0)
6575 desired_val
= msb_val
;
6577 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6584 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6585 instruction, filling in the bottom elements with 0 or -1.
6587 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6588 for the number of zeroes to shift in, or negative for the number of 0xff
6591 OP is a CONST_VECTOR. */
6594 vspltis_shifted (rtx op
)
6596 machine_mode mode
= GET_MODE (op
);
6597 machine_mode inner
= GET_MODE_INNER (mode
);
6605 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6608 /* We need to create pseudo registers to do the shift, so don't recognize
6609 shift vector constants after reload. */
6610 if (!can_create_pseudo_p ())
6613 nunits
= GET_MODE_NUNITS (mode
);
6614 mask
= GET_MODE_MASK (inner
);
6616 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6618 /* Check if the value can really be the operand of a vspltis[bhw]. */
6619 if (EASY_VECTOR_15 (val
))
6622 /* Also check if we are loading up the most significant bit which can be done
6623 by loading up -1 and shifting the value left by -1. */
6624 else if (EASY_VECTOR_MSB (val
, inner
))
6630 /* Check if VAL is present in every STEP-th element until we find elements
6631 that are 0 or all 1 bits. */
6632 for (i
= 1; i
< nunits
; ++i
)
6634 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6635 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6637 /* If the value isn't the splat value, check for the remaining elements
6643 for (j
= i
+1; j
< nunits
; ++j
)
6645 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6646 if (const_vector_elt_as_int (op
, elt2
) != 0)
6650 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6653 else if ((elt_val
& mask
) == mask
)
6655 for (j
= i
+1; j
< nunits
; ++j
)
6657 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6658 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6662 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6670 /* If all elements are equal, we don't need to do VLSDOI. */
6675 /* Return true if OP is of the given MODE and can be synthesized
6676 with a vspltisb, vspltish or vspltisw. */
6679 easy_altivec_constant (rtx op
, machine_mode mode
)
6681 unsigned step
, copies
;
6683 if (mode
== VOIDmode
)
6684 mode
= GET_MODE (op
);
6685 else if (mode
!= GET_MODE (op
))
6688 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6690 if (mode
== V2DFmode
)
6691 return zero_constant (op
, mode
);
6693 else if (mode
== V2DImode
)
6695 if (GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
6696 || GET_CODE (CONST_VECTOR_ELT (op
, 1)) != CONST_INT
)
6699 if (zero_constant (op
, mode
))
6702 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6703 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6709 /* V1TImode is a special container for TImode. Ignore for now. */
6710 else if (mode
== V1TImode
)
6713 /* Start with a vspltisw. */
6714 step
= GET_MODE_NUNITS (mode
) / 4;
6717 if (vspltis_constant (op
, step
, copies
))
6720 /* Then try with a vspltish. */
6726 if (vspltis_constant (op
, step
, copies
))
6729 /* And finally a vspltisb. */
6735 if (vspltis_constant (op
, step
, copies
))
6738 if (vspltis_shifted (op
) != 0)
6744 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6745 result is OP. Abort if it is not possible. */
6748 gen_easy_altivec_constant (rtx op
)
6750 machine_mode mode
= GET_MODE (op
);
6751 int nunits
= GET_MODE_NUNITS (mode
);
6752 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6753 unsigned step
= nunits
/ 4;
6754 unsigned copies
= 1;
6756 /* Start with a vspltisw. */
6757 if (vspltis_constant (op
, step
, copies
))
6758 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6760 /* Then try with a vspltish. */
6766 if (vspltis_constant (op
, step
, copies
))
6767 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6769 /* And finally a vspltisb. */
6775 if (vspltis_constant (op
, step
, copies
))
6776 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6781 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6782 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6784 Return the number of instructions needed (1 or 2) into the address pointed
6787 Return the constant that is being split via CONSTANT_PTR. */
6790 xxspltib_constant_p (rtx op
,
6795 size_t nunits
= GET_MODE_NUNITS (mode
);
6797 HOST_WIDE_INT value
;
6800 /* Set the returned values to out of bound values. */
6801 *num_insns_ptr
= -1;
6802 *constant_ptr
= 256;
6804 if (!TARGET_P9_VECTOR
)
6807 if (mode
== VOIDmode
)
6808 mode
= GET_MODE (op
);
6810 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6813 /* Handle (vec_duplicate <constant>). */
6814 if (GET_CODE (op
) == VEC_DUPLICATE
)
6816 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6817 && mode
!= V2DImode
)
6820 element
= XEXP (op
, 0);
6821 if (!CONST_INT_P (element
))
6824 value
= INTVAL (element
);
6825 if (!IN_RANGE (value
, -128, 127))
6829 /* Handle (const_vector [...]). */
6830 else if (GET_CODE (op
) == CONST_VECTOR
)
6832 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6833 && mode
!= V2DImode
)
6836 element
= CONST_VECTOR_ELT (op
, 0);
6837 if (!CONST_INT_P (element
))
6840 value
= INTVAL (element
);
6841 if (!IN_RANGE (value
, -128, 127))
6844 for (i
= 1; i
< nunits
; i
++)
6846 element
= CONST_VECTOR_ELT (op
, i
);
6847 if (!CONST_INT_P (element
))
6850 if (value
!= INTVAL (element
))
6855 /* Handle integer constants being loaded into the upper part of the VSX
6856 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6857 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6858 else if (CONST_INT_P (op
))
6860 if (!SCALAR_INT_MODE_P (mode
))
6863 value
= INTVAL (op
);
6864 if (!IN_RANGE (value
, -128, 127))
6867 if (!IN_RANGE (value
, -1, 0))
6869 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6872 if (EASY_VECTOR_15 (value
))
6880 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6881 sign extend. Special case 0/-1 to allow getting any VSX register instead
6882 of an Altivec register. */
6883 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6884 && EASY_VECTOR_15 (value
))
6887 /* Return # of instructions and the constant byte for XXSPLTIB. */
6888 if (mode
== V16QImode
)
6891 else if (IN_RANGE (value
, -1, 0))
6897 *constant_ptr
= (int) value
;
6902 output_vec_const_move (rtx
*operands
)
6910 mode
= GET_MODE (dest
);
6914 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6915 int xxspltib_value
= 256;
6918 if (zero_constant (vec
, mode
))
6920 if (TARGET_P9_VECTOR
)
6921 return "xxspltib %x0,0";
6923 else if (dest_vmx_p
)
6924 return "vspltisw %0,0";
6927 return "xxlxor %x0,%x0,%x0";
6930 if (all_ones_constant (vec
, mode
))
6932 if (TARGET_P9_VECTOR
)
6933 return "xxspltib %x0,255";
6935 else if (dest_vmx_p
)
6936 return "vspltisw %0,-1";
6938 else if (TARGET_P8_VECTOR
)
6939 return "xxlorc %x0,%x0,%x0";
6945 if (TARGET_P9_VECTOR
6946 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6950 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6951 return "xxspltib %x0,%2";
6962 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6963 if (zero_constant (vec
, mode
))
6964 return "vspltisw %0,0";
6966 if (all_ones_constant (vec
, mode
))
6967 return "vspltisw %0,-1";
6969 /* Do we need to construct a value using VSLDOI? */
6970 shift
= vspltis_shifted (vec
);
6974 splat_vec
= gen_easy_altivec_constant (vec
);
6975 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6976 operands
[1] = XEXP (splat_vec
, 0);
6977 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6980 switch (GET_MODE (splat_vec
))
6983 return "vspltisw %0,%1";
6986 return "vspltish %0,%1";
6989 return "vspltisb %0,%1";
6999 /* Initialize TARGET of vector PAIRED to VALS. */
7002 paired_expand_vector_init (rtx target
, rtx vals
)
7004 machine_mode mode
= GET_MODE (target
);
7005 int n_elts
= GET_MODE_NUNITS (mode
);
7007 rtx x
, new_rtx
, tmp
, constant_op
, op1
, op2
;
7010 for (i
= 0; i
< n_elts
; ++i
)
7012 x
= XVECEXP (vals
, 0, i
);
7013 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
7018 /* Load from constant pool. */
7019 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
7025 /* The vector is initialized only with non-constants. */
7026 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, XVECEXP (vals
, 0, 0),
7027 XVECEXP (vals
, 0, 1));
7029 emit_move_insn (target
, new_rtx
);
7033 /* One field is non-constant and the other one is a constant. Load the
7034 constant from the constant pool and use ps_merge instruction to
7035 construct the whole vector. */
7036 op1
= XVECEXP (vals
, 0, 0);
7037 op2
= XVECEXP (vals
, 0, 1);
7039 constant_op
= (CONSTANT_P (op1
)) ? op1
: op2
;
7041 tmp
= gen_reg_rtx (GET_MODE (constant_op
));
7042 emit_move_insn (tmp
, constant_op
);
7044 if (CONSTANT_P (op1
))
7045 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, tmp
, op2
);
7047 new_rtx
= gen_rtx_VEC_CONCAT (V2SFmode
, op1
, tmp
);
7049 emit_move_insn (target
, new_rtx
);
7053 paired_expand_vector_move (rtx operands
[])
7055 rtx op0
= operands
[0], op1
= operands
[1];
7057 emit_move_insn (op0
, op1
);
7060 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
7061 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7062 operands for the relation operation COND. This is a recursive
7066 paired_emit_vector_compare (enum rtx_code rcode
,
7067 rtx dest
, rtx op0
, rtx op1
,
7068 rtx cc_op0
, rtx cc_op1
)
7070 rtx tmp
= gen_reg_rtx (V2SFmode
);
7073 gcc_assert (TARGET_PAIRED_FLOAT
);
7074 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
7080 paired_emit_vector_compare (GE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7084 emit_insn (gen_subv2sf3 (tmp
, cc_op0
, cc_op1
));
7085 emit_insn (gen_selv2sf4 (dest
, tmp
, op0
, op1
, CONST0_RTX (SFmode
)));
7089 paired_emit_vector_compare (GE
, dest
, op0
, op1
, cc_op1
, cc_op0
);
7092 paired_emit_vector_compare (LE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7095 tmp1
= gen_reg_rtx (V2SFmode
);
7096 max
= gen_reg_rtx (V2SFmode
);
7097 min
= gen_reg_rtx (V2SFmode
);
7098 gen_reg_rtx (V2SFmode
);
7100 emit_insn (gen_subv2sf3 (tmp
, cc_op0
, cc_op1
));
7101 emit_insn (gen_selv2sf4
7102 (max
, tmp
, cc_op0
, cc_op1
, CONST0_RTX (SFmode
)));
7103 emit_insn (gen_subv2sf3 (tmp
, cc_op1
, cc_op0
));
7104 emit_insn (gen_selv2sf4
7105 (min
, tmp
, cc_op0
, cc_op1
, CONST0_RTX (SFmode
)));
7106 emit_insn (gen_subv2sf3 (tmp1
, min
, max
));
7107 emit_insn (gen_selv2sf4 (dest
, tmp1
, op0
, op1
, CONST0_RTX (SFmode
)));
7110 paired_emit_vector_compare (EQ
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7113 paired_emit_vector_compare (LE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7116 paired_emit_vector_compare (LT
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7119 paired_emit_vector_compare (GE
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7122 paired_emit_vector_compare (GT
, dest
, op1
, op0
, cc_op0
, cc_op1
);
7131 /* Emit vector conditional expression.
7132 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7133 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7136 paired_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
7137 rtx cond
, rtx cc_op0
, rtx cc_op1
)
7139 enum rtx_code rcode
= GET_CODE (cond
);
7141 if (!TARGET_PAIRED_FLOAT
)
7144 paired_emit_vector_compare (rcode
, dest
, op1
, op2
, cc_op0
, cc_op1
);
7149 /* Initialize vector TARGET to VALS. */
7152 rs6000_expand_vector_init (rtx target
, rtx vals
)
7154 machine_mode mode
= GET_MODE (target
);
7155 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7156 int n_elts
= GET_MODE_NUNITS (mode
);
7157 int n_var
= 0, one_var
= -1;
7158 bool all_same
= true, all_const_zero
= true;
7162 for (i
= 0; i
< n_elts
; ++i
)
7164 x
= XVECEXP (vals
, 0, i
);
7165 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
7166 ++n_var
, one_var
= i
;
7167 else if (x
!= CONST0_RTX (inner_mode
))
7168 all_const_zero
= false;
7170 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
7176 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
7177 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
7178 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
7180 /* Zero register. */
7181 emit_move_insn (target
, CONST0_RTX (mode
));
7184 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
7186 /* Splat immediate. */
7187 emit_insn (gen_rtx_SET (target
, const_vec
));
7192 /* Load from constant pool. */
7193 emit_move_insn (target
, const_vec
);
7198 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7199 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
7203 size_t num_elements
= all_same
? 1 : 2;
7204 for (i
= 0; i
< num_elements
; i
++)
7206 op
[i
] = XVECEXP (vals
, 0, i
);
7207 /* Just in case there is a SUBREG with a smaller mode, do a
7209 if (GET_MODE (op
[i
]) != inner_mode
)
7211 rtx tmp
= gen_reg_rtx (inner_mode
);
7212 convert_move (tmp
, op
[i
], 0);
7215 /* Allow load with splat double word. */
7216 else if (MEM_P (op
[i
]))
7219 op
[i
] = force_reg (inner_mode
, op
[i
]);
7221 else if (!REG_P (op
[i
]))
7222 op
[i
] = force_reg (inner_mode
, op
[i
]);
7227 if (mode
== V2DFmode
)
7228 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
7230 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
7234 if (mode
== V2DFmode
)
7235 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
7237 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
7242 /* Special case initializing vector int if we are on 64-bit systems with
7243 direct move or we have the ISA 3.0 instructions. */
7244 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
7245 && TARGET_DIRECT_MOVE_64BIT
)
7249 rtx element0
= XVECEXP (vals
, 0, 0);
7250 if (MEM_P (element0
))
7251 element0
= rs6000_address_for_fpconvert (element0
);
7253 element0
= force_reg (SImode
, element0
);
7255 if (TARGET_P9_VECTOR
)
7256 emit_insn (gen_vsx_splat_v4si (target
, element0
));
7259 rtx tmp
= gen_reg_rtx (DImode
);
7260 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
7261 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
7270 for (i
= 0; i
< 4; i
++)
7272 elements
[i
] = XVECEXP (vals
, 0, i
);
7273 if (!CONST_INT_P (elements
[i
]) && !REG_P (elements
[i
]))
7274 elements
[i
] = copy_to_mode_reg (SImode
, elements
[i
]);
7277 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
7278 elements
[2], elements
[3]));
7283 /* With single precision floating point on VSX, know that internally single
7284 precision is actually represented as a double, and either make 2 V2DF
7285 vectors, and convert these vectors to single precision, or do one
7286 conversion, and splat the result to the other elements. */
7287 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
7291 rtx element0
= XVECEXP (vals
, 0, 0);
7293 if (TARGET_P9_VECTOR
)
7295 if (MEM_P (element0
))
7296 element0
= rs6000_address_for_fpconvert (element0
);
7298 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
7303 rtx freg
= gen_reg_rtx (V4SFmode
);
7304 rtx sreg
= force_reg (SFmode
, element0
);
7305 rtx cvt
= (TARGET_XSCVDPSPN
7306 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
7307 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
7310 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
7316 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
7317 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
7318 rtx flt_even
= gen_reg_rtx (V4SFmode
);
7319 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
7320 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
7321 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
7322 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
7323 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
7325 /* Use VMRGEW if we can instead of doing a permute. */
7326 if (TARGET_P8_VECTOR
)
7328 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op2
));
7329 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op1
, op3
));
7330 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7331 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7332 if (BYTES_BIG_ENDIAN
)
7333 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_even
, flt_odd
));
7335 emit_insn (gen_p8_vmrgew_v4sf_direct (target
, flt_odd
, flt_even
));
7339 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
7340 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
7341 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7342 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7343 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7349 /* Special case initializing vector short/char that are splats if we are on
7350 64-bit systems with direct move. */
7351 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7352 && (mode
== V16QImode
|| mode
== V8HImode
))
7354 rtx op0
= XVECEXP (vals
, 0, 0);
7355 rtx di_tmp
= gen_reg_rtx (DImode
);
7358 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7360 if (mode
== V16QImode
)
7362 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7363 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7367 if (mode
== V8HImode
)
7369 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7370 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7375 /* Store value to stack temp. Load vector element. Splat. However, splat
7376 of 64-bit items is not supported on Altivec. */
7377 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7379 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7380 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7381 XVECEXP (vals
, 0, 0));
7382 x
= gen_rtx_UNSPEC (VOIDmode
,
7383 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7384 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7386 gen_rtx_SET (target
, mem
),
7388 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7389 gen_rtx_PARALLEL (VOIDmode
,
7390 gen_rtvec (1, const0_rtx
)));
7391 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7395 /* One field is non-constant. Load constant then overwrite
7399 rtx copy
= copy_rtx (vals
);
7401 /* Load constant part of vector, substitute neighboring value for
7403 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7404 rs6000_expand_vector_init (target
, copy
);
7406 /* Insert variable. */
7407 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
), one_var
);
7411 /* Construct the vector in memory one field at a time
7412 and load the whole vector. */
7413 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7414 for (i
= 0; i
< n_elts
; i
++)
7415 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7416 i
* GET_MODE_SIZE (inner_mode
)),
7417 XVECEXP (vals
, 0, i
));
7418 emit_move_insn (target
, mem
);
7421 /* Set field ELT of TARGET to VAL. */
7424 rs6000_expand_vector_set (rtx target
, rtx val
, int elt
)
7426 machine_mode mode
= GET_MODE (target
);
7427 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7428 rtx reg
= gen_reg_rtx (mode
);
7430 int width
= GET_MODE_SIZE (inner_mode
);
7433 val
= force_reg (GET_MODE (val
), val
);
7435 if (VECTOR_MEM_VSX_P (mode
))
7437 rtx insn
= NULL_RTX
;
7438 rtx elt_rtx
= GEN_INT (elt
);
7440 if (mode
== V2DFmode
)
7441 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7443 else if (mode
== V2DImode
)
7444 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7446 else if (TARGET_P9_VECTOR
&& TARGET_VSX_SMALL_INTEGER
7447 && TARGET_UPPER_REGS_DI
&& TARGET_POWERPC64
)
7449 if (mode
== V4SImode
)
7450 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7451 else if (mode
== V8HImode
)
7452 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7453 else if (mode
== V16QImode
)
7454 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7455 else if (mode
== V4SFmode
)
7456 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7466 /* Simplify setting single element vectors like V1TImode. */
7467 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
) && elt
== 0)
7469 emit_move_insn (target
, gen_lowpart (mode
, val
));
7473 /* Load single variable value. */
7474 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7475 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7476 x
= gen_rtx_UNSPEC (VOIDmode
,
7477 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7478 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7480 gen_rtx_SET (reg
, mem
),
7483 /* Linear sequence. */
7484 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7485 for (i
= 0; i
< 16; ++i
)
7486 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7488 /* Set permute mask to insert element into target. */
7489 for (i
= 0; i
< width
; ++i
)
7490 XVECEXP (mask
, 0, elt
*width
+ i
)
7491 = GEN_INT (i
+ 0x10);
7492 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7494 if (BYTES_BIG_ENDIAN
)
7495 x
= gen_rtx_UNSPEC (mode
,
7496 gen_rtvec (3, target
, reg
,
7497 force_reg (V16QImode
, x
)),
7501 if (TARGET_P9_VECTOR
)
7502 x
= gen_rtx_UNSPEC (mode
,
7503 gen_rtvec (3, target
, reg
,
7504 force_reg (V16QImode
, x
)),
7508 /* Invert selector. We prefer to generate VNAND on P8 so
7509 that future fusion opportunities can kick in, but must
7510 generate VNOR elsewhere. */
7511 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7512 rtx iorx
= (TARGET_P8_VECTOR
7513 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7514 : gen_rtx_AND (V16QImode
, notx
, notx
));
7515 rtx tmp
= gen_reg_rtx (V16QImode
);
7516 emit_insn (gen_rtx_SET (tmp
, iorx
));
7518 /* Permute with operands reversed and adjusted selector. */
7519 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7524 emit_insn (gen_rtx_SET (target
, x
));
7527 /* Extract field ELT from VEC into TARGET. */
7530 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7532 machine_mode mode
= GET_MODE (vec
);
7533 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7536 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7543 gcc_assert (INTVAL (elt
) == 0 && inner_mode
== TImode
);
7544 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7547 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7550 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7553 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7556 if (TARGET_DIRECT_MOVE_64BIT
)
7558 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7564 if (TARGET_DIRECT_MOVE_64BIT
)
7566 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7572 if (TARGET_DIRECT_MOVE_64BIT
)
7574 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7580 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7581 && TARGET_DIRECT_MOVE_64BIT
)
7583 if (GET_MODE (elt
) != DImode
)
7585 rtx tmp
= gen_reg_rtx (DImode
);
7586 convert_move (tmp
, elt
, 0);
7589 else if (!REG_P (elt
))
7590 elt
= force_reg (DImode
, elt
);
7595 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7599 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7603 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7607 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7611 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7615 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7623 gcc_assert (CONST_INT_P (elt
));
7625 /* Allocate mode-sized buffer. */
7626 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7628 emit_move_insn (mem
, vec
);
7630 /* Add offset to field within buffer matching vector element. */
7631 mem
= adjust_address_nv (mem
, inner_mode
,
7632 INTVAL (elt
) * GET_MODE_SIZE (inner_mode
));
7634 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7637 /* Helper function to return the register number of a RTX. */
7639 regno_or_subregno (rtx op
)
7643 else if (SUBREG_P (op
))
7644 return subreg_regno (op
);
7649 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7650 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7651 temporary (BASE_TMP) to fixup the address. Return the new memory address
7652 that is valid for reads or writes to a given register (SCALAR_REG). */
7655 rs6000_adjust_vec_address (rtx scalar_reg
,
7659 machine_mode scalar_mode
)
7661 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7662 rtx addr
= XEXP (mem
, 0);
7667 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7668 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7670 /* Calculate what we need to add to the address to get the element
7672 if (CONST_INT_P (element
))
7673 element_offset
= GEN_INT (INTVAL (element
) * scalar_size
);
7676 int byte_shift
= exact_log2 (scalar_size
);
7677 gcc_assert (byte_shift
>= 0);
7679 if (byte_shift
== 0)
7680 element_offset
= element
;
7684 if (TARGET_POWERPC64
)
7685 emit_insn (gen_ashldi3 (base_tmp
, element
, GEN_INT (byte_shift
)));
7687 emit_insn (gen_ashlsi3 (base_tmp
, element
, GEN_INT (byte_shift
)));
7689 element_offset
= base_tmp
;
7693 /* Create the new address pointing to the element within the vector. If we
7694 are adding 0, we don't have to change the address. */
7695 if (element_offset
== const0_rtx
)
7698 /* A simple indirect address can be converted into a reg + offset
7700 else if (REG_P (addr
) || SUBREG_P (addr
))
7701 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7703 /* Optimize D-FORM addresses with constant offset with a constant element, to
7704 include the element offset in the address directly. */
7705 else if (GET_CODE (addr
) == PLUS
)
7707 rtx op0
= XEXP (addr
, 0);
7708 rtx op1
= XEXP (addr
, 1);
7711 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7712 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7714 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7715 rtx offset_rtx
= GEN_INT (offset
);
7717 if (IN_RANGE (offset
, -32768, 32767)
7718 && (scalar_size
< 8 || (offset
& 0x3) == 0))
7719 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7722 emit_move_insn (base_tmp
, offset_rtx
);
7723 new_addr
= gen_rtx_PLUS (Pmode
, op0
, base_tmp
);
7728 bool op1_reg_p
= (REG_P (op1
) || SUBREG_P (op1
));
7729 bool ele_reg_p
= (REG_P (element_offset
) || SUBREG_P (element_offset
));
7731 /* Note, ADDI requires the register being added to be a base
7732 register. If the register was R0, load it up into the temporary
7735 && (ele_reg_p
|| reg_or_subregno (op1
) != FIRST_GPR_REGNO
))
7737 insn
= gen_add3_insn (base_tmp
, op1
, element_offset
);
7738 gcc_assert (insn
!= NULL_RTX
);
7743 && reg_or_subregno (element_offset
) != FIRST_GPR_REGNO
)
7745 insn
= gen_add3_insn (base_tmp
, element_offset
, op1
);
7746 gcc_assert (insn
!= NULL_RTX
);
7752 emit_move_insn (base_tmp
, op1
);
7753 emit_insn (gen_add2_insn (base_tmp
, element_offset
));
7756 new_addr
= gen_rtx_PLUS (Pmode
, op0
, base_tmp
);
7762 emit_move_insn (base_tmp
, addr
);
7763 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7766 /* If we have a PLUS, we need to see whether the particular register class
7767 allows for D-FORM or X-FORM addressing. */
7768 if (GET_CODE (new_addr
) == PLUS
)
7770 rtx op1
= XEXP (new_addr
, 1);
7771 addr_mask_type addr_mask
;
7772 int scalar_regno
= regno_or_subregno (scalar_reg
);
7774 gcc_assert (scalar_regno
< FIRST_PSEUDO_REGISTER
);
7775 if (INT_REGNO_P (scalar_regno
))
7776 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_GPR
];
7778 else if (FP_REGNO_P (scalar_regno
))
7779 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_FPR
];
7781 else if (ALTIVEC_REGNO_P (scalar_regno
))
7782 addr_mask
= reg_addr
[scalar_mode
].addr_mask
[RELOAD_REG_VMX
];
7787 if (REG_P (op1
) || SUBREG_P (op1
))
7788 valid_addr_p
= (addr_mask
& RELOAD_REG_INDEXED
) != 0;
7790 valid_addr_p
= (addr_mask
& RELOAD_REG_OFFSET
) != 0;
7793 else if (REG_P (new_addr
) || SUBREG_P (new_addr
))
7794 valid_addr_p
= true;
7797 valid_addr_p
= false;
7801 emit_move_insn (base_tmp
, new_addr
);
7802 new_addr
= base_tmp
;
7805 return change_address (mem
, scalar_mode
, new_addr
);
7808 /* Split a variable vec_extract operation into the component instructions. */
7811 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7814 machine_mode mode
= GET_MODE (src
);
7815 machine_mode scalar_mode
= GET_MODE (dest
);
7816 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7817 int byte_shift
= exact_log2 (scalar_size
);
7819 gcc_assert (byte_shift
>= 0);
7821 /* If we are given a memory address, optimize to load just the element. We
7822 don't have to adjust the vector element number on little endian
7826 gcc_assert (REG_P (tmp_gpr
));
7827 emit_move_insn (dest
, rs6000_adjust_vec_address (dest
, src
, element
,
7828 tmp_gpr
, scalar_mode
));
7832 else if (REG_P (src
) || SUBREG_P (src
))
7834 int bit_shift
= byte_shift
+ 3;
7836 int dest_regno
= regno_or_subregno (dest
);
7837 int src_regno
= regno_or_subregno (src
);
7838 int element_regno
= regno_or_subregno (element
);
7840 gcc_assert (REG_P (tmp_gpr
));
7842 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7843 a general purpose register. */
7844 if (TARGET_P9_VECTOR
7845 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7846 && INT_REGNO_P (dest_regno
)
7847 && ALTIVEC_REGNO_P (src_regno
)
7848 && INT_REGNO_P (element_regno
))
7850 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7851 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7853 if (mode
== V16QImode
)
7854 emit_insn (VECTOR_ELT_ORDER_BIG
7855 ? gen_vextublx (dest_si
, element_si
, src
)
7856 : gen_vextubrx (dest_si
, element_si
, src
));
7858 else if (mode
== V8HImode
)
7860 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7861 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7862 emit_insn (VECTOR_ELT_ORDER_BIG
7863 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7864 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7870 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7871 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
7872 emit_insn (VECTOR_ELT_ORDER_BIG
7873 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
7874 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
7881 gcc_assert (REG_P (tmp_altivec
));
7883 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7884 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7885 will shift the element into the upper position (adding 3 to convert a
7886 byte shift into a bit shift). */
7887 if (scalar_size
== 8)
7889 if (!VECTOR_ELT_ORDER_BIG
)
7891 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
7897 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7899 emit_insn (gen_rtx_SET (tmp_gpr
,
7900 gen_rtx_AND (DImode
,
7901 gen_rtx_ASHIFT (DImode
,
7908 if (!VECTOR_ELT_ORDER_BIG
)
7910 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7912 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
7913 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
7919 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
7922 /* Get the value into the lower byte of the Altivec register where VSLO
7924 if (TARGET_P9_VECTOR
)
7925 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
7926 else if (can_create_pseudo_p ())
7927 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
7930 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7931 emit_move_insn (tmp_di
, tmp_gpr
);
7932 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
7935 /* Do the VSLO to get the value into the final location. */
7939 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
7943 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
7948 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7949 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
7950 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7951 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7954 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
7962 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
7963 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
7964 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
7965 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
7967 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
7968 emit_insn (gen_ashrdi3 (tmp_gpr_di
, tmp_gpr_di
,
7969 GEN_INT (64 - (8 * scalar_size
))));
7983 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7984 two SImode values. */
7987 rs6000_split_v4si_init_di_reg (rtx dest
, rtx si1
, rtx si2
, rtx tmp
)
7989 const unsigned HOST_WIDE_INT mask_32bit
= HOST_WIDE_INT_C (0xffffffff);
7991 if (CONST_INT_P (si1
) && CONST_INT_P (si2
))
7993 unsigned HOST_WIDE_INT const1
= (UINTVAL (si1
) & mask_32bit
) << 32;
7994 unsigned HOST_WIDE_INT const2
= UINTVAL (si2
) & mask_32bit
;
7996 emit_move_insn (dest
, GEN_INT (const1
| const2
));
8000 /* Put si1 into upper 32-bits of dest. */
8001 if (CONST_INT_P (si1
))
8002 emit_move_insn (dest
, GEN_INT ((UINTVAL (si1
) & mask_32bit
) << 32));
8005 /* Generate RLDIC. */
8006 rtx si1_di
= gen_rtx_REG (DImode
, regno_or_subregno (si1
));
8007 rtx shift_rtx
= gen_rtx_ASHIFT (DImode
, si1_di
, GEN_INT (32));
8008 rtx mask_rtx
= GEN_INT (mask_32bit
<< 32);
8009 rtx and_rtx
= gen_rtx_AND (DImode
, shift_rtx
, mask_rtx
);
8010 gcc_assert (!reg_overlap_mentioned_p (dest
, si1
));
8011 emit_insn (gen_rtx_SET (dest
, and_rtx
));
8014 /* Put si2 into the temporary. */
8015 gcc_assert (!reg_overlap_mentioned_p (dest
, tmp
));
8016 if (CONST_INT_P (si2
))
8017 emit_move_insn (tmp
, GEN_INT (UINTVAL (si2
) & mask_32bit
));
8019 emit_insn (gen_zero_extendsidi2 (tmp
, si2
));
8021 /* Combine the two parts. */
8022 emit_insn (gen_iordi3 (dest
, dest
, tmp
));
8026 /* Split a V4SI initialization. */
8029 rs6000_split_v4si_init (rtx operands
[])
8031 rtx dest
= operands
[0];
8033 /* Destination is a GPR, build up the two DImode parts in place. */
8034 if (REG_P (dest
) || SUBREG_P (dest
))
8036 int d_regno
= regno_or_subregno (dest
);
8037 rtx scalar1
= operands
[1];
8038 rtx scalar2
= operands
[2];
8039 rtx scalar3
= operands
[3];
8040 rtx scalar4
= operands
[4];
8041 rtx tmp1
= operands
[5];
8042 rtx tmp2
= operands
[6];
8044 /* Even though we only need one temporary (plus the destination, which
8045 has an early clobber constraint, try to use two temporaries, one for
8046 each double word created. That way the 2nd insn scheduling pass can
8047 rearrange things so the two parts are done in parallel. */
8048 if (BYTES_BIG_ENDIAN
)
8050 rtx di_lo
= gen_rtx_REG (DImode
, d_regno
);
8051 rtx di_hi
= gen_rtx_REG (DImode
, d_regno
+ 1);
8052 rs6000_split_v4si_init_di_reg (di_lo
, scalar1
, scalar2
, tmp1
);
8053 rs6000_split_v4si_init_di_reg (di_hi
, scalar3
, scalar4
, tmp2
);
8057 rtx di_lo
= gen_rtx_REG (DImode
, d_regno
+ 1);
8058 rtx di_hi
= gen_rtx_REG (DImode
, d_regno
);
8059 gcc_assert (!VECTOR_ELT_ORDER_BIG
);
8060 rs6000_split_v4si_init_di_reg (di_lo
, scalar4
, scalar3
, tmp1
);
8061 rs6000_split_v4si_init_di_reg (di_hi
, scalar2
, scalar1
, tmp2
);
8070 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8071 selects whether the alignment is abi mandated, optional, or
8072 both abi and optional alignment. */
8075 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8077 if (how
!= align_opt
)
8079 if (TREE_CODE (type
) == VECTOR_TYPE
)
8081 if (TARGET_PAIRED_FLOAT
&& PAIRED_VECTOR_MODE (TYPE_MODE (type
)))
8086 else if (align
< 128)
8091 if (how
!= align_abi
)
8093 if (TREE_CODE (type
) == ARRAY_TYPE
8094 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8096 if (align
< BITS_PER_WORD
)
8097 align
= BITS_PER_WORD
;
8104 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8107 rs6000_special_adjust_field_align_p (tree type
, unsigned int computed
)
8109 if (TARGET_ALTIVEC
&& TREE_CODE (type
) == VECTOR_TYPE
)
8111 if (computed
!= 128)
8114 if (!warned
&& warn_psabi
)
8117 inform (input_location
,
8118 "the layout of aggregates containing vectors with"
8119 " %d-byte alignment has changed in GCC 5",
8120 computed
/ BITS_PER_UNIT
);
8123 /* In current GCC there is no special case. */
8130 /* AIX increases natural record alignment to doubleword if the first
8131 field is an FP double while the FP fields remain word aligned. */
8134 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8135 unsigned int specified
)
8137 unsigned int align
= MAX (computed
, specified
);
8138 tree field
= TYPE_FIELDS (type
);
8140 /* Skip all non field decls */
8141 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
8142 field
= DECL_CHAIN (field
);
8144 if (field
!= NULL
&& field
!= type
)
8146 type
= TREE_TYPE (field
);
8147 while (TREE_CODE (type
) == ARRAY_TYPE
)
8148 type
= TREE_TYPE (type
);
8150 if (type
!= error_mark_node
&& TYPE_MODE (type
) == DFmode
)
8151 align
= MAX (align
, 64);
8157 /* Darwin increases record alignment to the natural alignment of
8161 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8162 unsigned int specified
)
8164 unsigned int align
= MAX (computed
, specified
);
8166 if (TYPE_PACKED (type
))
8169 /* Find the first field, looking down into aggregates. */
8171 tree field
= TYPE_FIELDS (type
);
8172 /* Skip all non field decls */
8173 while (field
!= NULL
&& TREE_CODE (field
) != FIELD_DECL
)
8174 field
= DECL_CHAIN (field
);
8177 /* A packed field does not contribute any extra alignment. */
8178 if (DECL_PACKED (field
))
8180 type
= TREE_TYPE (field
);
8181 while (TREE_CODE (type
) == ARRAY_TYPE
)
8182 type
= TREE_TYPE (type
);
8183 } while (AGGREGATE_TYPE_P (type
));
8185 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
8186 align
= MAX (align
, TYPE_ALIGN (type
));
8191 /* Return 1 for an operand in small memory on V.4/eabi. */
8194 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8195 machine_mode mode ATTRIBUTE_UNUSED
)
8200 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8203 if (DEFAULT_ABI
!= ABI_V4
)
8206 if (GET_CODE (op
) == SYMBOL_REF
)
8209 else if (GET_CODE (op
) != CONST
8210 || GET_CODE (XEXP (op
, 0)) != PLUS
8211 || GET_CODE (XEXP (XEXP (op
, 0), 0)) != SYMBOL_REF
8212 || GET_CODE (XEXP (XEXP (op
, 0), 1)) != CONST_INT
)
8217 rtx sum
= XEXP (op
, 0);
8218 HOST_WIDE_INT summand
;
8220 /* We have to be careful here, because it is the referenced address
8221 that must be 32k from _SDA_BASE_, not just the symbol. */
8222 summand
= INTVAL (XEXP (sum
, 1));
8223 if (summand
< 0 || summand
> g_switch_value
)
8226 sym_ref
= XEXP (sum
, 0);
8229 return SYMBOL_REF_SMALL_P (sym_ref
);
8235 /* Return true if either operand is a general purpose register. */
8238 gpr_or_gpr_p (rtx op0
, rtx op1
)
8240 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8241 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8244 /* Return true if this is a move direct operation between GPR registers and
8245 floating point/VSX registers. */
8248 direct_move_p (rtx op0
, rtx op1
)
8252 if (!REG_P (op0
) || !REG_P (op1
))
8255 if (!TARGET_DIRECT_MOVE
&& !TARGET_MFPGPR
)
8258 regno0
= REGNO (op0
);
8259 regno1
= REGNO (op1
);
8260 if (regno0
>= FIRST_PSEUDO_REGISTER
|| regno1
>= FIRST_PSEUDO_REGISTER
)
8263 if (INT_REGNO_P (regno0
))
8264 return (TARGET_DIRECT_MOVE
) ? VSX_REGNO_P (regno1
) : FP_REGNO_P (regno1
);
8266 else if (INT_REGNO_P (regno1
))
8268 if (TARGET_MFPGPR
&& FP_REGNO_P (regno0
))
8271 else if (TARGET_DIRECT_MOVE
&& VSX_REGNO_P (regno0
))
8278 /* Return true if the OFFSET is valid for the quad address instructions that
8279 use d-form (register + offset) addressing. */
8282 quad_address_offset_p (HOST_WIDE_INT offset
)
8284 return (IN_RANGE (offset
, -32768, 32767) && ((offset
) & 0xf) == 0);
8287 /* Return true if the ADDR is an acceptable address for a quad memory
8288 operation of mode MODE (either LQ/STQ for general purpose registers, or
8289 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8290 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8291 3.0 LXV/STXV instruction. */
8294 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8298 if (GET_MODE_SIZE (mode
) != 16)
8301 if (legitimate_indirect_address_p (addr
, strict
))
8304 if (VECTOR_MODE_P (mode
) && !mode_supports_vsx_dform_quad (mode
))
8307 if (GET_CODE (addr
) != PLUS
)
8310 op0
= XEXP (addr
, 0);
8311 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8314 op1
= XEXP (addr
, 1);
8315 if (!CONST_INT_P (op1
))
8318 return quad_address_offset_p (INTVAL (op1
));
8321 /* Return true if this is a load or store quad operation. This function does
8322 not handle the atomic quad memory instructions. */
8325 quad_load_store_p (rtx op0
, rtx op1
)
8329 if (!TARGET_QUAD_MEMORY
)
8332 else if (REG_P (op0
) && MEM_P (op1
))
8333 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8334 && quad_memory_operand (op1
, GET_MODE (op1
))
8335 && !reg_overlap_mentioned_p (op0
, op1
));
8337 else if (MEM_P (op0
) && REG_P (op1
))
8338 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8339 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8344 if (TARGET_DEBUG_ADDR
)
8346 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8347 ret
? "true" : "false");
8348 debug_rtx (gen_rtx_SET (op0
, op1
));
8354 /* Given an address, return a constant offset term if one exists. */
8357 address_offset (rtx op
)
8359 if (GET_CODE (op
) == PRE_INC
8360 || GET_CODE (op
) == PRE_DEC
)
8362 else if (GET_CODE (op
) == PRE_MODIFY
8363 || GET_CODE (op
) == LO_SUM
)
8366 if (GET_CODE (op
) == CONST
)
8369 if (GET_CODE (op
) == PLUS
)
8372 if (CONST_INT_P (op
))
8378 /* Return true if the MEM operand is a memory operand suitable for use
8379 with a (full width, possibly multiple) gpr load/store. On
8380 powerpc64 this means the offset must be divisible by 4.
8381 Implements 'Y' constraint.
8383 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8384 a constraint function we know the operand has satisfied a suitable
8385 memory predicate. Also accept some odd rtl generated by reload
8386 (see rs6000_legitimize_reload_address for various forms). It is
8387 important that reload rtl be accepted by appropriate constraints
8388 but not by the operand predicate.
8390 Offsetting a lo_sum should not be allowed, except where we know by
8391 alignment that a 32k boundary is not crossed, but see the ???
8392 comment in rs6000_legitimize_reload_address. Note that by
8393 "offsetting" here we mean a further offset to access parts of the
8394 MEM. It's fine to have a lo_sum where the inner address is offset
8395 from a sym, since the same sym+offset will appear in the high part
8396 of the address calculation. */
8399 mem_operand_gpr (rtx op
, machine_mode mode
)
8401 unsigned HOST_WIDE_INT offset
;
8403 rtx addr
= XEXP (op
, 0);
8405 op
= address_offset (addr
);
8409 offset
= INTVAL (op
);
8410 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8413 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8417 if (GET_CODE (addr
) == LO_SUM
)
8418 /* For lo_sum addresses, we must allow any offset except one that
8419 causes a wrap, so test only the low 16 bits. */
8420 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8422 return offset
+ 0x8000 < 0x10000u
- extra
;
8425 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8426 enforce an offset divisible by 4 even for 32-bit. */
8429 mem_operand_ds_form (rtx op
, machine_mode mode
)
8431 unsigned HOST_WIDE_INT offset
;
8433 rtx addr
= XEXP (op
, 0);
8435 if (!offsettable_address_p (false, mode
, addr
))
8438 op
= address_offset (addr
);
8442 offset
= INTVAL (op
);
8443 if ((offset
& 3) != 0)
8446 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8450 if (GET_CODE (addr
) == LO_SUM
)
8451 /* For lo_sum addresses, we must allow any offset except one that
8452 causes a wrap, so test only the low 16 bits. */
8453 offset
= ((offset
& 0xffff) ^ 0x8000) - 0x8000;
8455 return offset
+ 0x8000 < 0x10000u
- extra
;
8458 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8461 reg_offset_addressing_ok_p (machine_mode mode
)
8475 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8476 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8477 a vector mode, if we want to use the VSX registers to move it around,
8478 we need to restrict ourselves to reg+reg addressing. Similarly for
8479 IEEE 128-bit floating point that is passed in a single vector
8481 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8482 return mode_supports_vsx_dform_quad (mode
);
8487 /* Paired vector modes. Only reg+reg addressing is valid. */
8488 if (TARGET_PAIRED_FLOAT
)
8493 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8494 addressing for the LFIWZX and STFIWX instructions. */
8495 if (TARGET_NO_SDMODE_STACK
)
8507 virtual_stack_registers_memory_p (rtx op
)
8511 if (GET_CODE (op
) == REG
)
8512 regnum
= REGNO (op
);
8514 else if (GET_CODE (op
) == PLUS
8515 && GET_CODE (XEXP (op
, 0)) == REG
8516 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
8517 regnum
= REGNO (XEXP (op
, 0));
8522 return (regnum
>= FIRST_VIRTUAL_REGISTER
8523 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8526 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8527 is known to not straddle a 32k boundary. This function is used
8528 to determine whether -mcmodel=medium code can use TOC pointer
8529 relative addressing for OP. This means the alignment of the TOC
8530 pointer must also be taken into account, and unfortunately that is
8533 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8534 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8538 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8542 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8544 if (GET_CODE (op
) != SYMBOL_REF
)
8547 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8549 if (mode_supports_vsx_dform_quad (mode
))
8552 dsize
= GET_MODE_SIZE (mode
);
8553 decl
= SYMBOL_REF_DECL (op
);
8559 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8560 replacing memory addresses with an anchor plus offset. We
8561 could find the decl by rummaging around in the block->objects
8562 VEC for the given offset but that seems like too much work. */
8563 dalign
= BITS_PER_UNIT
;
8564 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8565 && SYMBOL_REF_ANCHOR_P (op
)
8566 && SYMBOL_REF_BLOCK (op
) != NULL
)
8568 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8570 dalign
= block
->alignment
;
8571 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8573 else if (CONSTANT_POOL_ADDRESS_P (op
))
8575 /* It would be nice to have get_pool_align().. */
8576 machine_mode cmode
= get_pool_mode (op
);
8578 dalign
= GET_MODE_ALIGNMENT (cmode
);
8581 else if (DECL_P (decl
))
8583 dalign
= DECL_ALIGN (decl
);
8587 /* Allow BLKmode when the entire object is known to not
8588 cross a 32k boundary. */
8589 if (!DECL_SIZE_UNIT (decl
))
8592 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8595 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8599 dalign
/= BITS_PER_UNIT
;
8600 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8601 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8602 return dalign
>= dsize
;
8608 /* Find how many bits of the alignment we know for this access. */
8609 dalign
/= BITS_PER_UNIT
;
8610 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8611 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8613 lsb
= offset
& -offset
;
8617 return dalign
>= dsize
;
8621 constant_pool_expr_p (rtx op
)
8625 split_const (op
, &base
, &offset
);
8626 return (GET_CODE (base
) == SYMBOL_REF
8627 && CONSTANT_POOL_ADDRESS_P (base
)
8628 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8631 /* These are only used to pass through from print_operand/print_operand_address
8632 to rs6000_output_addr_const_extra over the intervening function
8633 output_addr_const which is not target code. */
8634 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
8636 /* Return true if OP is a toc pointer relative address (the output
8637 of create_TOC_reference). If STRICT, do not match non-split
8638 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8639 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8640 TOCREL_OFFSET_RET respectively. */
8643 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8644 const_rtx
*tocrel_offset_ret
)
8649 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8651 /* When strict ensure we have everything tidy. */
8653 && !(GET_CODE (op
) == LO_SUM
8654 && REG_P (XEXP (op
, 0))
8655 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8658 /* When not strict, allow non-split TOC addresses and also allow
8659 (lo_sum (high ..)) TOC addresses created during reload. */
8660 if (GET_CODE (op
) == LO_SUM
)
8664 const_rtx tocrel_base
= op
;
8665 const_rtx tocrel_offset
= const0_rtx
;
8667 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8669 tocrel_base
= XEXP (op
, 0);
8670 tocrel_offset
= XEXP (op
, 1);
8673 if (tocrel_base_ret
)
8674 *tocrel_base_ret
= tocrel_base
;
8675 if (tocrel_offset_ret
)
8676 *tocrel_offset_ret
= tocrel_offset
;
8678 return (GET_CODE (tocrel_base
) == UNSPEC
8679 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
);
8682 /* Return true if X is a constant pool address, and also for cmodel=medium
8683 if X is a toc-relative address known to be offsettable within MODE. */
8686 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8689 const_rtx tocrel_base
, tocrel_offset
;
8690 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
8691 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8692 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8694 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8695 INTVAL (tocrel_offset
), mode
)));
8699 legitimate_small_data_p (machine_mode mode
, rtx x
)
8701 return (DEFAULT_ABI
== ABI_V4
8702 && !flag_pic
&& !TARGET_TOC
8703 && (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
)
8704 && small_data_operand (x
, mode
));
8708 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8709 bool strict
, bool worst_case
)
8711 unsigned HOST_WIDE_INT offset
;
8714 if (GET_CODE (x
) != PLUS
)
8716 if (!REG_P (XEXP (x
, 0)))
8718 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8720 if (mode_supports_vsx_dform_quad (mode
))
8721 return quad_address_p (x
, mode
, strict
);
8722 if (!reg_offset_addressing_ok_p (mode
))
8723 return virtual_stack_registers_memory_p (x
);
8724 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8726 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
8729 offset
= INTVAL (XEXP (x
, 1));
8735 /* Paired single modes: offset addressing isn't valid. */
8741 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8743 if (VECTOR_MEM_VSX_P (mode
))
8748 if (!TARGET_POWERPC64
)
8750 else if (offset
& 3)
8763 if (!TARGET_POWERPC64
)
8765 else if (offset
& 3)
8774 return offset
< 0x10000 - extra
;
8778 legitimate_indexed_address_p (rtx x
, int strict
)
8782 if (GET_CODE (x
) != PLUS
)
8788 /* Recognize the rtl generated by reload which we know will later be
8789 replaced with proper base and index regs. */
8791 && reload_in_progress
8792 && (REG_P (op0
) || GET_CODE (op0
) == PLUS
)
8796 return (REG_P (op0
) && REG_P (op1
)
8797 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
8798 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
8799 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
8800 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
8804 avoiding_indexed_address_p (machine_mode mode
)
8806 /* Avoid indexed addressing for modes that have non-indexed
8807 load/store instruction forms. */
8808 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
8812 legitimate_indirect_address_p (rtx x
, int strict
)
8814 return GET_CODE (x
) == REG
&& INT_REG_OK_FOR_BASE_P (x
, strict
);
8818 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
8820 if (!TARGET_MACHO
|| !flag_pic
8821 || mode
!= SImode
|| GET_CODE (x
) != MEM
)
8825 if (GET_CODE (x
) != LO_SUM
)
8827 if (GET_CODE (XEXP (x
, 0)) != REG
)
8829 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
8833 return CONSTANT_P (x
);
8837 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
8839 if (GET_CODE (x
) != LO_SUM
)
8841 if (GET_CODE (XEXP (x
, 0)) != REG
)
8843 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8845 /* quad word addresses are restricted, and we can't use LO_SUM. */
8846 if (mode_supports_vsx_dform_quad (mode
))
8850 if (TARGET_ELF
|| TARGET_MACHO
)
8854 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
8856 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8857 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8858 recognizes some LO_SUM addresses as valid although this
8859 function says opposite. In most cases, LRA through different
8860 transformations can generate correct code for address reloads.
8861 It can not manage only some LO_SUM cases. So we need to add
8862 code analogous to one in rs6000_legitimize_reload_address for
8863 LOW_SUM here saying that some addresses are still valid. */
8864 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
8865 && small_toc_ref (x
, VOIDmode
));
8866 if (TARGET_TOC
&& ! large_toc_ok
)
8868 if (GET_MODE_NUNITS (mode
) != 1)
8870 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
8871 && !(/* ??? Assume floating point reg based on mode? */
8872 TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
8873 && (mode
== DFmode
|| mode
== DDmode
)))
8876 return CONSTANT_P (x
) || large_toc_ok
;
8883 /* Try machine-dependent ways of modifying an illegitimate address
8884 to be legitimate. If we find one, return the new, valid address.
8885 This is used from only one place: `memory_address' in explow.c.
8887 OLDX is the address as it was before break_out_memory_refs was
8888 called. In some cases it is useful to look at this to decide what
8891 It is always safe for this function to do nothing. It exists to
8892 recognize opportunities to optimize the output.
8894 On RS/6000, first check for the sum of a register with a constant
8895 integer that is out of range. If so, generate code to add the
8896 constant with the low-order 16 bits masked to the register and force
8897 this result into another register (this can be done with `cau').
8898 Then generate an address of REG+(CONST&0xffff), allowing for the
8899 possibility of bit 16 being a one.
8901 Then check for the sum of a register and something not constant, try to
8902 load the other things into a register and return the sum. */
8905 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
8910 if (!reg_offset_addressing_ok_p (mode
)
8911 || mode_supports_vsx_dform_quad (mode
))
8913 if (virtual_stack_registers_memory_p (x
))
8916 /* In theory we should not be seeing addresses of the form reg+0,
8917 but just in case it is generated, optimize it away. */
8918 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
8919 return force_reg (Pmode
, XEXP (x
, 0));
8921 /* For TImode with load/store quad, restrict addresses to just a single
8922 pointer, so it works with both GPRs and VSX registers. */
8923 /* Make sure both operands are registers. */
8924 else if (GET_CODE (x
) == PLUS
8925 && (mode
!= TImode
|| !TARGET_VSX_TIMODE
))
8926 return gen_rtx_PLUS (Pmode
,
8927 force_reg (Pmode
, XEXP (x
, 0)),
8928 force_reg (Pmode
, XEXP (x
, 1)));
8930 return force_reg (Pmode
, x
);
8932 if (GET_CODE (x
) == SYMBOL_REF
)
8934 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
8936 return rs6000_legitimize_tls_address (x
, model
);
8948 /* As in legitimate_offset_address_p we do not assume
8949 worst-case. The mode here is just a hint as to the registers
8950 used. A TImode is usually in gprs, but may actually be in
8951 fprs. Leave worst-case scenario for reload to handle via
8952 insn constraints. PTImode is only GPRs. */
8959 if (GET_CODE (x
) == PLUS
8960 && GET_CODE (XEXP (x
, 0)) == REG
8961 && GET_CODE (XEXP (x
, 1)) == CONST_INT
8962 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
8964 && !PAIRED_VECTOR_MODE (mode
))
8966 HOST_WIDE_INT high_int
, low_int
;
8968 low_int
= ((INTVAL (XEXP (x
, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8969 if (low_int
>= 0x8000 - extra
)
8971 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
8972 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8973 GEN_INT (high_int
)), 0);
8974 return plus_constant (Pmode
, sum
, low_int
);
8976 else if (GET_CODE (x
) == PLUS
8977 && GET_CODE (XEXP (x
, 0)) == REG
8978 && GET_CODE (XEXP (x
, 1)) != CONST_INT
8979 && GET_MODE_NUNITS (mode
) == 1
8980 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
8981 || (/* ??? Assume floating point reg based on mode? */
8982 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
8983 && (mode
== DFmode
|| mode
== DDmode
)))
8984 && !avoiding_indexed_address_p (mode
))
8986 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
8987 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
8989 else if (PAIRED_VECTOR_MODE (mode
))
8993 /* We accept [reg + reg]. */
8995 if (GET_CODE (x
) == PLUS
)
8997 rtx op1
= XEXP (x
, 0);
8998 rtx op2
= XEXP (x
, 1);
9001 op1
= force_reg (Pmode
, op1
);
9002 op2
= force_reg (Pmode
, op2
);
9004 /* We can't always do [reg + reg] for these, because [reg +
9005 reg + offset] is not a legitimate addressing mode. */
9006 y
= gen_rtx_PLUS (Pmode
, op1
, op2
);
9008 if ((GET_MODE_SIZE (mode
) > 8 || mode
== DDmode
) && REG_P (op2
))
9009 return force_reg (Pmode
, y
);
9014 return force_reg (Pmode
, x
);
9016 else if ((TARGET_ELF
9018 || !MACHO_DYNAMIC_NO_PIC_P
9024 && GET_CODE (x
) != CONST_INT
9025 && GET_CODE (x
) != CONST_WIDE_INT
9026 && GET_CODE (x
) != CONST_DOUBLE
9028 && GET_MODE_NUNITS (mode
) == 1
9029 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9030 || (/* ??? Assume floating point reg based on mode? */
9031 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
9032 && (mode
== DFmode
|| mode
== DDmode
))))
9034 rtx reg
= gen_reg_rtx (Pmode
);
9036 emit_insn (gen_elf_high (reg
, x
));
9038 emit_insn (gen_macho_high (reg
, x
));
9039 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9042 && GET_CODE (x
) == SYMBOL_REF
9043 && constant_pool_expr_p (x
)
9044 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9045 return create_TOC_reference (x
, NULL_RTX
);
9050 /* Debug version of rs6000_legitimize_address. */
9052 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9058 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9059 insns
= get_insns ();
9065 "\nrs6000_legitimize_address: mode %s, old code %s, "
9066 "new code %s, modified\n",
9067 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9068 GET_RTX_NAME (GET_CODE (ret
)));
9070 fprintf (stderr
, "Original address:\n");
9073 fprintf (stderr
, "oldx:\n");
9076 fprintf (stderr
, "New address:\n");
9081 fprintf (stderr
, "Insns added:\n");
9082 debug_rtx_list (insns
, 20);
9088 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9089 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9100 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9101 We need to emit DTP-relative relocations. */
9103 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9105 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9110 fputs ("\t.long\t", file
);
9113 fputs (DOUBLE_INT_ASM_OP
, file
);
9118 output_addr_const (file
, x
);
9120 fputs ("@dtprel+0x8000", file
);
9121 else if (TARGET_XCOFF
&& GET_CODE (x
) == SYMBOL_REF
)
9123 switch (SYMBOL_REF_TLS_MODEL (x
))
9127 case TLS_MODEL_LOCAL_EXEC
:
9128 fputs ("@le", file
);
9130 case TLS_MODEL_INITIAL_EXEC
:
9131 fputs ("@ie", file
);
9133 case TLS_MODEL_GLOBAL_DYNAMIC
:
9134 case TLS_MODEL_LOCAL_DYNAMIC
:
9143 /* Return true if X is a symbol that refers to real (rather than emulated)
9147 rs6000_real_tls_symbol_ref_p (rtx x
)
9149 return (GET_CODE (x
) == SYMBOL_REF
9150 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9153 /* In the name of slightly smaller debug output, and to cater to
9154 general assembler lossage, recognize various UNSPEC sequences
9155 and turn them back into a direct symbol reference. */
9158 rs6000_delegitimize_address (rtx orig_x
)
9162 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9168 if (TARGET_CMODEL
!= CMODEL_SMALL
9169 && GET_CODE (y
) == LO_SUM
)
9173 if (GET_CODE (y
) == PLUS
9174 && GET_MODE (y
) == Pmode
9175 && CONST_INT_P (XEXP (y
, 1)))
9177 offset
= XEXP (y
, 1);
9181 if (GET_CODE (y
) == UNSPEC
9182 && XINT (y
, 1) == UNSPEC_TOCREL
)
9184 y
= XVECEXP (y
, 0, 0);
9187 /* Do not associate thread-local symbols with the original
9188 constant pool symbol. */
9190 && GET_CODE (y
) == SYMBOL_REF
9191 && CONSTANT_POOL_ADDRESS_P (y
)
9192 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9196 if (offset
!= NULL_RTX
)
9197 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9198 if (!MEM_P (orig_x
))
9201 return replace_equiv_address_nv (orig_x
, y
);
9205 && GET_CODE (orig_x
) == LO_SUM
9206 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9208 y
= XEXP (XEXP (orig_x
, 1), 0);
9209 if (GET_CODE (y
) == UNSPEC
9210 && XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9211 return XVECEXP (y
, 0, 0);
9217 /* Return true if X shouldn't be emitted into the debug info.
9218 The linker doesn't like .toc section references from
9219 .debug_* sections, so reject .toc section symbols. */
9222 rs6000_const_not_ok_for_debug_p (rtx x
)
9224 if (GET_CODE (x
) == SYMBOL_REF
9225 && CONSTANT_POOL_ADDRESS_P (x
))
9227 rtx c
= get_pool_constant (x
);
9228 machine_mode cmode
= get_pool_mode (x
);
9229 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9237 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9240 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9242 int icode
= INSN_CODE (insn
);
9244 /* Reject creating doloop insns. Combine should not be allowed
9245 to create these for a number of reasons:
9246 1) In a nested loop, if combine creates one of these in an
9247 outer loop and the register allocator happens to allocate ctr
9248 to the outer loop insn, then the inner loop can't use ctr.
9249 Inner loops ought to be more highly optimized.
9250 2) Combine often wants to create one of these from what was
9251 originally a three insn sequence, first combining the three
9252 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9253 allocated ctr, the splitter takes use back to the three insn
9254 sequence. It's better to stop combine at the two insn
9256 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9257 insns, the register allocator sometimes uses floating point
9258 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9259 jump insn and output reloads are not implemented for jumps,
9260 the ctrsi/ctrdi splitters need to handle all possible cases.
9261 That's a pain, and it gets to be seriously difficult when a
9262 splitter that runs after reload needs memory to transfer from
9263 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9264 for the difficult case. It's better to not create problems
9265 in the first place. */
9266 if (icode
!= CODE_FOR_nothing
9267 && (icode
== CODE_FOR_ctrsi_internal1
9268 || icode
== CODE_FOR_ctrdi_internal1
9269 || icode
== CODE_FOR_ctrsi_internal2
9270 || icode
== CODE_FOR_ctrdi_internal2
9271 || icode
== CODE_FOR_ctrsi_internal3
9272 || icode
== CODE_FOR_ctrdi_internal3
9273 || icode
== CODE_FOR_ctrsi_internal4
9274 || icode
== CODE_FOR_ctrdi_internal4
))
9280 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9282 static GTY(()) rtx rs6000_tls_symbol
;
9284 rs6000_tls_get_addr (void)
9286 if (!rs6000_tls_symbol
)
9287 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9289 return rs6000_tls_symbol
;
9292 /* Construct the SYMBOL_REF for TLS GOT references. */
9294 static GTY(()) rtx rs6000_got_symbol
;
9296 rs6000_got_sym (void)
9298 if (!rs6000_got_symbol
)
9300 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9301 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9302 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9305 return rs6000_got_symbol
;
9308 /* AIX Thread-Local Address support. */
9311 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9313 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
, tlsaddr
;
9317 name
= XSTR (addr
, 0);
9318 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9319 or the symbol will be in TLS private data section. */
9320 if (name
[strlen (name
) - 1] != ']'
9321 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr
))
9322 || bss_initializer_p (SYMBOL_REF_DECL (addr
))))
9324 tlsname
= XALLOCAVEC (char, strlen (name
) + 4);
9325 strcpy (tlsname
, name
);
9327 bss_initializer_p (SYMBOL_REF_DECL (addr
)) ? "[UL]" : "[TL]");
9328 tlsaddr
= copy_rtx (addr
);
9329 XSTR (tlsaddr
, 0) = ggc_strdup (tlsname
);
9334 /* Place addr into TOC constant pool. */
9335 sym
= force_const_mem (GET_MODE (tlsaddr
), tlsaddr
);
9337 /* Output the TOC entry and create the MEM referencing the value. */
9338 if (constant_pool_expr_p (XEXP (sym
, 0))
9339 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9341 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9342 mem
= gen_const_mem (Pmode
, tocref
);
9343 set_mem_alias_set (mem
, get_TOC_alias_set ());
9348 /* Use global-dynamic for local-dynamic. */
9349 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9350 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9352 /* Create new TOC reference for @m symbol. */
9353 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9354 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9355 strcpy (tlsname
, "*LCM");
9356 strcat (tlsname
, name
+ 3);
9357 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9358 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9359 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9360 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9361 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9363 rtx modreg
= gen_reg_rtx (Pmode
);
9364 emit_insn (gen_rtx_SET (modreg
, modmem
));
9366 tmpreg
= gen_reg_rtx (Pmode
);
9367 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9369 dest
= gen_reg_rtx (Pmode
);
9371 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9373 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9376 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9377 else if (TARGET_32BIT
)
9379 tlsreg
= gen_reg_rtx (SImode
);
9380 emit_insn (gen_tls_get_tpointer (tlsreg
));
9383 tlsreg
= gen_rtx_REG (DImode
, 13);
9385 /* Load the TOC value into temporary register. */
9386 tmpreg
= gen_reg_rtx (Pmode
);
9387 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9388 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9389 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9391 /* Add TOC symbol value to TLS pointer. */
9392 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9397 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9398 this (thread-local) address. */
9401 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9406 return rs6000_legitimize_tls_address_aix (addr
, model
);
9408 dest
= gen_reg_rtx (Pmode
);
9409 if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 16)
9415 tlsreg
= gen_rtx_REG (Pmode
, 13);
9416 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9420 tlsreg
= gen_rtx_REG (Pmode
, 2);
9421 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9425 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9429 tmp
= gen_reg_rtx (Pmode
);
9432 tlsreg
= gen_rtx_REG (Pmode
, 13);
9433 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9437 tlsreg
= gen_rtx_REG (Pmode
, 2);
9438 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9442 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9444 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9449 rtx r3
, got
, tga
, tmp1
, tmp2
, call_insn
;
9451 /* We currently use relocations like @got@tlsgd for tls, which
9452 means the linker will handle allocation of tls entries, placing
9453 them in the .got section. So use a pointer to the .got section,
9454 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9455 or to secondary GOT sections used by 32-bit -fPIC. */
9457 got
= gen_rtx_REG (Pmode
, 2);
9461 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9464 rtx gsym
= rs6000_got_sym ();
9465 got
= gen_reg_rtx (Pmode
);
9467 rs6000_emit_move (got
, gsym
, Pmode
);
9472 tmp1
= gen_reg_rtx (Pmode
);
9473 tmp2
= gen_reg_rtx (Pmode
);
9474 mem
= gen_const_mem (Pmode
, tmp1
);
9475 lab
= gen_label_rtx ();
9476 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9477 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9478 if (TARGET_LINK_STACK
)
9479 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9480 emit_move_insn (tmp2
, mem
);
9481 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9482 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9487 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9489 tga
= rs6000_tls_get_addr ();
9490 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
,
9491 1, const0_rtx
, Pmode
);
9493 r3
= gen_rtx_REG (Pmode
, 3);
9494 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9497 insn
= gen_tls_gd_aix64 (r3
, got
, addr
, tga
, const0_rtx
);
9499 insn
= gen_tls_gd_aix32 (r3
, got
, addr
, tga
, const0_rtx
);
9501 else if (DEFAULT_ABI
== ABI_V4
)
9502 insn
= gen_tls_gd_sysvsi (r3
, got
, addr
, tga
, const0_rtx
);
9505 call_insn
= last_call_insn ();
9506 PATTERN (call_insn
) = insn
;
9507 if (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
9508 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
),
9509 pic_offset_table_rtx
);
9511 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9513 tga
= rs6000_tls_get_addr ();
9514 tmp1
= gen_reg_rtx (Pmode
);
9515 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
,
9516 1, const0_rtx
, Pmode
);
9518 r3
= gen_rtx_REG (Pmode
, 3);
9519 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
9522 insn
= gen_tls_ld_aix64 (r3
, got
, tga
, const0_rtx
);
9524 insn
= gen_tls_ld_aix32 (r3
, got
, tga
, const0_rtx
);
9526 else if (DEFAULT_ABI
== ABI_V4
)
9527 insn
= gen_tls_ld_sysvsi (r3
, got
, tga
, const0_rtx
);
9530 call_insn
= last_call_insn ();
9531 PATTERN (call_insn
) = insn
;
9532 if (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
9533 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
),
9534 pic_offset_table_rtx
);
9536 if (rs6000_tls_size
== 16)
9539 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9541 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9543 else if (rs6000_tls_size
== 32)
9545 tmp2
= gen_reg_rtx (Pmode
);
9547 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9549 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9552 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9554 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9558 tmp2
= gen_reg_rtx (Pmode
);
9560 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9562 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9564 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9570 /* IE, or 64-bit offset LE. */
9571 tmp2
= gen_reg_rtx (Pmode
);
9573 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9575 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9578 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9580 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9588 /* Only create the global variable for the stack protect guard if we are using
9589 the global flavor of that guard. */
9591 rs6000_init_stack_protect_guard (void)
9593 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9594 return default_stack_protect_guard ();
9599 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9602 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9604 if (GET_CODE (x
) == HIGH
9605 && GET_CODE (XEXP (x
, 0)) == UNSPEC
)
9608 /* A TLS symbol in the TOC cannot contain a sum. */
9609 if (GET_CODE (x
) == CONST
9610 && GET_CODE (XEXP (x
, 0)) == PLUS
9611 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
9612 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9615 /* Do not place an ELF TLS symbol in the constant pool. */
9616 return TARGET_ELF
&& tls_referenced_p (x
);
9619 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9620 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9621 can be addressed relative to the toc pointer. */
9624 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9626 return ((constant_pool_expr_p (sym
)
9627 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9628 get_pool_mode (sym
)))
9629 || (TARGET_CMODEL
== CMODEL_MEDIUM
9630 && SYMBOL_REF_LOCAL_P (sym
)
9631 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9634 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9635 replace the input X, or the original X if no replacement is called for.
9636 The output parameter *WIN is 1 if the calling macro should goto WIN,
9639 For RS/6000, we wish to handle large displacements off a base
9640 register by splitting the addend across an addiu/addis and the mem insn.
9641 This cuts number of extra insns needed from 3 to 1.
9643 On Darwin, we use this to generate code for floating point constants.
9644 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9645 The Darwin code is inside #if TARGET_MACHO because only then are the
9646 machopic_* functions defined. */
9648 rs6000_legitimize_reload_address (rtx x
, machine_mode mode
,
9649 int opnum
, int type
,
9650 int ind_levels ATTRIBUTE_UNUSED
, int *win
)
9652 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9653 bool quad_offset_p
= mode_supports_vsx_dform_quad (mode
);
9655 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9656 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9659 && ((mode
== DFmode
&& recog_data
.operand_mode
[0] == V2DFmode
)
9660 || (mode
== DImode
&& recog_data
.operand_mode
[0] == V2DImode
)
9661 || (mode
== SFmode
&& recog_data
.operand_mode
[0] == V4SFmode
9662 && TARGET_P9_VECTOR
)
9663 || (mode
== SImode
&& recog_data
.operand_mode
[0] == V4SImode
9664 && TARGET_P9_VECTOR
)))
9665 reg_offset_p
= false;
9667 /* We must recognize output that we have already generated ourselves. */
9668 if (GET_CODE (x
) == PLUS
9669 && GET_CODE (XEXP (x
, 0)) == PLUS
9670 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
9671 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
9672 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
9674 if (TARGET_DEBUG_ADDR
)
9676 fprintf (stderr
, "\nlegitimize_reload_address push_reload #1:\n");
9679 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9680 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
9681 opnum
, (enum reload_type
) type
);
9686 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9687 if (GET_CODE (x
) == LO_SUM
9688 && GET_CODE (XEXP (x
, 0)) == HIGH
)
9690 if (TARGET_DEBUG_ADDR
)
9692 fprintf (stderr
, "\nlegitimize_reload_address push_reload #2:\n");
9695 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9696 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9697 opnum
, (enum reload_type
) type
);
9703 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
9704 && GET_CODE (x
) == LO_SUM
9705 && GET_CODE (XEXP (x
, 0)) == PLUS
9706 && XEXP (XEXP (x
, 0), 0) == pic_offset_table_rtx
9707 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == HIGH
9708 && XEXP (XEXP (XEXP (x
, 0), 1), 0) == XEXP (x
, 1)
9709 && machopic_operand_p (XEXP (x
, 1)))
9711 /* Result of previous invocation of this function on Darwin
9712 floating point constant. */
9713 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9714 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9715 opnum
, (enum reload_type
) type
);
9721 if (TARGET_CMODEL
!= CMODEL_SMALL
9724 && small_toc_ref (x
, VOIDmode
))
9726 rtx hi
= gen_rtx_HIGH (Pmode
, copy_rtx (x
));
9727 x
= gen_rtx_LO_SUM (Pmode
, hi
, x
);
9728 if (TARGET_DEBUG_ADDR
)
9730 fprintf (stderr
, "\nlegitimize_reload_address push_reload #3:\n");
9733 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9734 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9735 opnum
, (enum reload_type
) type
);
9740 if (GET_CODE (x
) == PLUS
9741 && REG_P (XEXP (x
, 0))
9742 && REGNO (XEXP (x
, 0)) < FIRST_PSEUDO_REGISTER
9743 && INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 1)
9744 && CONST_INT_P (XEXP (x
, 1))
9746 && !PAIRED_VECTOR_MODE (mode
)
9747 && (quad_offset_p
|| !VECTOR_MODE_P (mode
) || VECTOR_MEM_NONE_P (mode
)))
9749 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
9750 HOST_WIDE_INT low
= ((val
& 0xffff) ^ 0x8000) - 0x8000;
9752 = (((val
- low
) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9754 /* Check for 32-bit overflow or quad addresses with one of the
9755 four least significant bits set. */
9756 if (high
+ low
!= val
9757 || (quad_offset_p
&& (low
& 0xf)))
9763 /* Reload the high part into a base reg; leave the low part
9764 in the mem directly. */
9766 x
= gen_rtx_PLUS (GET_MODE (x
),
9767 gen_rtx_PLUS (GET_MODE (x
), XEXP (x
, 0),
9771 if (TARGET_DEBUG_ADDR
)
9773 fprintf (stderr
, "\nlegitimize_reload_address push_reload #4:\n");
9776 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9777 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
9778 opnum
, (enum reload_type
) type
);
9783 if (GET_CODE (x
) == SYMBOL_REF
9786 && (!VECTOR_MODE_P (mode
) || VECTOR_MEM_NONE_P (mode
))
9787 && !PAIRED_VECTOR_MODE (mode
)
9789 && DEFAULT_ABI
== ABI_DARWIN
9790 && (flag_pic
|| MACHO_DYNAMIC_NO_PIC_P
)
9791 && machopic_symbol_defined_p (x
)
9793 && DEFAULT_ABI
== ABI_V4
9796 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9797 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9799 ??? Assume floating point reg based on mode? This assumption is
9800 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9801 where reload ends up doing a DFmode load of a constant from
9802 mem using two gprs. Unfortunately, at this point reload
9803 hasn't yet selected regs so poking around in reload data
9804 won't help and even if we could figure out the regs reliably,
9805 we'd still want to allow this transformation when the mem is
9806 naturally aligned. Since we say the address is good here, we
9807 can't disable offsets from LO_SUMs in mem_operand_gpr.
9808 FIXME: Allow offset from lo_sum for other modes too, when
9809 mem is sufficiently aligned.
9811 Also disallow this if the type can go in VMX/Altivec registers, since
9812 those registers do not have d-form (reg+offset) address modes. */
9813 && !reg_addr
[mode
].scalar_in_vmx_p
9818 && (mode
!= TImode
|| !TARGET_VSX_TIMODE
)
9820 && (mode
!= DImode
|| TARGET_POWERPC64
)
9821 && ((mode
!= DFmode
&& mode
!= DDmode
) || TARGET_POWERPC64
9822 || (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)))
9827 rtx offset
= machopic_gen_offset (x
);
9828 x
= gen_rtx_LO_SUM (GET_MODE (x
),
9829 gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
9830 gen_rtx_HIGH (Pmode
, offset
)), offset
);
9834 x
= gen_rtx_LO_SUM (GET_MODE (x
),
9835 gen_rtx_HIGH (Pmode
, x
), x
);
9837 if (TARGET_DEBUG_ADDR
)
9839 fprintf (stderr
, "\nlegitimize_reload_address push_reload #5:\n");
9842 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9843 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9844 opnum
, (enum reload_type
) type
);
9849 /* Reload an offset address wrapped by an AND that represents the
9850 masking of the lower bits. Strip the outer AND and let reload
9851 convert the offset address into an indirect address. For VSX,
9852 force reload to create the address with an AND in a separate
9853 register, because we can't guarantee an altivec register will
9855 if (VECTOR_MEM_ALTIVEC_P (mode
)
9856 && GET_CODE (x
) == AND
9857 && GET_CODE (XEXP (x
, 0)) == PLUS
9858 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
9859 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
9860 && GET_CODE (XEXP (x
, 1)) == CONST_INT
9861 && INTVAL (XEXP (x
, 1)) == -16)
9871 && GET_CODE (x
) == SYMBOL_REF
9872 && use_toc_relative_ref (x
, mode
))
9874 x
= create_TOC_reference (x
, NULL_RTX
);
9875 if (TARGET_CMODEL
!= CMODEL_SMALL
)
9877 if (TARGET_DEBUG_ADDR
)
9879 fprintf (stderr
, "\nlegitimize_reload_address push_reload #6:\n");
9882 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
9883 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
9884 opnum
, (enum reload_type
) type
);
9893 /* Debug version of rs6000_legitimize_reload_address. */
9895 rs6000_debug_legitimize_reload_address (rtx x
, machine_mode mode
,
9896 int opnum
, int type
,
9897 int ind_levels
, int *win
)
9899 rtx ret
= rs6000_legitimize_reload_address (x
, mode
, opnum
, type
,
9902 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9903 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9904 GET_MODE_NAME (mode
), opnum
, type
, ind_levels
, *win
);
9908 fprintf (stderr
, "Same address returned\n");
9910 fprintf (stderr
, "NULL returned\n");
9913 fprintf (stderr
, "New address:\n");
9920 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9921 that is a valid memory address for an instruction.
9922 The MODE argument is the machine mode for the MEM expression
9923 that wants to use this address.
9925 On the RS/6000, there are four valid address: a SYMBOL_REF that
9926 refers to a constant pool entry of an address (or the sum of it
9927 plus a constant), a short (16-bit signed) constant plus a register,
9928 the sum of two registers, or a register indirect, possibly with an
9929 auto-increment. For DFmode, DDmode and DImode with a constant plus
9930 register, we must ensure that both words are addressable or PowerPC64
9931 with offset word aligned.
9933 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9934 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9935 because adjacent memory cells are accessed by adding word-sized offsets
9936 during assembly output. */
9938 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
9940 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9941 bool quad_offset_p
= mode_supports_vsx_dform_quad (mode
);
9943 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9944 if (VECTOR_MEM_ALTIVEC_P (mode
)
9945 && GET_CODE (x
) == AND
9946 && GET_CODE (XEXP (x
, 1)) == CONST_INT
9947 && INTVAL (XEXP (x
, 1)) == -16)
9950 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9952 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9955 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9956 && mode_supports_pre_incdec_p (mode
)
9957 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9959 /* Handle restricted vector d-form offsets in ISA 3.0. */
9962 if (quad_address_p (x
, mode
, reg_ok_strict
))
9965 else if (virtual_stack_registers_memory_p (x
))
9968 else if (reg_offset_p
)
9970 if (legitimate_small_data_p (mode
, x
))
9972 if (legitimate_constant_pool_address_p (x
, mode
,
9973 reg_ok_strict
|| lra_in_progress
))
9975 if (reg_addr
[mode
].fused_toc
&& GET_CODE (x
) == UNSPEC
9976 && XINT (x
, 1) == UNSPEC_FUSION_ADDIS
)
9980 /* For TImode, if we have TImode in VSX registers, only allow register
9981 indirect addresses. This will allow the values to go in either GPRs
9982 or VSX registers without reloading. The vector types would tend to
9983 go into VSX registers, so we allow REG+REG, while TImode seems
9984 somewhat split, in that some uses are GPR based, and some VSX based. */
9985 /* FIXME: We could loosen this by changing the following to
9986 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9987 but currently we cannot allow REG+REG addressing for TImode. See
9988 PR72827 for complete details on how this ends up hoodwinking DSE. */
9989 if (mode
== TImode
&& TARGET_VSX_TIMODE
)
9991 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9994 && GET_CODE (x
) == PLUS
9995 && GET_CODE (XEXP (x
, 0)) == REG
9996 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9997 || XEXP (x
, 0) == arg_pointer_rtx
)
9998 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
10000 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
10002 if (!FLOAT128_2REG_P (mode
)
10003 && ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
10004 || TARGET_POWERPC64
10005 || (mode
!= DFmode
&& mode
!= DDmode
))
10006 && (TARGET_POWERPC64
|| mode
!= DImode
)
10007 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
10009 && !avoiding_indexed_address_p (mode
)
10010 && legitimate_indexed_address_p (x
, reg_ok_strict
))
10012 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
10013 && mode_supports_pre_modify_p (mode
)
10014 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
10015 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
10016 reg_ok_strict
, false)
10017 || (!avoiding_indexed_address_p (mode
)
10018 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
10019 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
10021 if (reg_offset_p
&& !quad_offset_p
10022 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
10027 /* Debug version of rs6000_legitimate_address_p. */
10029 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
10030 bool reg_ok_strict
)
10032 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
10034 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10035 "strict = %d, reload = %s, code = %s\n",
10036 ret
? "true" : "false",
10037 GET_MODE_NAME (mode
),
10041 : (reload_in_progress
? "progress" : "before")),
10042 GET_RTX_NAME (GET_CODE (x
)));
10048 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10051 rs6000_mode_dependent_address_p (const_rtx addr
,
10052 addr_space_t as ATTRIBUTE_UNUSED
)
10054 return rs6000_mode_dependent_address_ptr (addr
);
10057 /* Go to LABEL if ADDR (a legitimate address expression)
10058 has an effect that depends on the machine mode it is used for.
10060 On the RS/6000 this is true of all integral offsets (since AltiVec
10061 and VSX modes don't allow them) or is a pre-increment or decrement.
10063 ??? Except that due to conceptual problems in offsettable_address_p
10064 we can't really report the problems of integral offsets. So leave
10065 this assuming that the adjustable offset must be valid for the
10066 sub-words of a TFmode operand, which is what we had before. */
10069 rs6000_mode_dependent_address (const_rtx addr
)
10071 switch (GET_CODE (addr
))
10074 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10075 is considered a legitimate address before reload, so there
10076 are no offset restrictions in that case. Note that this
10077 condition is safe in strict mode because any address involving
10078 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10079 been rejected as illegitimate. */
10080 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10081 && XEXP (addr
, 0) != arg_pointer_rtx
10082 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
)
10084 unsigned HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10085 return val
+ 0x8000 >= 0x10000 - (TARGET_POWERPC64
? 8 : 12);
10090 /* Anything in the constant pool is sufficiently aligned that
10091 all bytes have the same high part address. */
10092 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10094 /* Auto-increment cases are now treated generically in recog.c. */
10096 return TARGET_UPDATE
;
10098 /* AND is only allowed in Altivec loads. */
10109 /* Debug version of rs6000_mode_dependent_address. */
10111 rs6000_debug_mode_dependent_address (const_rtx addr
)
10113 bool ret
= rs6000_mode_dependent_address (addr
);
10115 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10116 ret
? "true" : "false");
10122 /* Implement FIND_BASE_TERM. */
10125 rs6000_find_base_term (rtx op
)
10130 if (GET_CODE (base
) == CONST
)
10131 base
= XEXP (base
, 0);
10132 if (GET_CODE (base
) == PLUS
)
10133 base
= XEXP (base
, 0);
10134 if (GET_CODE (base
) == UNSPEC
)
10135 switch (XINT (base
, 1))
10137 case UNSPEC_TOCREL
:
10138 case UNSPEC_MACHOPIC_OFFSET
:
10139 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10140 for aliasing purposes. */
10141 return XVECEXP (base
, 0, 0);
10147 /* More elaborate version of recog's offsettable_memref_p predicate
10148 that works around the ??? note of rs6000_mode_dependent_address.
10149 In particular it accepts
10151 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10153 in 32-bit mode, that the recog predicate rejects. */
10156 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
)
10163 /* First mimic offsettable_memref_p. */
10164 if (offsettable_address_p (true, GET_MODE (op
), XEXP (op
, 0)))
10167 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10168 the latter predicate knows nothing about the mode of the memory
10169 reference and, therefore, assumes that it is the largest supported
10170 mode (TFmode). As a consequence, legitimate offsettable memory
10171 references are rejected. rs6000_legitimate_offset_address_p contains
10172 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10173 at least with a little bit of help here given that we know the
10174 actual registers used. */
10175 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10176 || GET_MODE_SIZE (reg_mode
) == 4);
10177 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10181 /* Determine the reassociation width to be used in reassociate_bb.
10182 This takes into account how many parallel operations we
10183 can actually do of a given type, and also the latency.
10185 int add/sub 6/cycle
10187 vect add/sub/mul 2/cycle
10188 fp add/sub/mul 2/cycle
10193 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10196 switch (rs6000_cpu
)
10198 case PROCESSOR_POWER8
:
10199 case PROCESSOR_POWER9
:
10200 if (DECIMAL_FLOAT_MODE_P (mode
))
10202 if (VECTOR_MODE_P (mode
))
10204 if (INTEGRAL_MODE_P (mode
))
10205 return opc
== MULT_EXPR
? 4 : 6;
10206 if (FLOAT_MODE_P (mode
))
10215 /* Change register usage conditional on target flags. */
10217 rs6000_conditional_register_usage (void)
10221 if (TARGET_DEBUG_TARGET
)
10222 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10224 /* Set MQ register fixed (already call_used) so that it will not be
10226 fixed_regs
[64] = 1;
10228 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10230 fixed_regs
[13] = call_used_regs
[13]
10231 = call_really_used_regs
[13] = 1;
10233 /* Conditionally disable FPRs. */
10234 if (TARGET_SOFT_FLOAT
)
10235 for (i
= 32; i
< 64; i
++)
10236 fixed_regs
[i
] = call_used_regs
[i
]
10237 = call_really_used_regs
[i
] = 1;
10239 /* The TOC register is not killed across calls in a way that is
10240 visible to the compiler. */
10241 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10242 call_really_used_regs
[2] = 0;
10244 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10245 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10247 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10248 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10249 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10250 = call_really_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10252 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10253 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10254 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10255 = call_really_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10257 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10258 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10259 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10261 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10263 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10264 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10265 call_really_used_regs
[VRSAVE_REGNO
] = 1;
10268 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10269 global_regs
[VSCR_REGNO
] = 1;
10271 if (TARGET_ALTIVEC_ABI
)
10273 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10274 call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10276 /* AIX reserves VR20:31 in non-extended ABI mode. */
10278 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10279 fixed_regs
[i
] = call_used_regs
[i
] = call_really_used_regs
[i
] = 1;
10284 /* Output insns to set DEST equal to the constant SOURCE as a series of
10285 lis, ori and shl instructions and return TRUE. */
10288 rs6000_emit_set_const (rtx dest
, rtx source
)
10290 machine_mode mode
= GET_MODE (dest
);
10295 gcc_checking_assert (CONST_INT_P (source
));
10296 c
= INTVAL (source
);
10301 emit_insn (gen_rtx_SET (dest
, source
));
10305 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10307 emit_insn (gen_rtx_SET (copy_rtx (temp
),
10308 GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10309 emit_insn (gen_rtx_SET (dest
,
10310 gen_rtx_IOR (SImode
, copy_rtx (temp
),
10311 GEN_INT (c
& 0xffff))));
10315 if (!TARGET_POWERPC64
)
10319 hi
= operand_subword_force (copy_rtx (dest
), WORDS_BIG_ENDIAN
== 0,
10321 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0,
10323 emit_move_insn (hi
, GEN_INT (c
>> 32));
10324 c
= ((c
& 0xffffffff) ^ 0x80000000) - 0x80000000;
10325 emit_move_insn (lo
, GEN_INT (c
));
10328 rs6000_emit_set_long_const (dest
, c
);
10332 gcc_unreachable ();
10335 insn
= get_last_insn ();
10336 set
= single_set (insn
);
10337 if (! CONSTANT_P (SET_SRC (set
)))
10338 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10343 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10344 Output insns to set DEST equal to the constant C as a series of
10345 lis, ori and shl instructions. */
10348 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
10351 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10361 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10362 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10363 emit_move_insn (dest
, GEN_INT ((ud1
^ 0x8000) - 0x8000));
10365 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10366 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10368 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10370 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10371 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10373 emit_move_insn (dest
,
10374 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10377 else if (ud3
== 0 && ud4
== 0)
10379 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10381 gcc_assert (ud2
& 0x8000);
10382 emit_move_insn (copy_rtx (temp
),
10383 GEN_INT (((ud2
<< 16) ^ 0x80000000) - 0x80000000));
10385 emit_move_insn (copy_rtx (temp
),
10386 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10388 emit_move_insn (dest
,
10389 gen_rtx_ZERO_EXTEND (DImode
,
10390 gen_lowpart (SImode
,
10391 copy_rtx (temp
))));
10393 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10394 || (ud4
== 0 && ! (ud3
& 0x8000)))
10396 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10398 emit_move_insn (copy_rtx (temp
),
10399 GEN_INT (((ud3
<< 16) ^ 0x80000000) - 0x80000000));
10401 emit_move_insn (copy_rtx (temp
),
10402 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10404 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10405 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10408 emit_move_insn (dest
,
10409 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10414 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10416 emit_move_insn (copy_rtx (temp
),
10417 GEN_INT (((ud4
<< 16) ^ 0x80000000) - 0x80000000));
10419 emit_move_insn (copy_rtx (temp
),
10420 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10423 emit_move_insn (ud2
!= 0 || ud1
!= 0 ? copy_rtx (temp
) : dest
,
10424 gen_rtx_ASHIFT (DImode
, copy_rtx (temp
),
10427 emit_move_insn (ud1
!= 0 ? copy_rtx (temp
) : dest
,
10428 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10429 GEN_INT (ud2
<< 16)));
10431 emit_move_insn (dest
,
10432 gen_rtx_IOR (DImode
, copy_rtx (temp
),
10437 /* Helper for the following. Get rid of [r+r] memory refs
10438 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10441 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10443 if (reload_in_progress
)
10446 if (GET_CODE (operands
[0]) == MEM
10447 && GET_CODE (XEXP (operands
[0], 0)) != REG
10448 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10449 GET_MODE (operands
[0]), false))
10451 = replace_equiv_address (operands
[0],
10452 copy_addr_to_reg (XEXP (operands
[0], 0)));
10454 if (GET_CODE (operands
[1]) == MEM
10455 && GET_CODE (XEXP (operands
[1], 0)) != REG
10456 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10457 GET_MODE (operands
[1]), false))
10459 = replace_equiv_address (operands
[1],
10460 copy_addr_to_reg (XEXP (operands
[1], 0)));
10463 /* Generate a vector of constants to permute MODE for a little-endian
10464 storage operation by swapping the two halves of a vector. */
10466 rs6000_const_vec (machine_mode mode
)
10494 v
= rtvec_alloc (subparts
);
10496 for (i
= 0; i
< subparts
/ 2; ++i
)
10497 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10498 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10499 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10504 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10505 for a VSX load or store operation. */
10507 rs6000_gen_le_vsx_permute (rtx source
, machine_mode mode
)
10509 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10510 128-bit integers if they are allowed in VSX registers. */
10511 if (FLOAT128_VECTOR_P (mode
) || mode
== TImode
|| mode
== V1TImode
)
10512 return gen_rtx_ROTATE (mode
, source
, GEN_INT (64));
10515 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10516 return gen_rtx_VEC_SELECT (mode
, source
, par
);
10520 /* Emit a little-endian load from vector memory location SOURCE to VSX
10521 register DEST in mode MODE. The load is done with two permuting
10522 insn's that represent an lxvd2x and xxpermdi. */
10524 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10526 rtx tmp
, permute_mem
, permute_reg
;
10528 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10530 if (mode
== TImode
|| mode
== V1TImode
)
10533 dest
= gen_lowpart (V2DImode
, dest
);
10534 source
= adjust_address (source
, V2DImode
, 0);
10537 tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10538 permute_mem
= rs6000_gen_le_vsx_permute (source
, mode
);
10539 permute_reg
= rs6000_gen_le_vsx_permute (tmp
, mode
);
10540 emit_insn (gen_rtx_SET (tmp
, permute_mem
));
10541 emit_insn (gen_rtx_SET (dest
, permute_reg
));
10544 /* Emit a little-endian store to vector memory location DEST from VSX
10545 register SOURCE in mode MODE. The store is done with two permuting
10546 insn's that represent an xxpermdi and an stxvd2x. */
10548 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10550 rtx tmp
, permute_src
, permute_tmp
;
10552 /* This should never be called during or after reload, because it does
10553 not re-permute the source register. It is intended only for use
10555 gcc_assert (!reload_in_progress
&& !lra_in_progress
&& !reload_completed
);
10557 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10559 if (mode
== TImode
|| mode
== V1TImode
)
10562 dest
= adjust_address (dest
, V2DImode
, 0);
10563 source
= gen_lowpart (V2DImode
, source
);
10566 tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source
) : source
;
10567 permute_src
= rs6000_gen_le_vsx_permute (source
, mode
);
10568 permute_tmp
= rs6000_gen_le_vsx_permute (tmp
, mode
);
10569 emit_insn (gen_rtx_SET (tmp
, permute_src
));
10570 emit_insn (gen_rtx_SET (dest
, permute_tmp
));
10573 /* Emit a sequence representing a little-endian VSX load or store,
10574 moving data from SOURCE to DEST in mode MODE. This is done
10575 separately from rs6000_emit_move to ensure it is called only
10576 during expand. LE VSX loads and stores introduced later are
10577 handled with a split. The expand-time RTL generation allows
10578 us to optimize away redundant pairs of register-permutes. */
10580 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10582 gcc_assert (!BYTES_BIG_ENDIAN
10583 && VECTOR_MEM_VSX_P (mode
)
10584 && !TARGET_P9_VECTOR
10585 && !gpr_or_gpr_p (dest
, source
)
10586 && (MEM_P (source
) ^ MEM_P (dest
)));
10588 if (MEM_P (source
))
10590 gcc_assert (REG_P (dest
) || GET_CODE (dest
) == SUBREG
);
10591 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10595 if (!REG_P (source
))
10596 source
= force_reg (mode
, source
);
10597 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10601 /* Return whether a SFmode or SImode move can be done without converting one
10602 mode to another. This arrises when we have:
10604 (SUBREG:SF (REG:SI ...))
10605 (SUBREG:SI (REG:SF ...))
10607 and one of the values is in a floating point/vector register, where SFmode
10608 scalars are stored in DFmode format. */
10611 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10613 if (TARGET_ALLOW_SF_SUBREG
)
10616 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10619 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10622 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10623 if (SUBREG_P (dest
))
10625 rtx dest_subreg
= SUBREG_REG (dest
);
10626 rtx src_subreg
= SUBREG_REG (src
);
10627 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10634 /* Helper function to change moves with:
10636 (SUBREG:SF (REG:SI)) and
10637 (SUBREG:SI (REG:SF))
10639 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10640 values are stored as DFmode values in the VSX registers. We need to convert
10641 the bits before we can use a direct move or operate on the bits in the
10642 vector register as an integer type.
10644 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10647 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10649 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_in_progress
&& !reload_completed
10650 && !lra_in_progress
10651 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10652 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10654 rtx inner_source
= SUBREG_REG (source
);
10655 machine_mode inner_mode
= GET_MODE (inner_source
);
10657 if (mode
== SImode
&& inner_mode
== SFmode
)
10659 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10663 if (mode
== SFmode
&& inner_mode
== SImode
)
10665 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10673 /* Emit a move from SOURCE to DEST in mode MODE. */
10675 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10678 operands
[0] = dest
;
10679 operands
[1] = source
;
10681 if (TARGET_DEBUG_ADDR
)
10684 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10685 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10686 GET_MODE_NAME (mode
),
10687 reload_in_progress
,
10689 can_create_pseudo_p ());
10691 fprintf (stderr
, "source:\n");
10692 debug_rtx (source
);
10695 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
10696 if (CONST_WIDE_INT_P (operands
[1])
10697 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10699 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10700 gcc_unreachable ();
10703 /* See if we need to special case SImode/SFmode SUBREG moves. */
10704 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10705 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10708 /* Check if GCC is setting up a block move that will end up using FP
10709 registers as temporaries. We must make sure this is acceptable. */
10710 if (GET_CODE (operands
[0]) == MEM
10711 && GET_CODE (operands
[1]) == MEM
10713 && (SLOW_UNALIGNED_ACCESS (DImode
, MEM_ALIGN (operands
[0]))
10714 || SLOW_UNALIGNED_ACCESS (DImode
, MEM_ALIGN (operands
[1])))
10715 && ! (SLOW_UNALIGNED_ACCESS (SImode
, (MEM_ALIGN (operands
[0]) > 32
10716 ? 32 : MEM_ALIGN (operands
[0])))
10717 || SLOW_UNALIGNED_ACCESS (SImode
, (MEM_ALIGN (operands
[1]) > 32
10719 : MEM_ALIGN (operands
[1]))))
10720 && ! MEM_VOLATILE_P (operands
[0])
10721 && ! MEM_VOLATILE_P (operands
[1]))
10723 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10724 adjust_address (operands
[1], SImode
, 0));
10725 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10726 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10730 if (can_create_pseudo_p () && GET_CODE (operands
[0]) == MEM
10731 && !gpc_reg_operand (operands
[1], mode
))
10732 operands
[1] = force_reg (mode
, operands
[1]);
10734 /* Recognize the case where operand[1] is a reference to thread-local
10735 data and load its address to a register. */
10736 if (tls_referenced_p (operands
[1]))
10738 enum tls_model model
;
10739 rtx tmp
= operands
[1];
10742 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10744 addend
= XEXP (XEXP (tmp
, 0), 1);
10745 tmp
= XEXP (XEXP (tmp
, 0), 0);
10748 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
10749 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10750 gcc_assert (model
!= 0);
10752 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10755 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10756 tmp
= force_operand (tmp
, operands
[0]);
10761 /* Handle the case where reload calls us with an invalid address. */
10762 if (reload_in_progress
&& mode
== Pmode
10763 && (! general_operand (operands
[1], mode
)
10764 || ! nonimmediate_operand (operands
[0], mode
)))
10767 /* 128-bit constant floating-point values on Darwin should really be loaded
10768 as two parts. However, this premature splitting is a problem when DFmode
10769 values can go into Altivec registers. */
10770 if (FLOAT128_IBM_P (mode
) && !reg_addr
[DFmode
].scalar_in_vmx_p
10771 && GET_CODE (operands
[1]) == CONST_DOUBLE
)
10773 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10774 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10776 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10777 GET_MODE_SIZE (DFmode
)),
10778 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10779 GET_MODE_SIZE (DFmode
)),
10784 if (reload_in_progress
&& cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
)
10785 cfun
->machine
->sdmode_stack_slot
=
10786 eliminate_regs (cfun
->machine
->sdmode_stack_slot
, VOIDmode
, NULL_RTX
);
10789 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10790 p1:SD) if p1 is not of floating point class and p0 is spilled as
10791 we can have no analogous movsd_store for this. */
10792 if (lra_in_progress
&& mode
== DDmode
10793 && REG_P (operands
[0]) && REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
10794 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10795 && GET_CODE (operands
[1]) == SUBREG
&& REG_P (SUBREG_REG (operands
[1]))
10796 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10799 int regno
= REGNO (SUBREG_REG (operands
[1]));
10801 if (regno
>= FIRST_PSEUDO_REGISTER
)
10803 cl
= reg_preferred_class (regno
);
10804 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10806 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10809 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10810 operands
[1] = SUBREG_REG (operands
[1]);
10813 if (lra_in_progress
10815 && REG_P (operands
[0]) && REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
10816 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10817 && (REG_P (operands
[1])
10818 || (GET_CODE (operands
[1]) == SUBREG
10819 && REG_P (SUBREG_REG (operands
[1])))))
10821 int regno
= REGNO (GET_CODE (operands
[1]) == SUBREG
10822 ? SUBREG_REG (operands
[1]) : operands
[1]);
10825 if (regno
>= FIRST_PSEUDO_REGISTER
)
10827 cl
= reg_preferred_class (regno
);
10828 gcc_assert (cl
!= NO_REGS
);
10829 regno
= ira_class_hard_regs
[cl
][0];
10831 if (FP_REGNO_P (regno
))
10833 if (GET_MODE (operands
[0]) != DDmode
)
10834 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10835 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10837 else if (INT_REGNO_P (regno
))
10838 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10843 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10844 p:DD)) if p0 is not of floating point class and p1 is spilled as
10845 we can have no analogous movsd_load for this. */
10846 if (lra_in_progress
&& mode
== DDmode
10847 && GET_CODE (operands
[0]) == SUBREG
&& REG_P (SUBREG_REG (operands
[0]))
10848 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10849 && REG_P (operands
[1]) && REGNO (operands
[1]) >= FIRST_PSEUDO_REGISTER
10850 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10853 int regno
= REGNO (SUBREG_REG (operands
[0]));
10855 if (regno
>= FIRST_PSEUDO_REGISTER
)
10857 cl
= reg_preferred_class (regno
);
10858 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10860 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10863 operands
[0] = SUBREG_REG (operands
[0]);
10864 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10867 if (lra_in_progress
10869 && (REG_P (operands
[0])
10870 || (GET_CODE (operands
[0]) == SUBREG
10871 && REG_P (SUBREG_REG (operands
[0]))))
10872 && REG_P (operands
[1]) && REGNO (operands
[1]) >= FIRST_PSEUDO_REGISTER
10873 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10875 int regno
= REGNO (GET_CODE (operands
[0]) == SUBREG
10876 ? SUBREG_REG (operands
[0]) : operands
[0]);
10879 if (regno
>= FIRST_PSEUDO_REGISTER
)
10881 cl
= reg_preferred_class (regno
);
10882 gcc_assert (cl
!= NO_REGS
);
10883 regno
= ira_class_hard_regs
[cl
][0];
10885 if (FP_REGNO_P (regno
))
10887 if (GET_MODE (operands
[1]) != DDmode
)
10888 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10889 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10891 else if (INT_REGNO_P (regno
))
10892 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10898 if (reload_in_progress
10900 && cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
10901 && MEM_P (operands
[0])
10902 && rtx_equal_p (operands
[0], cfun
->machine
->sdmode_stack_slot
)
10903 && REG_P (operands
[1]))
10905 if (FP_REGNO_P (REGNO (operands
[1])))
10907 rtx mem
= adjust_address_nv (operands
[0], DDmode
, 0);
10908 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
10909 emit_insn (gen_movsd_store (mem
, operands
[1]));
10911 else if (INT_REGNO_P (REGNO (operands
[1])))
10913 rtx mem
= operands
[0];
10914 if (BYTES_BIG_ENDIAN
)
10915 mem
= adjust_address_nv (mem
, mode
, 4);
10916 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
10917 emit_insn (gen_movsd_hardfloat (mem
, operands
[1]));
10923 if (reload_in_progress
10925 && REG_P (operands
[0])
10926 && MEM_P (operands
[1])
10927 && cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
10928 && rtx_equal_p (operands
[1], cfun
->machine
->sdmode_stack_slot
))
10930 if (FP_REGNO_P (REGNO (operands
[0])))
10932 rtx mem
= adjust_address_nv (operands
[1], DDmode
, 0);
10933 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
10934 emit_insn (gen_movsd_load (operands
[0], mem
));
10936 else if (INT_REGNO_P (REGNO (operands
[0])))
10938 rtx mem
= operands
[1];
10939 if (BYTES_BIG_ENDIAN
)
10940 mem
= adjust_address_nv (mem
, mode
, 4);
10941 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
10942 emit_insn (gen_movsd_hardfloat (operands
[0], mem
));
10949 /* FIXME: In the long term, this switch statement should go away
10950 and be replaced by a sequence of tests based on things like
10956 if (CONSTANT_P (operands
[1])
10957 && GET_CODE (operands
[1]) != CONST_INT
)
10958 operands
[1] = force_const_mem (mode
, operands
[1]);
10965 if (FLOAT128_2REG_P (mode
))
10966 rs6000_eliminate_indexed_memrefs (operands
);
10973 if (CONSTANT_P (operands
[1])
10974 && ! easy_fp_constant (operands
[1], mode
))
10975 operands
[1] = force_const_mem (mode
, operands
[1]);
10987 if (CONSTANT_P (operands
[1])
10988 && !easy_vector_constant (operands
[1], mode
))
10989 operands
[1] = force_const_mem (mode
, operands
[1]);
10994 /* Use default pattern for address of ELF small data */
10997 && DEFAULT_ABI
== ABI_V4
10998 && (GET_CODE (operands
[1]) == SYMBOL_REF
10999 || GET_CODE (operands
[1]) == CONST
)
11000 && small_data_operand (operands
[1], mode
))
11002 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11006 if (DEFAULT_ABI
== ABI_V4
11007 && mode
== Pmode
&& mode
== SImode
11008 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11010 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11014 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11018 && CONSTANT_P (operands
[1])
11019 && GET_CODE (operands
[1]) != HIGH
11020 && GET_CODE (operands
[1]) != CONST_INT
)
11022 rtx target
= (!can_create_pseudo_p ()
11024 : gen_reg_rtx (mode
));
11026 /* If this is a function address on -mcall-aixdesc,
11027 convert it to the address of the descriptor. */
11028 if (DEFAULT_ABI
== ABI_AIX
11029 && GET_CODE (operands
[1]) == SYMBOL_REF
11030 && XSTR (operands
[1], 0)[0] == '.')
11032 const char *name
= XSTR (operands
[1], 0);
11034 while (*name
== '.')
11036 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11037 CONSTANT_POOL_ADDRESS_P (new_ref
)
11038 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11039 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11040 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11041 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11042 operands
[1] = new_ref
;
11045 if (DEFAULT_ABI
== ABI_DARWIN
)
11048 if (MACHO_DYNAMIC_NO_PIC_P
)
11050 /* Take care of any required data indirection. */
11051 operands
[1] = rs6000_machopic_legitimize_pic_address (
11052 operands
[1], mode
, operands
[0]);
11053 if (operands
[0] != operands
[1])
11054 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11058 emit_insn (gen_macho_high (target
, operands
[1]));
11059 emit_insn (gen_macho_low (operands
[0], target
, operands
[1]));
11063 emit_insn (gen_elf_high (target
, operands
[1]));
11064 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11068 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11069 and we have put it in the TOC, we just need to make a TOC-relative
11070 reference to it. */
11072 && GET_CODE (operands
[1]) == SYMBOL_REF
11073 && use_toc_relative_ref (operands
[1], mode
))
11074 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11075 else if (mode
== Pmode
11076 && CONSTANT_P (operands
[1])
11077 && GET_CODE (operands
[1]) != HIGH
11078 && ((GET_CODE (operands
[1]) != CONST_INT
11079 && ! easy_fp_constant (operands
[1], mode
))
11080 || (GET_CODE (operands
[1]) == CONST_INT
11081 && (num_insns_constant (operands
[1], mode
)
11082 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11083 || (GET_CODE (operands
[0]) == REG
11084 && FP_REGNO_P (REGNO (operands
[0]))))
11085 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11086 && (TARGET_CMODEL
== CMODEL_SMALL
11087 || can_create_pseudo_p ()
11088 || (REG_P (operands
[0])
11089 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11093 /* Darwin uses a special PIC legitimizer. */
11094 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11097 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11099 if (operands
[0] != operands
[1])
11100 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11105 /* If we are to limit the number of things we put in the TOC and
11106 this is a symbol plus a constant we can add in one insn,
11107 just put the symbol in the TOC and add the constant. Don't do
11108 this if reload is in progress. */
11109 if (GET_CODE (operands
[1]) == CONST
11110 && TARGET_NO_SUM_IN_TOC
&& ! reload_in_progress
11111 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11112 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11113 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11114 || GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == SYMBOL_REF
)
11115 && ! side_effects_p (operands
[0]))
11118 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11119 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11121 sym
= force_reg (mode
, sym
);
11122 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11126 operands
[1] = force_const_mem (mode
, operands
[1]);
11129 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
11130 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11132 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11134 operands
[1] = gen_const_mem (mode
, tocref
);
11135 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11141 if (!VECTOR_MEM_VSX_P (TImode
))
11142 rs6000_eliminate_indexed_memrefs (operands
);
11146 rs6000_eliminate_indexed_memrefs (operands
);
11150 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11153 /* Above, we may have called force_const_mem which may have returned
11154 an invalid address. If we can, fix this up; otherwise, reload will
11155 have to deal with it. */
11156 if (GET_CODE (operands
[1]) == MEM
&& ! reload_in_progress
)
11157 operands
[1] = validize_mem (operands
[1]);
11160 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11163 /* Nonzero if we can use a floating-point register to pass this arg. */
11164 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11165 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11166 && (CUM)->fregno <= FP_ARG_MAX_REG \
11167 && TARGET_HARD_FLOAT)
11169 /* Nonzero if we can use an AltiVec register to pass this arg. */
11170 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11171 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11172 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11173 && TARGET_ALTIVEC_ABI \
11176 /* Walk down the type tree of TYPE counting consecutive base elements.
11177 If *MODEP is VOIDmode, then set it to the first valid floating point
11178 or vector type. If a non-floating point or vector type is found, or
11179 if a floating point or vector type that doesn't match a non-VOIDmode
11180 *MODEP is found, then return -1, otherwise return the count in the
11184 rs6000_aggregate_candidate (const_tree type
, machine_mode
*modep
)
11187 HOST_WIDE_INT size
;
11189 switch (TREE_CODE (type
))
11192 mode
= TYPE_MODE (type
);
11193 if (!SCALAR_FLOAT_MODE_P (mode
))
11196 if (*modep
== VOIDmode
)
11199 if (*modep
== mode
)
11205 mode
= TYPE_MODE (TREE_TYPE (type
));
11206 if (!SCALAR_FLOAT_MODE_P (mode
))
11209 if (*modep
== VOIDmode
)
11212 if (*modep
== mode
)
11218 if (!TARGET_ALTIVEC_ABI
|| !TARGET_ALTIVEC
)
11221 /* Use V4SImode as representative of all 128-bit vector types. */
11222 size
= int_size_in_bytes (type
);
11232 if (*modep
== VOIDmode
)
11235 /* Vector modes are considered to be opaque: two vectors are
11236 equivalent for the purposes of being homogeneous aggregates
11237 if they are the same size. */
11238 if (*modep
== mode
)
11246 tree index
= TYPE_DOMAIN (type
);
11248 /* Can't handle incomplete types nor sizes that are not
11250 if (!COMPLETE_TYPE_P (type
)
11251 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11254 count
= rs6000_aggregate_candidate (TREE_TYPE (type
), modep
);
11257 || !TYPE_MAX_VALUE (index
)
11258 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
11259 || !TYPE_MIN_VALUE (index
)
11260 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
11264 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
11265 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
11267 /* There must be no padding. */
11268 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11280 /* Can't handle incomplete types nor sizes that are not
11282 if (!COMPLETE_TYPE_P (type
)
11283 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11286 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11288 if (TREE_CODE (field
) != FIELD_DECL
)
11291 sub_count
= rs6000_aggregate_candidate (TREE_TYPE (field
), modep
);
11294 count
+= sub_count
;
11297 /* There must be no padding. */
11298 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11305 case QUAL_UNION_TYPE
:
11307 /* These aren't very interesting except in a degenerate case. */
11312 /* Can't handle incomplete types nor sizes that are not
11314 if (!COMPLETE_TYPE_P (type
)
11315 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11318 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11320 if (TREE_CODE (field
) != FIELD_DECL
)
11323 sub_count
= rs6000_aggregate_candidate (TREE_TYPE (field
), modep
);
11326 count
= count
> sub_count
? count
: sub_count
;
11329 /* There must be no padding. */
11330 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
11343 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11344 float or vector aggregate that shall be passed in FP/vector registers
11345 according to the ELFv2 ABI, return the homogeneous element mode in
11346 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11348 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11351 rs6000_discover_homogeneous_aggregate (machine_mode mode
, const_tree type
,
11352 machine_mode
*elt_mode
,
11355 /* Note that we do not accept complex types at the top level as
11356 homogeneous aggregates; these types are handled via the
11357 targetm.calls.split_complex_arg mechanism. Complex types
11358 can be elements of homogeneous aggregates, however. */
11359 if (DEFAULT_ABI
== ABI_ELFv2
&& type
&& AGGREGATE_TYPE_P (type
))
11361 machine_mode field_mode
= VOIDmode
;
11362 int field_count
= rs6000_aggregate_candidate (type
, &field_mode
);
11364 if (field_count
> 0)
11366 int n_regs
= (SCALAR_FLOAT_MODE_P (field_mode
) ?
11367 (GET_MODE_SIZE (field_mode
) + 7) >> 3 : 1);
11369 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11370 up to AGGR_ARG_NUM_REG registers. */
11371 if (field_count
* n_regs
<= AGGR_ARG_NUM_REG
)
11374 *elt_mode
= field_mode
;
11376 *n_elts
= field_count
;
11389 /* Return a nonzero value to say to return the function value in
11390 memory, just as large structures are always returned. TYPE will be
11391 the data type of the value, and FNTYPE will be the type of the
11392 function doing the returning, or @code{NULL} for libcalls.
11394 The AIX ABI for the RS/6000 specifies that all structures are
11395 returned in memory. The Darwin ABI does the same.
11397 For the Darwin 64 Bit ABI, a function result can be returned in
11398 registers or in memory, depending on the size of the return data
11399 type. If it is returned in registers, the value occupies the same
11400 registers as it would if it were the first and only function
11401 argument. Otherwise, the function places its result in memory at
11402 the location pointed to by GPR3.
11404 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11405 but a draft put them in memory, and GCC used to implement the draft
11406 instead of the final standard. Therefore, aix_struct_return
11407 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11408 compatibility can change DRAFT_V4_STRUCT_RET to override the
11409 default, and -m switches get the final word. See
11410 rs6000_option_override_internal for more details.
11412 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11413 long double support is enabled. These values are returned in memory.
11415 int_size_in_bytes returns -1 for variable size objects, which go in
11416 memory always. The cast to unsigned makes -1 > 8. */
11419 rs6000_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
11421 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11423 && rs6000_darwin64_abi
11424 && TREE_CODE (type
) == RECORD_TYPE
11425 && int_size_in_bytes (type
) > 0)
11427 CUMULATIVE_ARGS valcum
;
11431 valcum
.fregno
= FP_ARG_MIN_REG
;
11432 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
11433 /* Do a trial code generation as if this were going to be passed
11434 as an argument; if any part goes in memory, we return NULL. */
11435 valret
= rs6000_darwin64_record_arg (&valcum
, type
, true, true);
11438 /* Otherwise fall through to more conventional ABI rules. */
11441 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
11442 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type
), type
,
11446 /* The ELFv2 ABI returns aggregates up to 16B in registers */
11447 if (DEFAULT_ABI
== ABI_ELFv2
&& AGGREGATE_TYPE_P (type
)
11448 && (unsigned HOST_WIDE_INT
) int_size_in_bytes (type
) <= 16)
11451 if (AGGREGATE_TYPE_P (type
)
11452 && (aix_struct_return
11453 || (unsigned HOST_WIDE_INT
) int_size_in_bytes (type
) > 8))
11456 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11457 modes only exist for GCC vector types if -maltivec. */
11458 if (TARGET_32BIT
&& !TARGET_ALTIVEC_ABI
11459 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type
)))
11462 /* Return synthetic vectors in memory. */
11463 if (TREE_CODE (type
) == VECTOR_TYPE
11464 && int_size_in_bytes (type
) > (TARGET_ALTIVEC_ABI
? 16 : 8))
11466 static bool warned_for_return_big_vectors
= false;
11467 if (!warned_for_return_big_vectors
)
11469 warning (OPT_Wpsabi
, "GCC vector returned by reference: "
11470 "non-standard ABI extension with no compatibility guarantee");
11471 warned_for_return_big_vectors
= true;
11476 if (DEFAULT_ABI
== ABI_V4
&& TARGET_IEEEQUAD
11477 && FLOAT128_IEEE_P (TYPE_MODE (type
)))
11483 /* Specify whether values returned in registers should be at the most
11484 significant end of a register. We want aggregates returned by
11485 value to match the way aggregates are passed to functions. */
11488 rs6000_return_in_msb (const_tree valtype
)
11490 return (DEFAULT_ABI
== ABI_ELFv2
11491 && BYTES_BIG_ENDIAN
11492 && AGGREGATE_TYPE_P (valtype
)
11493 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype
), valtype
) == upward
);
11496 #ifdef HAVE_AS_GNU_ATTRIBUTE
11497 /* Return TRUE if a call to function FNDECL may be one that
11498 potentially affects the function calling ABI of the object file. */
11501 call_ABI_of_interest (tree fndecl
)
11503 if (rs6000_gnu_attr
&& symtab
->state
== EXPANSION
)
11505 struct cgraph_node
*c_node
;
11507 /* Libcalls are always interesting. */
11508 if (fndecl
== NULL_TREE
)
11511 /* Any call to an external function is interesting. */
11512 if (DECL_EXTERNAL (fndecl
))
11515 /* Interesting functions that we are emitting in this object file. */
11516 c_node
= cgraph_node::get (fndecl
);
11517 c_node
= c_node
->ultimate_alias_target ();
11518 return !c_node
->only_called_directly_p ();
11524 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11525 for a call to a function whose data type is FNTYPE.
11526 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11528 For incoming args we set the number of arguments in the prototype large
11529 so we never return a PARALLEL. */
11532 init_cumulative_args (CUMULATIVE_ARGS
*cum
, tree fntype
,
11533 rtx libname ATTRIBUTE_UNUSED
, int incoming
,
11534 int libcall
, int n_named_args
,
11535 tree fndecl ATTRIBUTE_UNUSED
,
11536 machine_mode return_mode ATTRIBUTE_UNUSED
)
11538 static CUMULATIVE_ARGS zero_cumulative
;
11540 *cum
= zero_cumulative
;
11542 cum
->fregno
= FP_ARG_MIN_REG
;
11543 cum
->vregno
= ALTIVEC_ARG_MIN_REG
;
11544 cum
->prototype
= (fntype
&& prototype_p (fntype
));
11545 cum
->call_cookie
= ((DEFAULT_ABI
== ABI_V4
&& libcall
)
11546 ? CALL_LIBCALL
: CALL_NORMAL
);
11547 cum
->sysv_gregno
= GP_ARG_MIN_REG
;
11548 cum
->stdarg
= stdarg_p (fntype
);
11549 cum
->libcall
= libcall
;
11551 cum
->nargs_prototype
= 0;
11552 if (incoming
|| cum
->prototype
)
11553 cum
->nargs_prototype
= n_named_args
;
11555 /* Check for a longcall attribute. */
11556 if ((!fntype
&& rs6000_default_long_calls
)
11558 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype
))
11559 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype
))))
11560 cum
->call_cookie
|= CALL_LONG
;
11562 if (TARGET_DEBUG_ARG
)
11564 fprintf (stderr
, "\ninit_cumulative_args:");
11567 tree ret_type
= TREE_TYPE (fntype
);
11568 fprintf (stderr
, " ret code = %s,",
11569 get_tree_code_name (TREE_CODE (ret_type
)));
11572 if (cum
->call_cookie
& CALL_LONG
)
11573 fprintf (stderr
, " longcall,");
11575 fprintf (stderr
, " proto = %d, nargs = %d\n",
11576 cum
->prototype
, cum
->nargs_prototype
);
11579 #ifdef HAVE_AS_GNU_ATTRIBUTE
11580 if (TARGET_ELF
&& (TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
))
11582 cum
->escapes
= call_ABI_of_interest (fndecl
);
11589 return_type
= TREE_TYPE (fntype
);
11590 return_mode
= TYPE_MODE (return_type
);
11593 return_type
= lang_hooks
.types
.type_for_mode (return_mode
, 0);
11595 if (return_type
!= NULL
)
11597 if (TREE_CODE (return_type
) == RECORD_TYPE
11598 && TYPE_TRANSPARENT_AGGR (return_type
))
11600 return_type
= TREE_TYPE (first_field (return_type
));
11601 return_mode
= TYPE_MODE (return_type
);
11603 if (AGGREGATE_TYPE_P (return_type
)
11604 && ((unsigned HOST_WIDE_INT
) int_size_in_bytes (return_type
)
11606 rs6000_returns_struct
= true;
11608 if (SCALAR_FLOAT_MODE_P (return_mode
))
11610 rs6000_passes_float
= true;
11611 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
)
11612 && (FLOAT128_IBM_P (return_mode
)
11613 || FLOAT128_IEEE_P (return_mode
)
11614 || (return_type
!= NULL
11615 && (TYPE_MAIN_VARIANT (return_type
)
11616 == long_double_type_node
))))
11617 rs6000_passes_long_double
= true;
11619 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode
)
11620 || PAIRED_VECTOR_MODE (return_mode
))
11621 rs6000_passes_vector
= true;
11628 && TARGET_ALTIVEC_ABI
11629 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype
))))
11631 error ("cannot return value in vector register because"
11632 " altivec instructions are disabled, use -maltivec"
11633 " to enable them");
11637 /* The mode the ABI uses for a word. This is not the same as word_mode
11638 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11640 static machine_mode
11641 rs6000_abi_word_mode (void)
11643 return TARGET_32BIT
? SImode
: DImode
;
11646 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11648 rs6000_offload_options (void)
11651 return xstrdup ("-foffload-abi=lp64");
11653 return xstrdup ("-foffload-abi=ilp32");
11656 /* On rs6000, function arguments are promoted, as are function return
11659 static machine_mode
11660 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
11662 int *punsignedp ATTRIBUTE_UNUSED
,
11665 PROMOTE_MODE (mode
, *punsignedp
, type
);
11670 /* Return true if TYPE must be passed on the stack and not in registers. */
11673 rs6000_must_pass_in_stack (machine_mode mode
, const_tree type
)
11675 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
|| TARGET_64BIT
)
11676 return must_pass_in_stack_var_size (mode
, type
);
11678 return must_pass_in_stack_var_size_or_pad (mode
, type
);
11682 is_complex_IBM_long_double (machine_mode mode
)
11684 return mode
== ICmode
|| (!TARGET_IEEEQUAD
&& mode
== TCmode
);
11687 /* Whether ABI_V4 passes MODE args to a function in floating point
11691 abi_v4_pass_in_fpr (machine_mode mode
)
11693 if (!TARGET_HARD_FLOAT
)
11695 if (TARGET_SINGLE_FLOAT
&& mode
== SFmode
)
11697 if (TARGET_DOUBLE_FLOAT
&& mode
== DFmode
)
11699 /* ABI_V4 passes complex IBM long double in 8 gprs.
11700 Stupid, but we can't change the ABI now. */
11701 if (is_complex_IBM_long_double (mode
))
11703 if (FLOAT128_2REG_P (mode
))
11705 if (DECIMAL_FLOAT_MODE_P (mode
))
11710 /* If defined, a C expression which determines whether, and in which
11711 direction, to pad out an argument with extra space. The value
11712 should be of type `enum direction': either `upward' to pad above
11713 the argument, `downward' to pad below, or `none' to inhibit
11716 For the AIX ABI structs are always stored left shifted in their
11720 function_arg_padding (machine_mode mode
, const_tree type
)
11722 #ifndef AGGREGATE_PADDING_FIXED
11723 #define AGGREGATE_PADDING_FIXED 0
11725 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11726 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11729 if (!AGGREGATE_PADDING_FIXED
)
11731 /* GCC used to pass structures of the same size as integer types as
11732 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11733 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11734 passed padded downward, except that -mstrict-align further
11735 muddied the water in that multi-component structures of 2 and 4
11736 bytes in size were passed padded upward.
11738 The following arranges for best compatibility with previous
11739 versions of gcc, but removes the -mstrict-align dependency. */
11740 if (BYTES_BIG_ENDIAN
)
11742 HOST_WIDE_INT size
= 0;
11744 if (mode
== BLKmode
)
11746 if (type
&& TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
11747 size
= int_size_in_bytes (type
);
11750 size
= GET_MODE_SIZE (mode
);
11752 if (size
== 1 || size
== 2 || size
== 4)
11758 if (AGGREGATES_PAD_UPWARD_ALWAYS
)
11760 if (type
!= 0 && AGGREGATE_TYPE_P (type
))
11764 /* Fall back to the default. */
11765 return DEFAULT_FUNCTION_ARG_PADDING (mode
, type
);
11768 /* If defined, a C expression that gives the alignment boundary, in bits,
11769 of an argument with the specified mode and type. If it is not defined,
11770 PARM_BOUNDARY is used for all arguments.
11772 V.4 wants long longs and doubles to be double word aligned. Just
11773 testing the mode size is a boneheaded way to do this as it means
11774 that other types such as complex int are also double word aligned.
11775 However, we're stuck with this because changing the ABI might break
11776 existing library interfaces.
11778 Quadword align Altivec/VSX vectors.
11779 Quadword align large synthetic vector types. */
11781 static unsigned int
11782 rs6000_function_arg_boundary (machine_mode mode
, const_tree type
)
11784 machine_mode elt_mode
;
11787 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
11789 if (DEFAULT_ABI
== ABI_V4
11790 && (GET_MODE_SIZE (mode
) == 8
11791 || (TARGET_HARD_FLOAT
11792 && !is_complex_IBM_long_double (mode
)
11793 && FLOAT128_2REG_P (mode
))))
11795 else if (FLOAT128_VECTOR_P (mode
))
11797 else if (PAIRED_VECTOR_MODE (mode
)
11798 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
11799 && int_size_in_bytes (type
) >= 8
11800 && int_size_in_bytes (type
) < 16))
11802 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode
)
11803 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
11804 && int_size_in_bytes (type
) >= 16))
11807 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11808 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11809 -mcompat-align-parm is used. */
11810 if (((DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
)
11811 || DEFAULT_ABI
== ABI_ELFv2
)
11812 && type
&& TYPE_ALIGN (type
) > 64)
11814 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11815 or homogeneous float/vector aggregates here. We already handled
11816 vector aggregates above, but still need to check for float here. */
11817 bool aggregate_p
= (AGGREGATE_TYPE_P (type
)
11818 && !SCALAR_FLOAT_MODE_P (elt_mode
));
11820 /* We used to check for BLKmode instead of the above aggregate type
11821 check. Warn when this results in any difference to the ABI. */
11822 if (aggregate_p
!= (mode
== BLKmode
))
11824 static bool warned
;
11825 if (!warned
&& warn_psabi
)
11828 inform (input_location
,
11829 "the ABI of passing aggregates with %d-byte alignment"
11830 " has changed in GCC 5",
11831 (int) TYPE_ALIGN (type
) / BITS_PER_UNIT
);
11839 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11840 implement the "aggregate type" check as a BLKmode check here; this
11841 means certain aggregate types are in fact not aligned. */
11842 if (TARGET_MACHO
&& rs6000_darwin64_abi
11844 && type
&& TYPE_ALIGN (type
) > 64)
11847 return PARM_BOUNDARY
;
11850 /* The offset in words to the start of the parameter save area. */
11852 static unsigned int
11853 rs6000_parm_offset (void)
11855 return (DEFAULT_ABI
== ABI_V4
? 2
11856 : DEFAULT_ABI
== ABI_ELFv2
? 4
11860 /* For a function parm of MODE and TYPE, return the starting word in
11861 the parameter area. NWORDS of the parameter area are already used. */
11863 static unsigned int
11864 rs6000_parm_start (machine_mode mode
, const_tree type
,
11865 unsigned int nwords
)
11867 unsigned int align
;
11869 align
= rs6000_function_arg_boundary (mode
, type
) / PARM_BOUNDARY
- 1;
11870 return nwords
+ (-(rs6000_parm_offset () + nwords
) & align
);
11873 /* Compute the size (in words) of a function argument. */
11875 static unsigned long
11876 rs6000_arg_size (machine_mode mode
, const_tree type
)
11878 unsigned long size
;
11880 if (mode
!= BLKmode
)
11881 size
= GET_MODE_SIZE (mode
);
11883 size
= int_size_in_bytes (type
);
11886 return (size
+ 3) >> 2;
11888 return (size
+ 7) >> 3;
11891 /* Use this to flush pending int fields. */
11894 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS
*cum
,
11895 HOST_WIDE_INT bitpos
, int final
)
11897 unsigned int startbit
, endbit
;
11898 int intregs
, intoffset
;
11901 /* Handle the situations where a float is taking up the first half
11902 of the GPR, and the other half is empty (typically due to
11903 alignment restrictions). We can detect this by a 8-byte-aligned
11904 int field, or by seeing that this is the final flush for this
11905 argument. Count the word and continue on. */
11906 if (cum
->floats_in_gpr
== 1
11907 && (cum
->intoffset
% 64 == 0
11908 || (cum
->intoffset
== -1 && final
)))
11911 cum
->floats_in_gpr
= 0;
11914 if (cum
->intoffset
== -1)
11917 intoffset
= cum
->intoffset
;
11918 cum
->intoffset
= -1;
11919 cum
->floats_in_gpr
= 0;
11921 if (intoffset
% BITS_PER_WORD
!= 0)
11923 mode
= mode_for_size (BITS_PER_WORD
- intoffset
% BITS_PER_WORD
,
11925 if (mode
== BLKmode
)
11927 /* We couldn't find an appropriate mode, which happens,
11928 e.g., in packed structs when there are 3 bytes to load.
11929 Back intoffset back to the beginning of the word in this
11931 intoffset
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
11935 startbit
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
11936 endbit
= ROUND_UP (bitpos
, BITS_PER_WORD
);
11937 intregs
= (endbit
- startbit
) / BITS_PER_WORD
;
11938 cum
->words
+= intregs
;
11939 /* words should be unsigned. */
11940 if ((unsigned)cum
->words
< (endbit
/BITS_PER_WORD
))
11942 int pad
= (endbit
/BITS_PER_WORD
) - cum
->words
;
11947 /* The darwin64 ABI calls for us to recurse down through structs,
11948 looking for elements passed in registers. Unfortunately, we have
11949 to track int register count here also because of misalignments
11950 in powerpc alignment mode. */
11953 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS
*cum
,
11955 HOST_WIDE_INT startbitpos
)
11959 for (f
= TYPE_FIELDS (type
); f
; f
= DECL_CHAIN (f
))
11960 if (TREE_CODE (f
) == FIELD_DECL
)
11962 HOST_WIDE_INT bitpos
= startbitpos
;
11963 tree ftype
= TREE_TYPE (f
);
11965 if (ftype
== error_mark_node
)
11967 mode
= TYPE_MODE (ftype
);
11969 if (DECL_SIZE (f
) != 0
11970 && tree_fits_uhwi_p (bit_position (f
)))
11971 bitpos
+= int_bit_position (f
);
11973 /* ??? FIXME: else assume zero offset. */
11975 if (TREE_CODE (ftype
) == RECORD_TYPE
)
11976 rs6000_darwin64_record_arg_advance_recurse (cum
, ftype
, bitpos
);
11977 else if (USE_FP_FOR_ARG_P (cum
, mode
))
11979 unsigned n_fpregs
= (GET_MODE_SIZE (mode
) + 7) >> 3;
11980 rs6000_darwin64_record_arg_advance_flush (cum
, bitpos
, 0);
11981 cum
->fregno
+= n_fpregs
;
11982 /* Single-precision floats present a special problem for
11983 us, because they are smaller than an 8-byte GPR, and so
11984 the structure-packing rules combined with the standard
11985 varargs behavior mean that we want to pack float/float
11986 and float/int combinations into a single register's
11987 space. This is complicated by the arg advance flushing,
11988 which works on arbitrarily large groups of int-type
11990 if (mode
== SFmode
)
11992 if (cum
->floats_in_gpr
== 1)
11994 /* Two floats in a word; count the word and reset
11995 the float count. */
11997 cum
->floats_in_gpr
= 0;
11999 else if (bitpos
% 64 == 0)
12001 /* A float at the beginning of an 8-byte word;
12002 count it and put off adjusting cum->words until
12003 we see if a arg advance flush is going to do it
12005 cum
->floats_in_gpr
++;
12009 /* The float is at the end of a word, preceded
12010 by integer fields, so the arg advance flush
12011 just above has already set cum->words and
12012 everything is taken care of. */
12016 cum
->words
+= n_fpregs
;
12018 else if (USE_ALTIVEC_FOR_ARG_P (cum
, mode
, 1))
12020 rs6000_darwin64_record_arg_advance_flush (cum
, bitpos
, 0);
12024 else if (cum
->intoffset
== -1)
12025 cum
->intoffset
= bitpos
;
12029 /* Check for an item that needs to be considered specially under the darwin 64
12030 bit ABI. These are record types where the mode is BLK or the structure is
12031 8 bytes in size. */
12033 rs6000_darwin64_struct_check_p (machine_mode mode
, const_tree type
)
12035 return rs6000_darwin64_abi
12036 && ((mode
== BLKmode
12037 && TREE_CODE (type
) == RECORD_TYPE
12038 && int_size_in_bytes (type
) > 0)
12039 || (type
&& TREE_CODE (type
) == RECORD_TYPE
12040 && int_size_in_bytes (type
) == 8)) ? 1 : 0;
12043 /* Update the data in CUM to advance over an argument
12044 of mode MODE and data type TYPE.
12045 (TYPE is null for libcalls where that information may not be available.)
12047 Note that for args passed by reference, function_arg will be called
12048 with MODE and TYPE set to that of the pointer to the arg, not the arg
12052 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
12053 const_tree type
, bool named
, int depth
)
12055 machine_mode elt_mode
;
12058 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12060 /* Only tick off an argument if we're not recursing. */
12062 cum
->nargs_prototype
--;
12064 #ifdef HAVE_AS_GNU_ATTRIBUTE
12065 if (TARGET_ELF
&& (TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
12068 if (SCALAR_FLOAT_MODE_P (mode
))
12070 rs6000_passes_float
= true;
12071 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
)
12072 && (FLOAT128_IBM_P (mode
)
12073 || FLOAT128_IEEE_P (mode
)
12075 && TYPE_MAIN_VARIANT (type
) == long_double_type_node
)))
12076 rs6000_passes_long_double
= true;
12078 if ((named
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
12079 || (PAIRED_VECTOR_MODE (mode
)
12081 && cum
->sysv_gregno
<= GP_ARG_MAX_REG
))
12082 rs6000_passes_vector
= true;
12086 if (TARGET_ALTIVEC_ABI
12087 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode
)
12088 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12089 && int_size_in_bytes (type
) == 16)))
12091 bool stack
= false;
12093 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
12095 cum
->vregno
+= n_elts
;
12097 if (!TARGET_ALTIVEC
)
12098 error ("cannot pass argument in vector register because"
12099 " altivec instructions are disabled, use -maltivec"
12100 " to enable them");
12102 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12103 even if it is going to be passed in a vector register.
12104 Darwin does the same for variable-argument functions. */
12105 if (((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
12107 || (cum
->stdarg
&& DEFAULT_ABI
!= ABI_V4
))
12117 /* Vector parameters must be 16-byte aligned. In 32-bit
12118 mode this means we need to take into account the offset
12119 to the parameter save area. In 64-bit mode, they just
12120 have to start on an even word, since the parameter save
12121 area is 16-byte aligned. */
12123 align
= -(rs6000_parm_offset () + cum
->words
) & 3;
12125 align
= cum
->words
& 1;
12126 cum
->words
+= align
+ rs6000_arg_size (mode
, type
);
12128 if (TARGET_DEBUG_ARG
)
12130 fprintf (stderr
, "function_adv: words = %2d, align=%d, ",
12131 cum
->words
, align
);
12132 fprintf (stderr
, "nargs = %4d, proto = %d, mode = %4s\n",
12133 cum
->nargs_prototype
, cum
->prototype
,
12134 GET_MODE_NAME (mode
));
12138 else if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
12140 int size
= int_size_in_bytes (type
);
12141 /* Variable sized types have size == -1 and are
12142 treated as if consisting entirely of ints.
12143 Pad to 16 byte boundary if needed. */
12144 if (TYPE_ALIGN (type
) >= 2 * BITS_PER_WORD
12145 && (cum
->words
% 2) != 0)
12147 /* For varargs, we can just go up by the size of the struct. */
12149 cum
->words
+= (size
+ 7) / 8;
12152 /* It is tempting to say int register count just goes up by
12153 sizeof(type)/8, but this is wrong in a case such as
12154 { int; double; int; } [powerpc alignment]. We have to
12155 grovel through the fields for these too. */
12156 cum
->intoffset
= 0;
12157 cum
->floats_in_gpr
= 0;
12158 rs6000_darwin64_record_arg_advance_recurse (cum
, type
, 0);
12159 rs6000_darwin64_record_arg_advance_flush (cum
,
12160 size
* BITS_PER_UNIT
, 1);
12162 if (TARGET_DEBUG_ARG
)
12164 fprintf (stderr
, "function_adv: words = %2d, align=%d, size=%d",
12165 cum
->words
, TYPE_ALIGN (type
), size
);
12167 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12168 cum
->nargs_prototype
, cum
->prototype
,
12169 GET_MODE_NAME (mode
));
12172 else if (DEFAULT_ABI
== ABI_V4
)
12174 if (abi_v4_pass_in_fpr (mode
))
12176 /* _Decimal128 must use an even/odd register pair. This assumes
12177 that the register number is odd when fregno is odd. */
12178 if (mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12181 if (cum
->fregno
+ (FLOAT128_2REG_P (mode
) ? 1 : 0)
12182 <= FP_ARG_V4_MAX_REG
)
12183 cum
->fregno
+= (GET_MODE_SIZE (mode
) + 7) >> 3;
12186 cum
->fregno
= FP_ARG_V4_MAX_REG
+ 1;
12187 if (mode
== DFmode
|| FLOAT128_IBM_P (mode
)
12188 || mode
== DDmode
|| mode
== TDmode
)
12189 cum
->words
+= cum
->words
& 1;
12190 cum
->words
+= rs6000_arg_size (mode
, type
);
12195 int n_words
= rs6000_arg_size (mode
, type
);
12196 int gregno
= cum
->sysv_gregno
;
12198 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12199 As does any other 2 word item such as complex int due to a
12200 historical mistake. */
12202 gregno
+= (1 - gregno
) & 1;
12204 /* Multi-reg args are not split between registers and stack. */
12205 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
12207 /* Long long is aligned on the stack. So are other 2 word
12208 items such as complex int due to a historical mistake. */
12210 cum
->words
+= cum
->words
& 1;
12211 cum
->words
+= n_words
;
12214 /* Note: continuing to accumulate gregno past when we've started
12215 spilling to the stack indicates the fact that we've started
12216 spilling to the stack to expand_builtin_saveregs. */
12217 cum
->sysv_gregno
= gregno
+ n_words
;
12220 if (TARGET_DEBUG_ARG
)
12222 fprintf (stderr
, "function_adv: words = %2d, fregno = %2d, ",
12223 cum
->words
, cum
->fregno
);
12224 fprintf (stderr
, "gregno = %2d, nargs = %4d, proto = %d, ",
12225 cum
->sysv_gregno
, cum
->nargs_prototype
, cum
->prototype
);
12226 fprintf (stderr
, "mode = %4s, named = %d\n",
12227 GET_MODE_NAME (mode
), named
);
12232 int n_words
= rs6000_arg_size (mode
, type
);
12233 int start_words
= cum
->words
;
12234 int align_words
= rs6000_parm_start (mode
, type
, start_words
);
12236 cum
->words
= align_words
+ n_words
;
12238 if (SCALAR_FLOAT_MODE_P (elt_mode
) && TARGET_HARD_FLOAT
)
12240 /* _Decimal128 must be passed in an even/odd float register pair.
12241 This assumes that the register number is odd when fregno is
12243 if (elt_mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12245 cum
->fregno
+= n_elts
* ((GET_MODE_SIZE (elt_mode
) + 7) >> 3);
12248 if (TARGET_DEBUG_ARG
)
12250 fprintf (stderr
, "function_adv: words = %2d, fregno = %2d, ",
12251 cum
->words
, cum
->fregno
);
12252 fprintf (stderr
, "nargs = %4d, proto = %d, mode = %4s, ",
12253 cum
->nargs_prototype
, cum
->prototype
, GET_MODE_NAME (mode
));
12254 fprintf (stderr
, "named = %d, align = %d, depth = %d\n",
12255 named
, align_words
- start_words
, depth
);
12261 rs6000_function_arg_advance (cumulative_args_t cum
, machine_mode mode
,
12262 const_tree type
, bool named
)
12264 rs6000_function_arg_advance_1 (get_cumulative_args (cum
), mode
, type
, named
,
12268 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12269 structure between cum->intoffset and bitpos to integer registers. */
12272 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS
*cum
,
12273 HOST_WIDE_INT bitpos
, rtx rvec
[], int *k
)
12276 unsigned int regno
;
12277 unsigned int startbit
, endbit
;
12278 int this_regno
, intregs
, intoffset
;
12281 if (cum
->intoffset
== -1)
12284 intoffset
= cum
->intoffset
;
12285 cum
->intoffset
= -1;
12287 /* If this is the trailing part of a word, try to only load that
12288 much into the register. Otherwise load the whole register. Note
12289 that in the latter case we may pick up unwanted bits. It's not a
12290 problem at the moment but may wish to revisit. */
12292 if (intoffset
% BITS_PER_WORD
!= 0)
12294 mode
= mode_for_size (BITS_PER_WORD
- intoffset
% BITS_PER_WORD
,
12296 if (mode
== BLKmode
)
12298 /* We couldn't find an appropriate mode, which happens,
12299 e.g., in packed structs when there are 3 bytes to load.
12300 Back intoffset back to the beginning of the word in this
12302 intoffset
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12309 startbit
= ROUND_DOWN (intoffset
, BITS_PER_WORD
);
12310 endbit
= ROUND_UP (bitpos
, BITS_PER_WORD
);
12311 intregs
= (endbit
- startbit
) / BITS_PER_WORD
;
12312 this_regno
= cum
->words
+ intoffset
/ BITS_PER_WORD
;
12314 if (intregs
> 0 && intregs
> GP_ARG_NUM_REG
- this_regno
)
12315 cum
->use_stack
= 1;
12317 intregs
= MIN (intregs
, GP_ARG_NUM_REG
- this_regno
);
12321 intoffset
/= BITS_PER_UNIT
;
12324 regno
= GP_ARG_MIN_REG
+ this_regno
;
12325 reg
= gen_rtx_REG (mode
, regno
);
12327 gen_rtx_EXPR_LIST (VOIDmode
, reg
, GEN_INT (intoffset
));
12330 intoffset
= (intoffset
| (UNITS_PER_WORD
-1)) + 1;
12334 while (intregs
> 0);
12337 /* Recursive workhorse for the following. */
12340 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS
*cum
, const_tree type
,
12341 HOST_WIDE_INT startbitpos
, rtx rvec
[],
12346 for (f
= TYPE_FIELDS (type
); f
; f
= DECL_CHAIN (f
))
12347 if (TREE_CODE (f
) == FIELD_DECL
)
12349 HOST_WIDE_INT bitpos
= startbitpos
;
12350 tree ftype
= TREE_TYPE (f
);
12352 if (ftype
== error_mark_node
)
12354 mode
= TYPE_MODE (ftype
);
12356 if (DECL_SIZE (f
) != 0
12357 && tree_fits_uhwi_p (bit_position (f
)))
12358 bitpos
+= int_bit_position (f
);
12360 /* ??? FIXME: else assume zero offset. */
12362 if (TREE_CODE (ftype
) == RECORD_TYPE
)
12363 rs6000_darwin64_record_arg_recurse (cum
, ftype
, bitpos
, rvec
, k
);
12364 else if (cum
->named
&& USE_FP_FOR_ARG_P (cum
, mode
))
12366 unsigned n_fpreg
= (GET_MODE_SIZE (mode
) + 7) >> 3;
12370 case SCmode
: mode
= SFmode
; break;
12371 case DCmode
: mode
= DFmode
; break;
12372 case TCmode
: mode
= TFmode
; break;
12376 rs6000_darwin64_record_arg_flush (cum
, bitpos
, rvec
, k
);
12377 if (cum
->fregno
+ n_fpreg
> FP_ARG_MAX_REG
+ 1)
12379 gcc_assert (cum
->fregno
== FP_ARG_MAX_REG
12380 && (mode
== TFmode
|| mode
== TDmode
));
12381 /* Long double or _Decimal128 split over regs and memory. */
12382 mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
: DFmode
;
12386 = gen_rtx_EXPR_LIST (VOIDmode
,
12387 gen_rtx_REG (mode
, cum
->fregno
++),
12388 GEN_INT (bitpos
/ BITS_PER_UNIT
));
12389 if (FLOAT128_2REG_P (mode
))
12392 else if (cum
->named
&& USE_ALTIVEC_FOR_ARG_P (cum
, mode
, 1))
12394 rs6000_darwin64_record_arg_flush (cum
, bitpos
, rvec
, k
);
12396 = gen_rtx_EXPR_LIST (VOIDmode
,
12397 gen_rtx_REG (mode
, cum
->vregno
++),
12398 GEN_INT (bitpos
/ BITS_PER_UNIT
));
12400 else if (cum
->intoffset
== -1)
12401 cum
->intoffset
= bitpos
;
12405 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12406 the register(s) to be used for each field and subfield of a struct
12407 being passed by value, along with the offset of where the
12408 register's value may be found in the block. FP fields go in FP
12409 register, vector fields go in vector registers, and everything
12410 else goes in int registers, packed as in memory.
12412 This code is also used for function return values. RETVAL indicates
12413 whether this is the case.
12415 Much of this is taken from the SPARC V9 port, which has a similar
12416 calling convention. */
12419 rs6000_darwin64_record_arg (CUMULATIVE_ARGS
*orig_cum
, const_tree type
,
12420 bool named
, bool retval
)
12422 rtx rvec
[FIRST_PSEUDO_REGISTER
];
12423 int k
= 1, kbase
= 1;
12424 HOST_WIDE_INT typesize
= int_size_in_bytes (type
);
12425 /* This is a copy; modifications are not visible to our caller. */
12426 CUMULATIVE_ARGS copy_cum
= *orig_cum
;
12427 CUMULATIVE_ARGS
*cum
= ©_cum
;
12429 /* Pad to 16 byte boundary if needed. */
12430 if (!retval
&& TYPE_ALIGN (type
) >= 2 * BITS_PER_WORD
12431 && (cum
->words
% 2) != 0)
12434 cum
->intoffset
= 0;
12435 cum
->use_stack
= 0;
12436 cum
->named
= named
;
12438 /* Put entries into rvec[] for individual FP and vector fields, and
12439 for the chunks of memory that go in int regs. Note we start at
12440 element 1; 0 is reserved for an indication of using memory, and
12441 may or may not be filled in below. */
12442 rs6000_darwin64_record_arg_recurse (cum
, type
, /* startbit pos= */ 0, rvec
, &k
);
12443 rs6000_darwin64_record_arg_flush (cum
, typesize
* BITS_PER_UNIT
, rvec
, &k
);
12445 /* If any part of the struct went on the stack put all of it there.
12446 This hack is because the generic code for
12447 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12448 parts of the struct are not at the beginning. */
12449 if (cum
->use_stack
)
12452 return NULL_RTX
; /* doesn't go in registers at all */
12454 rvec
[0] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12456 if (k
> 1 || cum
->use_stack
)
12457 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (k
- kbase
, &rvec
[kbase
]));
12462 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12465 rs6000_mixed_function_arg (machine_mode mode
, const_tree type
,
12470 rtx rvec
[GP_ARG_NUM_REG
+ 1];
12472 if (align_words
>= GP_ARG_NUM_REG
)
12475 n_units
= rs6000_arg_size (mode
, type
);
12477 /* Optimize the simple case where the arg fits in one gpr, except in
12478 the case of BLKmode due to assign_parms assuming that registers are
12479 BITS_PER_WORD wide. */
12481 || (n_units
== 1 && mode
!= BLKmode
))
12482 return gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12485 if (align_words
+ n_units
> GP_ARG_NUM_REG
)
12486 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12487 using a magic NULL_RTX component.
12488 This is not strictly correct. Only some of the arg belongs in
12489 memory, not all of it. However, the normal scheme using
12490 function_arg_partial_nregs can result in unusual subregs, eg.
12491 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12492 store the whole arg to memory is often more efficient than code
12493 to store pieces, and we know that space is available in the right
12494 place for the whole arg. */
12495 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12500 rtx r
= gen_rtx_REG (SImode
, GP_ARG_MIN_REG
+ align_words
);
12501 rtx off
= GEN_INT (i
++ * 4);
12502 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12504 while (++align_words
< GP_ARG_NUM_REG
&& --n_units
!= 0);
12506 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (k
, rvec
));
12509 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12510 but must also be copied into the parameter save area starting at
12511 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12512 to the GPRs and/or memory. Return the number of elements used. */
12515 rs6000_psave_function_arg (machine_mode mode
, const_tree type
,
12516 int align_words
, rtx
*rvec
)
12520 if (align_words
< GP_ARG_NUM_REG
)
12522 int n_words
= rs6000_arg_size (mode
, type
);
12524 if (align_words
+ n_words
> GP_ARG_NUM_REG
12526 || (TARGET_32BIT
&& TARGET_POWERPC64
))
12528 /* If this is partially on the stack, then we only
12529 include the portion actually in registers here. */
12530 machine_mode rmode
= TARGET_32BIT
? SImode
: DImode
;
12533 if (align_words
+ n_words
> GP_ARG_NUM_REG
)
12535 /* Not all of the arg fits in gprs. Say that it goes in memory
12536 too, using a magic NULL_RTX component. Also see comment in
12537 rs6000_mixed_function_arg for why the normal
12538 function_arg_partial_nregs scheme doesn't work in this case. */
12539 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12544 rtx r
= gen_rtx_REG (rmode
, GP_ARG_MIN_REG
+ align_words
);
12545 rtx off
= GEN_INT (i
++ * GET_MODE_SIZE (rmode
));
12546 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12548 while (++align_words
< GP_ARG_NUM_REG
&& --n_words
!= 0);
12552 /* The whole arg fits in gprs. */
12553 rtx r
= gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12554 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, const0_rtx
);
12559 /* It's entirely in memory. */
12560 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, NULL_RTX
, const0_rtx
);
12566 /* RVEC is a vector of K components of an argument of mode MODE.
12567 Construct the final function_arg return value from it. */
12570 rs6000_finish_function_arg (machine_mode mode
, rtx
*rvec
, int k
)
12572 gcc_assert (k
>= 1);
12574 /* Avoid returning a PARALLEL in the trivial cases. */
12577 if (XEXP (rvec
[0], 0) == NULL_RTX
)
12580 if (GET_MODE (XEXP (rvec
[0], 0)) == mode
)
12581 return XEXP (rvec
[0], 0);
12584 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (k
, rvec
));
12587 /* Determine where to put an argument to a function.
12588 Value is zero to push the argument on the stack,
12589 or a hard register in which to store the argument.
12591 MODE is the argument's machine mode.
12592 TYPE is the data type of the argument (as a tree).
12593 This is null for libcalls where that information may
12595 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12596 the preceding args and about the function being called. It is
12597 not modified in this routine.
12598 NAMED is nonzero if this argument is a named parameter
12599 (otherwise it is an extra parameter matching an ellipsis).
12601 On RS/6000 the first eight words of non-FP are normally in registers
12602 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12603 Under V.4, the first 8 FP args are in registers.
12605 If this is floating-point and no prototype is specified, we use
12606 both an FP and integer register (or possibly FP reg and stack). Library
12607 functions (when CALL_LIBCALL is set) always have the proper types for args,
12608 so we can pass the FP value just in one register. emit_library_function
12609 doesn't support PARALLEL anyway.
12611 Note that for args passed by reference, function_arg will be called
12612 with MODE and TYPE set to that of the pointer to the arg, not the arg
12616 rs6000_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
12617 const_tree type
, bool named
)
12619 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12620 enum rs6000_abi abi
= DEFAULT_ABI
;
12621 machine_mode elt_mode
;
12624 /* Return a marker to indicate whether CR1 needs to set or clear the
12625 bit that V.4 uses to say fp args were passed in registers.
12626 Assume that we don't need the marker for software floating point,
12627 or compiler generated library calls. */
12628 if (mode
== VOIDmode
)
12631 && (cum
->call_cookie
& CALL_LIBCALL
) == 0
12633 || (cum
->nargs_prototype
< 0
12634 && (cum
->prototype
|| TARGET_NO_PROTOTYPE
)))
12635 && TARGET_HARD_FLOAT
)
12636 return GEN_INT (cum
->call_cookie
12637 | ((cum
->fregno
== FP_ARG_MIN_REG
)
12638 ? CALL_V4_SET_FP_ARGS
12639 : CALL_V4_CLEAR_FP_ARGS
));
12641 return GEN_INT (cum
->call_cookie
& ~CALL_LIBCALL
);
12644 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12646 if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
12648 rtx rslt
= rs6000_darwin64_record_arg (cum
, type
, named
, /*retval= */false);
12649 if (rslt
!= NULL_RTX
)
12651 /* Else fall through to usual handling. */
12654 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
12656 rtx rvec
[GP_ARG_NUM_REG
+ AGGR_ARG_NUM_REG
+ 1];
12660 /* Do we also need to pass this argument in the parameter save area?
12661 Library support functions for IEEE 128-bit are assumed to not need the
12662 value passed both in GPRs and in vector registers. */
12663 if (TARGET_64BIT
&& !cum
->prototype
12664 && (!cum
->libcall
|| !FLOAT128_VECTOR_P (elt_mode
)))
12666 int align_words
= ROUND_UP (cum
->words
, 2);
12667 k
= rs6000_psave_function_arg (mode
, type
, align_words
, rvec
);
12670 /* Describe where this argument goes in the vector registers. */
12671 for (i
= 0; i
< n_elts
&& cum
->vregno
+ i
<= ALTIVEC_ARG_MAX_REG
; i
++)
12673 r
= gen_rtx_REG (elt_mode
, cum
->vregno
+ i
);
12674 off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
12675 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12678 return rs6000_finish_function_arg (mode
, rvec
, k
);
12680 else if (TARGET_ALTIVEC_ABI
12681 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
12682 || (type
&& TREE_CODE (type
) == VECTOR_TYPE
12683 && int_size_in_bytes (type
) == 16)))
12685 if (named
|| abi
== ABI_V4
)
12689 /* Vector parameters to varargs functions under AIX or Darwin
12690 get passed in memory and possibly also in GPRs. */
12691 int align
, align_words
, n_words
;
12692 machine_mode part_mode
;
12694 /* Vector parameters must be 16-byte aligned. In 32-bit
12695 mode this means we need to take into account the offset
12696 to the parameter save area. In 64-bit mode, they just
12697 have to start on an even word, since the parameter save
12698 area is 16-byte aligned. */
12700 align
= -(rs6000_parm_offset () + cum
->words
) & 3;
12702 align
= cum
->words
& 1;
12703 align_words
= cum
->words
+ align
;
12705 /* Out of registers? Memory, then. */
12706 if (align_words
>= GP_ARG_NUM_REG
)
12709 if (TARGET_32BIT
&& TARGET_POWERPC64
)
12710 return rs6000_mixed_function_arg (mode
, type
, align_words
);
12712 /* The vector value goes in GPRs. Only the part of the
12713 value in GPRs is reported here. */
12715 n_words
= rs6000_arg_size (mode
, type
);
12716 if (align_words
+ n_words
> GP_ARG_NUM_REG
)
12717 /* Fortunately, there are only two possibilities, the value
12718 is either wholly in GPRs or half in GPRs and half not. */
12719 part_mode
= DImode
;
12721 return gen_rtx_REG (part_mode
, GP_ARG_MIN_REG
+ align_words
);
12725 else if (abi
== ABI_V4
)
12727 if (abi_v4_pass_in_fpr (mode
))
12729 /* _Decimal128 must use an even/odd register pair. This assumes
12730 that the register number is odd when fregno is odd. */
12731 if (mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12734 if (cum
->fregno
+ (FLOAT128_2REG_P (mode
) ? 1 : 0)
12735 <= FP_ARG_V4_MAX_REG
)
12736 return gen_rtx_REG (mode
, cum
->fregno
);
12742 int n_words
= rs6000_arg_size (mode
, type
);
12743 int gregno
= cum
->sysv_gregno
;
12745 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12746 As does any other 2 word item such as complex int due to a
12747 historical mistake. */
12749 gregno
+= (1 - gregno
) & 1;
12751 /* Multi-reg args are not split between registers and stack. */
12752 if (gregno
+ n_words
- 1 > GP_ARG_MAX_REG
)
12755 if (TARGET_32BIT
&& TARGET_POWERPC64
)
12756 return rs6000_mixed_function_arg (mode
, type
,
12757 gregno
- GP_ARG_MIN_REG
);
12758 return gen_rtx_REG (mode
, gregno
);
12763 int align_words
= rs6000_parm_start (mode
, type
, cum
->words
);
12765 /* _Decimal128 must be passed in an even/odd float register pair.
12766 This assumes that the register number is odd when fregno is odd. */
12767 if (elt_mode
== TDmode
&& (cum
->fregno
% 2) == 1)
12770 if (USE_FP_FOR_ARG_P (cum
, elt_mode
))
12772 rtx rvec
[GP_ARG_NUM_REG
+ AGGR_ARG_NUM_REG
+ 1];
12775 unsigned long n_fpreg
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
12778 /* Do we also need to pass this argument in the parameter
12780 if (type
&& (cum
->nargs_prototype
<= 0
12781 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
12782 && TARGET_XL_COMPAT
12783 && align_words
>= GP_ARG_NUM_REG
)))
12784 k
= rs6000_psave_function_arg (mode
, type
, align_words
, rvec
);
12786 /* Describe where this argument goes in the fprs. */
12787 for (i
= 0; i
< n_elts
12788 && cum
->fregno
+ i
* n_fpreg
<= FP_ARG_MAX_REG
; i
++)
12790 /* Check if the argument is split over registers and memory.
12791 This can only ever happen for long double or _Decimal128;
12792 complex types are handled via split_complex_arg. */
12793 machine_mode fmode
= elt_mode
;
12794 if (cum
->fregno
+ (i
+ 1) * n_fpreg
> FP_ARG_MAX_REG
+ 1)
12796 gcc_assert (FLOAT128_2REG_P (fmode
));
12797 fmode
= DECIMAL_FLOAT_MODE_P (fmode
) ? DDmode
: DFmode
;
12800 r
= gen_rtx_REG (fmode
, cum
->fregno
+ i
* n_fpreg
);
12801 off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
12802 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12805 /* If there were not enough FPRs to hold the argument, the rest
12806 usually goes into memory. However, if the current position
12807 is still within the register parameter area, a portion may
12808 actually have to go into GPRs.
12810 Note that it may happen that the portion of the argument
12811 passed in the first "half" of the first GPR was already
12812 passed in the last FPR as well.
12814 For unnamed arguments, we already set up GPRs to cover the
12815 whole argument in rs6000_psave_function_arg, so there is
12816 nothing further to do at this point. */
12817 fpr_words
= (i
* GET_MODE_SIZE (elt_mode
)) / (TARGET_32BIT
? 4 : 8);
12818 if (i
< n_elts
&& align_words
+ fpr_words
< GP_ARG_NUM_REG
12819 && cum
->nargs_prototype
> 0)
12821 static bool warned
;
12823 machine_mode rmode
= TARGET_32BIT
? SImode
: DImode
;
12824 int n_words
= rs6000_arg_size (mode
, type
);
12826 align_words
+= fpr_words
;
12827 n_words
-= fpr_words
;
12831 r
= gen_rtx_REG (rmode
, GP_ARG_MIN_REG
+ align_words
);
12832 off
= GEN_INT (fpr_words
++ * GET_MODE_SIZE (rmode
));
12833 rvec
[k
++] = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
12835 while (++align_words
< GP_ARG_NUM_REG
&& --n_words
!= 0);
12837 if (!warned
&& warn_psabi
)
12840 inform (input_location
,
12841 "the ABI of passing homogeneous float aggregates"
12842 " has changed in GCC 5");
12846 return rs6000_finish_function_arg (mode
, rvec
, k
);
12848 else if (align_words
< GP_ARG_NUM_REG
)
12850 if (TARGET_32BIT
&& TARGET_POWERPC64
)
12851 return rs6000_mixed_function_arg (mode
, type
, align_words
);
12853 return gen_rtx_REG (mode
, GP_ARG_MIN_REG
+ align_words
);
12860 /* For an arg passed partly in registers and partly in memory, this is
12861 the number of bytes passed in registers. For args passed entirely in
12862 registers or entirely in memory, zero. When an arg is described by a
12863 PARALLEL, perhaps using more than one register type, this function
12864 returns the number of bytes used by the first element of the PARALLEL. */
12867 rs6000_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
12868 tree type
, bool named
)
12870 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
12871 bool passed_in_gprs
= true;
12874 machine_mode elt_mode
;
12877 rs6000_discover_homogeneous_aggregate (mode
, type
, &elt_mode
, &n_elts
);
12879 if (DEFAULT_ABI
== ABI_V4
)
12882 if (USE_ALTIVEC_FOR_ARG_P (cum
, elt_mode
, named
))
12884 /* If we are passing this arg in the fixed parameter save area (gprs or
12885 memory) as well as VRs, we do not use the partial bytes mechanism;
12886 instead, rs6000_function_arg will return a PARALLEL including a memory
12887 element as necessary. Library support functions for IEEE 128-bit are
12888 assumed to not need the value passed both in GPRs and in vector
12890 if (TARGET_64BIT
&& !cum
->prototype
12891 && (!cum
->libcall
|| !FLOAT128_VECTOR_P (elt_mode
)))
12894 /* Otherwise, we pass in VRs only. Check for partial copies. */
12895 passed_in_gprs
= false;
12896 if (cum
->vregno
+ n_elts
> ALTIVEC_ARG_MAX_REG
+ 1)
12897 ret
= (ALTIVEC_ARG_MAX_REG
+ 1 - cum
->vregno
) * 16;
12900 /* In this complicated case we just disable the partial_nregs code. */
12901 if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (mode
, type
))
12904 align_words
= rs6000_parm_start (mode
, type
, cum
->words
);
12906 if (USE_FP_FOR_ARG_P (cum
, elt_mode
))
12908 unsigned long n_fpreg
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
12910 /* If we are passing this arg in the fixed parameter save area
12911 (gprs or memory) as well as FPRs, we do not use the partial
12912 bytes mechanism; instead, rs6000_function_arg will return a
12913 PARALLEL including a memory element as necessary. */
12915 && (cum
->nargs_prototype
<= 0
12916 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
12917 && TARGET_XL_COMPAT
12918 && align_words
>= GP_ARG_NUM_REG
)))
12921 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12922 passed_in_gprs
= false;
12923 if (cum
->fregno
+ n_elts
* n_fpreg
> FP_ARG_MAX_REG
+ 1)
12925 /* Compute number of bytes / words passed in FPRs. If there
12926 is still space available in the register parameter area
12927 *after* that amount, a part of the argument will be passed
12928 in GPRs. In that case, the total amount passed in any
12929 registers is equal to the amount that would have been passed
12930 in GPRs if everything were passed there, so we fall back to
12931 the GPR code below to compute the appropriate value. */
12932 int fpr
= ((FP_ARG_MAX_REG
+ 1 - cum
->fregno
)
12933 * MIN (8, GET_MODE_SIZE (elt_mode
)));
12934 int fpr_words
= fpr
/ (TARGET_32BIT
? 4 : 8);
12936 if (align_words
+ fpr_words
< GP_ARG_NUM_REG
)
12937 passed_in_gprs
= true;
12944 && align_words
< GP_ARG_NUM_REG
12945 && GP_ARG_NUM_REG
< align_words
+ rs6000_arg_size (mode
, type
))
12946 ret
= (GP_ARG_NUM_REG
- align_words
) * (TARGET_32BIT
? 4 : 8);
12948 if (ret
!= 0 && TARGET_DEBUG_ARG
)
12949 fprintf (stderr
, "rs6000_arg_partial_bytes: %d\n", ret
);
12954 /* A C expression that indicates when an argument must be passed by
12955 reference. If nonzero for an argument, a copy of that argument is
12956 made in memory and a pointer to the argument is passed instead of
12957 the argument itself. The pointer is passed in whatever way is
12958 appropriate for passing a pointer to that type.
12960 Under V.4, aggregates and long double are passed by reference.
12962 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12963 reference unless the AltiVec vector extension ABI is in force.
12965 As an extension to all ABIs, variable sized types are passed by
12969 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
12970 machine_mode mode
, const_tree type
,
12971 bool named ATTRIBUTE_UNUSED
)
12976 if (DEFAULT_ABI
== ABI_V4
&& TARGET_IEEEQUAD
12977 && FLOAT128_IEEE_P (TYPE_MODE (type
)))
12979 if (TARGET_DEBUG_ARG
)
12980 fprintf (stderr
, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12984 if (DEFAULT_ABI
== ABI_V4
&& AGGREGATE_TYPE_P (type
))
12986 if (TARGET_DEBUG_ARG
)
12987 fprintf (stderr
, "function_arg_pass_by_reference: V4 aggregate\n");
12991 if (int_size_in_bytes (type
) < 0)
12993 if (TARGET_DEBUG_ARG
)
12994 fprintf (stderr
, "function_arg_pass_by_reference: variable size\n");
12998 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12999 modes only exist for GCC vector types if -maltivec. */
13000 if (TARGET_32BIT
&& !TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
13002 if (TARGET_DEBUG_ARG
)
13003 fprintf (stderr
, "function_arg_pass_by_reference: AltiVec\n");
13007 /* Pass synthetic vectors in memory. */
13008 if (TREE_CODE (type
) == VECTOR_TYPE
13009 && int_size_in_bytes (type
) > (TARGET_ALTIVEC_ABI
? 16 : 8))
13011 static bool warned_for_pass_big_vectors
= false;
13012 if (TARGET_DEBUG_ARG
)
13013 fprintf (stderr
, "function_arg_pass_by_reference: synthetic vector\n");
13014 if (!warned_for_pass_big_vectors
)
13016 warning (OPT_Wpsabi
, "GCC vector passed by reference: "
13017 "non-standard ABI extension with no compatibility guarantee");
13018 warned_for_pass_big_vectors
= true;
13026 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13027 already processes. Return true if the parameter must be passed
13028 (fully or partially) on the stack. */
13031 rs6000_parm_needs_stack (cumulative_args_t args_so_far
, tree type
)
13037 /* Catch errors. */
13038 if (type
== NULL
|| type
== error_mark_node
)
13041 /* Handle types with no storage requirement. */
13042 if (TYPE_MODE (type
) == VOIDmode
)
13045 /* Handle complex types. */
13046 if (TREE_CODE (type
) == COMPLEX_TYPE
)
13047 return (rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (type
))
13048 || rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (type
)));
13050 /* Handle transparent aggregates. */
13051 if ((TREE_CODE (type
) == UNION_TYPE
|| TREE_CODE (type
) == RECORD_TYPE
)
13052 && TYPE_TRANSPARENT_AGGR (type
))
13053 type
= TREE_TYPE (first_field (type
));
13055 /* See if this arg was passed by invisible reference. */
13056 if (pass_by_reference (get_cumulative_args (args_so_far
),
13057 TYPE_MODE (type
), type
, true))
13058 type
= build_pointer_type (type
);
13060 /* Find mode as it is passed by the ABI. */
13061 unsignedp
= TYPE_UNSIGNED (type
);
13062 mode
= promote_mode (type
, TYPE_MODE (type
), &unsignedp
);
13064 /* If we must pass in stack, we need a stack. */
13065 if (rs6000_must_pass_in_stack (mode
, type
))
13068 /* If there is no incoming register, we need a stack. */
13069 entry_parm
= rs6000_function_arg (args_so_far
, mode
, type
, true);
13070 if (entry_parm
== NULL
)
13073 /* Likewise if we need to pass both in registers and on the stack. */
13074 if (GET_CODE (entry_parm
) == PARALLEL
13075 && XEXP (XVECEXP (entry_parm
, 0, 0), 0) == NULL_RTX
)
13078 /* Also true if we're partially in registers and partially not. */
13079 if (rs6000_arg_partial_bytes (args_so_far
, mode
, type
, true) != 0)
13082 /* Update info on where next arg arrives in registers. */
13083 rs6000_function_arg_advance (args_so_far
, mode
, type
, true);
13087 /* Return true if FUN has no prototype, has a variable argument
13088 list, or passes any parameter in memory. */
13091 rs6000_function_parms_need_stack (tree fun
, bool incoming
)
13093 tree fntype
, result
;
13094 CUMULATIVE_ARGS args_so_far_v
;
13095 cumulative_args_t args_so_far
;
13098 /* Must be a libcall, all of which only use reg parms. */
13103 fntype
= TREE_TYPE (fun
);
13105 /* Varargs functions need the parameter save area. */
13106 if ((!incoming
&& !prototype_p (fntype
)) || stdarg_p (fntype
))
13109 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v
, fntype
, NULL_RTX
);
13110 args_so_far
= pack_cumulative_args (&args_so_far_v
);
13112 /* When incoming, we will have been passed the function decl.
13113 It is necessary to use the decl to handle K&R style functions,
13114 where TYPE_ARG_TYPES may not be available. */
13117 gcc_assert (DECL_P (fun
));
13118 result
= DECL_RESULT (fun
);
13121 result
= TREE_TYPE (fntype
);
13123 if (result
&& aggregate_value_p (result
, fntype
))
13125 if (!TYPE_P (result
))
13126 result
= TREE_TYPE (result
);
13127 result
= build_pointer_type (result
);
13128 rs6000_parm_needs_stack (args_so_far
, result
);
13135 for (parm
= DECL_ARGUMENTS (fun
);
13136 parm
&& parm
!= void_list_node
;
13137 parm
= TREE_CHAIN (parm
))
13138 if (rs6000_parm_needs_stack (args_so_far
, TREE_TYPE (parm
)))
13143 function_args_iterator args_iter
;
13146 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
13147 if (rs6000_parm_needs_stack (args_so_far
, arg_type
))
13154 /* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
13155 usually a constant depending on the ABI. However, in the ELFv2 ABI
13156 the register parameter area is optional when calling a function that
13157 has a prototype is scope, has no variable argument list, and passes
13158 all parameters in registers. */
13161 rs6000_reg_parm_stack_space (tree fun
, bool incoming
)
13163 int reg_parm_stack_space
;
13165 switch (DEFAULT_ABI
)
13168 reg_parm_stack_space
= 0;
13173 reg_parm_stack_space
= TARGET_64BIT
? 64 : 32;
13177 /* ??? Recomputing this every time is a bit expensive. Is there
13178 a place to cache this information? */
13179 if (rs6000_function_parms_need_stack (fun
, incoming
))
13180 reg_parm_stack_space
= TARGET_64BIT
? 64 : 32;
13182 reg_parm_stack_space
= 0;
13186 return reg_parm_stack_space
;
13190 rs6000_move_block_from_reg (int regno
, rtx x
, int nregs
)
13193 machine_mode reg_mode
= TARGET_32BIT
? SImode
: DImode
;
13198 for (i
= 0; i
< nregs
; i
++)
13200 rtx tem
= adjust_address_nv (x
, reg_mode
, i
* GET_MODE_SIZE (reg_mode
));
13201 if (reload_completed
)
13203 if (! strict_memory_address_p (reg_mode
, XEXP (tem
, 0)))
13206 tem
= simplify_gen_subreg (reg_mode
, x
, BLKmode
,
13207 i
* GET_MODE_SIZE (reg_mode
));
13210 tem
= replace_equiv_address (tem
, XEXP (tem
, 0));
13214 emit_move_insn (tem
, gen_rtx_REG (reg_mode
, regno
+ i
));
13218 /* Perform any needed actions needed for a function that is receiving a
13219 variable number of arguments.
13223 MODE and TYPE are the mode and type of the current parameter.
13225 PRETEND_SIZE is a variable that should be set to the amount of stack
13226 that must be pushed by the prolog to pretend that our caller pushed
13229 Normally, this macro will push all remaining incoming registers on the
13230 stack and set PRETEND_SIZE to the length of the registers pushed. */
13233 setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
13234 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
13237 CUMULATIVE_ARGS next_cum
;
13238 int reg_size
= TARGET_32BIT
? 4 : 8;
13239 rtx save_area
= NULL_RTX
, mem
;
13240 int first_reg_offset
;
13241 alias_set_type set
;
13243 /* Skip the last named argument. */
13244 next_cum
= *get_cumulative_args (cum
);
13245 rs6000_function_arg_advance_1 (&next_cum
, mode
, type
, true, 0);
13247 if (DEFAULT_ABI
== ABI_V4
)
13249 first_reg_offset
= next_cum
.sysv_gregno
- GP_ARG_MIN_REG
;
13253 int gpr_reg_num
= 0, gpr_size
= 0, fpr_size
= 0;
13254 HOST_WIDE_INT offset
= 0;
13256 /* Try to optimize the size of the varargs save area.
13257 The ABI requires that ap.reg_save_area is doubleword
13258 aligned, but we don't need to allocate space for all
13259 the bytes, only those to which we actually will save
13261 if (cfun
->va_list_gpr_size
&& first_reg_offset
< GP_ARG_NUM_REG
)
13262 gpr_reg_num
= GP_ARG_NUM_REG
- first_reg_offset
;
13263 if (TARGET_HARD_FLOAT
13264 && next_cum
.fregno
<= FP_ARG_V4_MAX_REG
13265 && cfun
->va_list_fpr_size
)
13268 fpr_size
= (next_cum
.fregno
- FP_ARG_MIN_REG
)
13269 * UNITS_PER_FP_WORD
;
13270 if (cfun
->va_list_fpr_size
13271 < FP_ARG_V4_MAX_REG
+ 1 - next_cum
.fregno
)
13272 fpr_size
+= cfun
->va_list_fpr_size
* UNITS_PER_FP_WORD
;
13274 fpr_size
+= (FP_ARG_V4_MAX_REG
+ 1 - next_cum
.fregno
)
13275 * UNITS_PER_FP_WORD
;
13279 offset
= -((first_reg_offset
* reg_size
) & ~7);
13280 if (!fpr_size
&& gpr_reg_num
> cfun
->va_list_gpr_size
)
13282 gpr_reg_num
= cfun
->va_list_gpr_size
;
13283 if (reg_size
== 4 && (first_reg_offset
& 1))
13286 gpr_size
= (gpr_reg_num
* reg_size
+ 7) & ~7;
13289 offset
= - (int) (next_cum
.fregno
- FP_ARG_MIN_REG
)
13290 * UNITS_PER_FP_WORD
13291 - (int) (GP_ARG_NUM_REG
* reg_size
);
13293 if (gpr_size
+ fpr_size
)
13296 = assign_stack_local (BLKmode
, gpr_size
+ fpr_size
, 64);
13297 gcc_assert (GET_CODE (reg_save_area
) == MEM
);
13298 reg_save_area
= XEXP (reg_save_area
, 0);
13299 if (GET_CODE (reg_save_area
) == PLUS
)
13301 gcc_assert (XEXP (reg_save_area
, 0)
13302 == virtual_stack_vars_rtx
);
13303 gcc_assert (GET_CODE (XEXP (reg_save_area
, 1)) == CONST_INT
);
13304 offset
+= INTVAL (XEXP (reg_save_area
, 1));
13307 gcc_assert (reg_save_area
== virtual_stack_vars_rtx
);
13310 cfun
->machine
->varargs_save_offset
= offset
;
13311 save_area
= plus_constant (Pmode
, virtual_stack_vars_rtx
, offset
);
13316 first_reg_offset
= next_cum
.words
;
13317 save_area
= crtl
->args
.internal_arg_pointer
;
13319 if (targetm
.calls
.must_pass_in_stack (mode
, type
))
13320 first_reg_offset
+= rs6000_arg_size (TYPE_MODE (type
), type
);
13323 set
= get_varargs_alias_set ();
13324 if (! no_rtl
&& first_reg_offset
< GP_ARG_NUM_REG
13325 && cfun
->va_list_gpr_size
)
13327 int n_gpr
, nregs
= GP_ARG_NUM_REG
- first_reg_offset
;
13329 if (va_list_gpr_counter_field
)
13330 /* V4 va_list_gpr_size counts number of registers needed. */
13331 n_gpr
= cfun
->va_list_gpr_size
;
13333 /* char * va_list instead counts number of bytes needed. */
13334 n_gpr
= (cfun
->va_list_gpr_size
+ reg_size
- 1) / reg_size
;
13339 mem
= gen_rtx_MEM (BLKmode
,
13340 plus_constant (Pmode
, save_area
,
13341 first_reg_offset
* reg_size
));
13342 MEM_NOTRAP_P (mem
) = 1;
13343 set_mem_alias_set (mem
, set
);
13344 set_mem_align (mem
, BITS_PER_WORD
);
13346 rs6000_move_block_from_reg (GP_ARG_MIN_REG
+ first_reg_offset
, mem
,
13350 /* Save FP registers if needed. */
13351 if (DEFAULT_ABI
== ABI_V4
13352 && TARGET_HARD_FLOAT
13354 && next_cum
.fregno
<= FP_ARG_V4_MAX_REG
13355 && cfun
->va_list_fpr_size
)
13357 int fregno
= next_cum
.fregno
, nregs
;
13358 rtx cr1
= gen_rtx_REG (CCmode
, CR1_REGNO
);
13359 rtx lab
= gen_label_rtx ();
13360 int off
= (GP_ARG_NUM_REG
* reg_size
) + ((fregno
- FP_ARG_MIN_REG
)
13361 * UNITS_PER_FP_WORD
);
13364 (gen_rtx_SET (pc_rtx
,
13365 gen_rtx_IF_THEN_ELSE (VOIDmode
,
13366 gen_rtx_NE (VOIDmode
, cr1
,
13368 gen_rtx_LABEL_REF (VOIDmode
, lab
),
13372 fregno
<= FP_ARG_V4_MAX_REG
&& nregs
< cfun
->va_list_fpr_size
;
13373 fregno
++, off
+= UNITS_PER_FP_WORD
, nregs
++)
13375 mem
= gen_rtx_MEM ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13377 plus_constant (Pmode
, save_area
, off
));
13378 MEM_NOTRAP_P (mem
) = 1;
13379 set_mem_alias_set (mem
, set
);
13380 set_mem_align (mem
, GET_MODE_ALIGNMENT (
13381 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13382 ? DFmode
: SFmode
));
13383 emit_move_insn (mem
, gen_rtx_REG (
13384 (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
13385 ? DFmode
: SFmode
, fregno
));
13392 /* Create the va_list data type. */
13395 rs6000_build_builtin_va_list (void)
13397 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
, record
, type_decl
;
13399 /* For AIX, prefer 'char *' because that's what the system
13400 header files like. */
13401 if (DEFAULT_ABI
!= ABI_V4
)
13402 return build_pointer_type (char_type_node
);
13404 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
13405 type_decl
= build_decl (BUILTINS_LOCATION
, TYPE_DECL
,
13406 get_identifier ("__va_list_tag"), record
);
13408 f_gpr
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("gpr"),
13409 unsigned_char_type_node
);
13410 f_fpr
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
, get_identifier ("fpr"),
13411 unsigned_char_type_node
);
13412 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13413 every user file. */
13414 f_res
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13415 get_identifier ("reserved"), short_unsigned_type_node
);
13416 f_ovf
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13417 get_identifier ("overflow_arg_area"),
13419 f_sav
= build_decl (BUILTINS_LOCATION
, FIELD_DECL
,
13420 get_identifier ("reg_save_area"),
13423 va_list_gpr_counter_field
= f_gpr
;
13424 va_list_fpr_counter_field
= f_fpr
;
13426 DECL_FIELD_CONTEXT (f_gpr
) = record
;
13427 DECL_FIELD_CONTEXT (f_fpr
) = record
;
13428 DECL_FIELD_CONTEXT (f_res
) = record
;
13429 DECL_FIELD_CONTEXT (f_ovf
) = record
;
13430 DECL_FIELD_CONTEXT (f_sav
) = record
;
13432 TYPE_STUB_DECL (record
) = type_decl
;
13433 TYPE_NAME (record
) = type_decl
;
13434 TYPE_FIELDS (record
) = f_gpr
;
13435 DECL_CHAIN (f_gpr
) = f_fpr
;
13436 DECL_CHAIN (f_fpr
) = f_res
;
13437 DECL_CHAIN (f_res
) = f_ovf
;
13438 DECL_CHAIN (f_ovf
) = f_sav
;
13440 layout_type (record
);
13442 /* The correct type is an array type of one element. */
13443 return build_array_type (record
, build_index_type (size_zero_node
));
13446 /* Implement va_start. */
13449 rs6000_va_start (tree valist
, rtx nextarg
)
13451 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
13452 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13453 tree gpr
, fpr
, ovf
, sav
, t
;
13455 /* Only SVR4 needs something special. */
13456 if (DEFAULT_ABI
!= ABI_V4
)
13458 std_expand_builtin_va_start (valist
, nextarg
);
13462 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13463 f_fpr
= DECL_CHAIN (f_gpr
);
13464 f_res
= DECL_CHAIN (f_fpr
);
13465 f_ovf
= DECL_CHAIN (f_res
);
13466 f_sav
= DECL_CHAIN (f_ovf
);
13468 valist
= build_simple_mem_ref (valist
);
13469 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13470 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
13472 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
13474 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
13477 /* Count number of gp and fp argument registers used. */
13478 words
= crtl
->args
.info
.words
;
13479 n_gpr
= MIN (crtl
->args
.info
.sysv_gregno
- GP_ARG_MIN_REG
,
13481 n_fpr
= MIN (crtl
->args
.info
.fregno
- FP_ARG_MIN_REG
,
13484 if (TARGET_DEBUG_ARG
)
13485 fprintf (stderr
, "va_start: words = " HOST_WIDE_INT_PRINT_DEC
", n_gpr = "
13486 HOST_WIDE_INT_PRINT_DEC
", n_fpr = " HOST_WIDE_INT_PRINT_DEC
"\n",
13487 words
, n_gpr
, n_fpr
);
13489 if (cfun
->va_list_gpr_size
)
13491 t
= build2 (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
13492 build_int_cst (NULL_TREE
, n_gpr
));
13493 TREE_SIDE_EFFECTS (t
) = 1;
13494 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13497 if (cfun
->va_list_fpr_size
)
13499 t
= build2 (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
13500 build_int_cst (NULL_TREE
, n_fpr
));
13501 TREE_SIDE_EFFECTS (t
) = 1;
13502 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13504 #ifdef HAVE_AS_GNU_ATTRIBUTE
13505 if (call_ABI_of_interest (cfun
->decl
))
13506 rs6000_passes_float
= true;
13510 /* Find the overflow area. */
13511 t
= make_tree (TREE_TYPE (ovf
), crtl
->args
.internal_arg_pointer
);
13513 t
= fold_build_pointer_plus_hwi (t
, words
* MIN_UNITS_PER_WORD
);
13514 t
= build2 (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
13515 TREE_SIDE_EFFECTS (t
) = 1;
13516 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13518 /* If there were no va_arg invocations, don't set up the register
13520 if (!cfun
->va_list_gpr_size
13521 && !cfun
->va_list_fpr_size
13522 && n_gpr
< GP_ARG_NUM_REG
13523 && n_fpr
< FP_ARG_V4_MAX_REG
)
13526 /* Find the register save area. */
13527 t
= make_tree (TREE_TYPE (sav
), virtual_stack_vars_rtx
);
13528 if (cfun
->machine
->varargs_save_offset
)
13529 t
= fold_build_pointer_plus_hwi (t
, cfun
->machine
->varargs_save_offset
);
13530 t
= build2 (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
13531 TREE_SIDE_EFFECTS (t
) = 1;
13532 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
13535 /* Implement va_arg. */
13538 rs6000_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
13539 gimple_seq
*post_p
)
13541 tree f_gpr
, f_fpr
, f_res
, f_ovf
, f_sav
;
13542 tree gpr
, fpr
, ovf
, sav
, reg
, t
, u
;
13543 int size
, rsize
, n_reg
, sav_ofs
, sav_scale
;
13544 tree lab_false
, lab_over
, addr
;
13546 tree ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
13550 if (pass_by_reference (NULL
, TYPE_MODE (type
), type
, false))
13552 t
= rs6000_gimplify_va_arg (valist
, ptrtype
, pre_p
, post_p
);
13553 return build_va_arg_indirect_ref (t
);
13556 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13557 earlier version of gcc, with the property that it always applied alignment
13558 adjustments to the va-args (even for zero-sized types). The cheapest way
13559 to deal with this is to replicate the effect of the part of
13560 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13562 We don't need to check for pass-by-reference because of the test above.
13563 We can return a simplifed answer, since we know there's no offset to add. */
13566 && rs6000_darwin64_abi
)
13567 || DEFAULT_ABI
== ABI_ELFv2
13568 || (DEFAULT_ABI
== ABI_AIX
&& !rs6000_compat_align_parm
))
13569 && integer_zerop (TYPE_SIZE (type
)))
13571 unsigned HOST_WIDE_INT align
, boundary
;
13572 tree valist_tmp
= get_initialized_tmp_var (valist
, pre_p
, NULL
);
13573 align
= PARM_BOUNDARY
/ BITS_PER_UNIT
;
13574 boundary
= rs6000_function_arg_boundary (TYPE_MODE (type
), type
);
13575 if (boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
13576 boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
13577 boundary
/= BITS_PER_UNIT
;
13578 if (boundary
> align
)
13581 /* This updates arg ptr by the amount that would be necessary
13582 to align the zero-sized (but not zero-alignment) item. */
13583 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13584 fold_build_pointer_plus_hwi (valist_tmp
, boundary
- 1));
13585 gimplify_and_add (t
, pre_p
);
13587 t
= fold_convert (sizetype
, valist_tmp
);
13588 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist_tmp
,
13589 fold_convert (TREE_TYPE (valist
),
13590 fold_build2 (BIT_AND_EXPR
, sizetype
, t
,
13591 size_int (-boundary
))));
13592 t
= build2 (MODIFY_EXPR
, TREE_TYPE (valist
), valist
, t
);
13593 gimplify_and_add (t
, pre_p
);
13595 /* Since it is zero-sized there's no increment for the item itself. */
13596 valist_tmp
= fold_convert (build_pointer_type (type
), valist_tmp
);
13597 return build_va_arg_indirect_ref (valist_tmp
);
13600 if (DEFAULT_ABI
!= ABI_V4
)
13602 if (targetm
.calls
.split_complex_arg
&& TREE_CODE (type
) == COMPLEX_TYPE
)
13604 tree elem_type
= TREE_TYPE (type
);
13605 machine_mode elem_mode
= TYPE_MODE (elem_type
);
13606 int elem_size
= GET_MODE_SIZE (elem_mode
);
13608 if (elem_size
< UNITS_PER_WORD
)
13610 tree real_part
, imag_part
;
13611 gimple_seq post
= NULL
;
13613 real_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
13615 /* Copy the value into a temporary, lest the formal temporary
13616 be reused out from under us. */
13617 real_part
= get_initialized_tmp_var (real_part
, pre_p
, &post
);
13618 gimple_seq_add_seq (pre_p
, post
);
13620 imag_part
= rs6000_gimplify_va_arg (valist
, elem_type
, pre_p
,
13623 return build2 (COMPLEX_EXPR
, type
, real_part
, imag_part
);
13627 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
13630 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
13631 f_fpr
= DECL_CHAIN (f_gpr
);
13632 f_res
= DECL_CHAIN (f_fpr
);
13633 f_ovf
= DECL_CHAIN (f_res
);
13634 f_sav
= DECL_CHAIN (f_ovf
);
13636 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
13637 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
13639 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
13641 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
13644 size
= int_size_in_bytes (type
);
13645 rsize
= (size
+ 3) / 4;
13646 int pad
= 4 * rsize
- size
;
13649 machine_mode mode
= TYPE_MODE (type
);
13650 if (abi_v4_pass_in_fpr (mode
))
13652 /* FP args go in FP registers, if present. */
13654 n_reg
= (size
+ 7) / 8;
13655 sav_ofs
= ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? 8 : 4) * 4;
13656 sav_scale
= ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? 8 : 4);
13657 if (mode
!= SFmode
&& mode
!= SDmode
)
13662 /* Otherwise into GP registers. */
13671 /* Pull the value out of the saved registers.... */
13674 addr
= create_tmp_var (ptr_type_node
, "addr");
13676 /* AltiVec vectors never go in registers when -mabi=altivec. */
13677 if (TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
13681 lab_false
= create_artificial_label (input_location
);
13682 lab_over
= create_artificial_label (input_location
);
13684 /* Long long is aligned in the registers. As are any other 2 gpr
13685 item such as complex int due to a historical mistake. */
13687 if (n_reg
== 2 && reg
== gpr
)
13690 u
= build2 (BIT_AND_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
13691 build_int_cst (TREE_TYPE (reg
), n_reg
- 1));
13692 u
= build2 (POSTINCREMENT_EXPR
, TREE_TYPE (reg
),
13693 unshare_expr (reg
), u
);
13695 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13696 reg number is 0 for f1, so we want to make it odd. */
13697 else if (reg
== fpr
&& mode
== TDmode
)
13699 t
= build2 (BIT_IOR_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
13700 build_int_cst (TREE_TYPE (reg
), 1));
13701 u
= build2 (MODIFY_EXPR
, void_type_node
, unshare_expr (reg
), t
);
13704 t
= fold_convert (TREE_TYPE (reg
), size_int (8 - n_reg
+ 1));
13705 t
= build2 (GE_EXPR
, boolean_type_node
, u
, t
);
13706 u
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
13707 t
= build3 (COND_EXPR
, void_type_node
, t
, u
, NULL_TREE
);
13708 gimplify_and_add (t
, pre_p
);
13712 t
= fold_build_pointer_plus_hwi (sav
, sav_ofs
);
13714 u
= build2 (POSTINCREMENT_EXPR
, TREE_TYPE (reg
), unshare_expr (reg
),
13715 build_int_cst (TREE_TYPE (reg
), n_reg
));
13716 u
= fold_convert (sizetype
, u
);
13717 u
= build2 (MULT_EXPR
, sizetype
, u
, size_int (sav_scale
));
13718 t
= fold_build_pointer_plus (t
, u
);
13720 /* _Decimal32 varargs are located in the second word of the 64-bit
13721 FP register for 32-bit binaries. */
13722 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& mode
== SDmode
)
13723 t
= fold_build_pointer_plus_hwi (t
, size
);
13725 /* Args are passed right-aligned. */
13726 if (BYTES_BIG_ENDIAN
)
13727 t
= fold_build_pointer_plus_hwi (t
, pad
);
13729 gimplify_assign (addr
, t
, pre_p
);
13731 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
13733 stmt
= gimple_build_label (lab_false
);
13734 gimple_seq_add_stmt (pre_p
, stmt
);
13736 if ((n_reg
== 2 && !regalign
) || n_reg
> 2)
13738 /* Ensure that we don't find any more args in regs.
13739 Alignment has taken care of for special cases. */
13740 gimplify_assign (reg
, build_int_cst (TREE_TYPE (reg
), 8), pre_p
);
13744 /* ... otherwise out of the overflow area. */
13746 /* Care for on-stack alignment if needed. */
13750 t
= fold_build_pointer_plus_hwi (t
, align
- 1);
13751 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
13752 build_int_cst (TREE_TYPE (t
), -align
));
13755 /* Args are passed right-aligned. */
13756 if (BYTES_BIG_ENDIAN
)
13757 t
= fold_build_pointer_plus_hwi (t
, pad
);
13759 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
13761 gimplify_assign (unshare_expr (addr
), t
, pre_p
);
13763 t
= fold_build_pointer_plus_hwi (t
, size
);
13764 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
13768 stmt
= gimple_build_label (lab_over
);
13769 gimple_seq_add_stmt (pre_p
, stmt
);
13772 if (STRICT_ALIGNMENT
13773 && (TYPE_ALIGN (type
)
13774 > (unsigned) BITS_PER_UNIT
* (align
< 4 ? 4 : align
)))
13776 /* The value (of type complex double, for example) may not be
13777 aligned in memory in the saved registers, so copy via a
13778 temporary. (This is the same code as used for SPARC.) */
13779 tree tmp
= create_tmp_var (type
, "va_arg_tmp");
13780 tree dest_addr
= build_fold_addr_expr (tmp
);
13782 tree copy
= build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
13783 3, dest_addr
, addr
, size_int (rsize
* 4));
13785 gimplify_and_add (copy
, pre_p
);
13789 addr
= fold_convert (ptrtype
, addr
);
13790 return build_va_arg_indirect_ref (addr
);
13796 def_builtin (const char *name
, tree type
, enum rs6000_builtins code
)
13799 unsigned classify
= rs6000_builtin_info
[(int)code
].attr
;
13800 const char *attr_string
= "";
13802 gcc_assert (name
!= NULL
);
13803 gcc_assert (IN_RANGE ((int)code
, 0, (int)RS6000_BUILTIN_COUNT
));
13805 if (rs6000_builtin_decls
[(int)code
])
13806 fatal_error (input_location
,
13807 "internal error: builtin function %s already processed", name
);
13809 rs6000_builtin_decls
[(int)code
] = t
=
13810 add_builtin_function (name
, type
, (int)code
, BUILT_IN_MD
, NULL
, NULL_TREE
);
13812 /* Set any special attributes. */
13813 if ((classify
& RS6000_BTC_CONST
) != 0)
13815 /* const function, function only depends on the inputs. */
13816 TREE_READONLY (t
) = 1;
13817 TREE_NOTHROW (t
) = 1;
13818 attr_string
= ", const";
13820 else if ((classify
& RS6000_BTC_PURE
) != 0)
13822 /* pure function, function can read global memory, but does not set any
13824 DECL_PURE_P (t
) = 1;
13825 TREE_NOTHROW (t
) = 1;
13826 attr_string
= ", pure";
13828 else if ((classify
& RS6000_BTC_FP
) != 0)
13830 /* Function is a math function. If rounding mode is on, then treat the
13831 function as not reading global memory, but it can have arbitrary side
13832 effects. If it is off, then assume the function is a const function.
13833 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13834 builtin-attribute.def that is used for the math functions. */
13835 TREE_NOTHROW (t
) = 1;
13836 if (flag_rounding_math
)
13838 DECL_PURE_P (t
) = 1;
13839 DECL_IS_NOVOPS (t
) = 1;
13840 attr_string
= ", fp, pure";
13844 TREE_READONLY (t
) = 1;
13845 attr_string
= ", fp, const";
13848 else if ((classify
& RS6000_BTC_ATTR_MASK
) != 0)
13849 gcc_unreachable ();
13851 if (TARGET_DEBUG_BUILTIN
)
13852 fprintf (stderr
, "rs6000_builtin, code = %4d, %s%s\n",
13853 (int)code
, name
, attr_string
);
13856 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13858 #undef RS6000_BUILTIN_0
13859 #undef RS6000_BUILTIN_1
13860 #undef RS6000_BUILTIN_2
13861 #undef RS6000_BUILTIN_3
13862 #undef RS6000_BUILTIN_A
13863 #undef RS6000_BUILTIN_D
13864 #undef RS6000_BUILTIN_H
13865 #undef RS6000_BUILTIN_P
13866 #undef RS6000_BUILTIN_Q
13867 #undef RS6000_BUILTIN_X
13869 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13870 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13871 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13872 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13873 { MASK, ICODE, NAME, ENUM },
13875 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13876 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13877 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13878 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13879 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13880 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13882 static const struct builtin_description bdesc_3arg
[] =
13884 #include "rs6000-builtin.def"
13887 /* DST operations: void foo (void *, const int, const char). */
13889 #undef RS6000_BUILTIN_0
13890 #undef RS6000_BUILTIN_1
13891 #undef RS6000_BUILTIN_2
13892 #undef RS6000_BUILTIN_3
13893 #undef RS6000_BUILTIN_A
13894 #undef RS6000_BUILTIN_D
13895 #undef RS6000_BUILTIN_H
13896 #undef RS6000_BUILTIN_P
13897 #undef RS6000_BUILTIN_Q
13898 #undef RS6000_BUILTIN_X
13900 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13901 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13902 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13903 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13904 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13905 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13906 { MASK, ICODE, NAME, ENUM },
13908 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13909 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13910 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13911 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13913 static const struct builtin_description bdesc_dst
[] =
13915 #include "rs6000-builtin.def"
13918 /* Simple binary operations: VECc = foo (VECa, VECb). */
13920 #undef RS6000_BUILTIN_0
13921 #undef RS6000_BUILTIN_1
13922 #undef RS6000_BUILTIN_2
13923 #undef RS6000_BUILTIN_3
13924 #undef RS6000_BUILTIN_A
13925 #undef RS6000_BUILTIN_D
13926 #undef RS6000_BUILTIN_H
13927 #undef RS6000_BUILTIN_P
13928 #undef RS6000_BUILTIN_Q
13929 #undef RS6000_BUILTIN_X
13931 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13932 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13933 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13934 { MASK, ICODE, NAME, ENUM },
13936 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13937 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13938 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13939 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13940 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13941 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13942 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13944 static const struct builtin_description bdesc_2arg
[] =
13946 #include "rs6000-builtin.def"
13949 #undef RS6000_BUILTIN_0
13950 #undef RS6000_BUILTIN_1
13951 #undef RS6000_BUILTIN_2
13952 #undef RS6000_BUILTIN_3
13953 #undef RS6000_BUILTIN_A
13954 #undef RS6000_BUILTIN_D
13955 #undef RS6000_BUILTIN_H
13956 #undef RS6000_BUILTIN_P
13957 #undef RS6000_BUILTIN_Q
13958 #undef RS6000_BUILTIN_X
13960 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13961 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13962 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13963 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13964 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13965 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13966 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13967 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13968 { MASK, ICODE, NAME, ENUM },
13970 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13971 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13973 /* AltiVec predicates. */
13975 static const struct builtin_description bdesc_altivec_preds
[] =
13977 #include "rs6000-builtin.def"
13980 /* PAIRED predicates. */
13981 #undef RS6000_BUILTIN_0
13982 #undef RS6000_BUILTIN_1
13983 #undef RS6000_BUILTIN_2
13984 #undef RS6000_BUILTIN_3
13985 #undef RS6000_BUILTIN_A
13986 #undef RS6000_BUILTIN_D
13987 #undef RS6000_BUILTIN_H
13988 #undef RS6000_BUILTIN_P
13989 #undef RS6000_BUILTIN_Q
13990 #undef RS6000_BUILTIN_X
13992 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13993 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13994 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13995 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13996 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13997 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13998 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13999 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14000 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14001 { MASK, ICODE, NAME, ENUM },
14003 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14005 static const struct builtin_description bdesc_paired_preds
[] =
14007 #include "rs6000-builtin.def"
14010 /* ABS* operations. */
14012 #undef RS6000_BUILTIN_0
14013 #undef RS6000_BUILTIN_1
14014 #undef RS6000_BUILTIN_2
14015 #undef RS6000_BUILTIN_3
14016 #undef RS6000_BUILTIN_A
14017 #undef RS6000_BUILTIN_D
14018 #undef RS6000_BUILTIN_H
14019 #undef RS6000_BUILTIN_P
14020 #undef RS6000_BUILTIN_Q
14021 #undef RS6000_BUILTIN_X
14023 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14024 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14025 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14026 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14027 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14028 { MASK, ICODE, NAME, ENUM },
14030 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14031 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14032 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14033 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14034 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14036 static const struct builtin_description bdesc_abs
[] =
14038 #include "rs6000-builtin.def"
14041 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14044 #undef RS6000_BUILTIN_0
14045 #undef RS6000_BUILTIN_1
14046 #undef RS6000_BUILTIN_2
14047 #undef RS6000_BUILTIN_3
14048 #undef RS6000_BUILTIN_A
14049 #undef RS6000_BUILTIN_D
14050 #undef RS6000_BUILTIN_H
14051 #undef RS6000_BUILTIN_P
14052 #undef RS6000_BUILTIN_Q
14053 #undef RS6000_BUILTIN_X
14055 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14056 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14057 { MASK, ICODE, NAME, ENUM },
14059 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14060 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14061 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14062 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14063 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14064 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14065 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14066 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14068 static const struct builtin_description bdesc_1arg
[] =
14070 #include "rs6000-builtin.def"
14073 /* Simple no-argument operations: result = __builtin_darn_32 () */
14075 #undef RS6000_BUILTIN_0
14076 #undef RS6000_BUILTIN_1
14077 #undef RS6000_BUILTIN_2
14078 #undef RS6000_BUILTIN_3
14079 #undef RS6000_BUILTIN_A
14080 #undef RS6000_BUILTIN_D
14081 #undef RS6000_BUILTIN_H
14082 #undef RS6000_BUILTIN_P
14083 #undef RS6000_BUILTIN_Q
14084 #undef RS6000_BUILTIN_X
14086 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14087 { MASK, ICODE, NAME, ENUM },
14089 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14090 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14091 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14092 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14093 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14094 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14095 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14096 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14097 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14099 static const struct builtin_description bdesc_0arg
[] =
14101 #include "rs6000-builtin.def"
14104 /* HTM builtins. */
14105 #undef RS6000_BUILTIN_0
14106 #undef RS6000_BUILTIN_1
14107 #undef RS6000_BUILTIN_2
14108 #undef RS6000_BUILTIN_3
14109 #undef RS6000_BUILTIN_A
14110 #undef RS6000_BUILTIN_D
14111 #undef RS6000_BUILTIN_H
14112 #undef RS6000_BUILTIN_P
14113 #undef RS6000_BUILTIN_Q
14114 #undef RS6000_BUILTIN_X
14116 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14117 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14118 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14119 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14120 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14121 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14122 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14123 { MASK, ICODE, NAME, ENUM },
14125 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14126 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14127 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14129 static const struct builtin_description bdesc_htm
[] =
14131 #include "rs6000-builtin.def"
14134 #undef RS6000_BUILTIN_0
14135 #undef RS6000_BUILTIN_1
14136 #undef RS6000_BUILTIN_2
14137 #undef RS6000_BUILTIN_3
14138 #undef RS6000_BUILTIN_A
14139 #undef RS6000_BUILTIN_D
14140 #undef RS6000_BUILTIN_H
14141 #undef RS6000_BUILTIN_P
14142 #undef RS6000_BUILTIN_Q
14144 /* Return true if a builtin function is overloaded. */
14146 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode
)
14148 return (rs6000_builtin_info
[(int)fncode
].attr
& RS6000_BTC_OVERLOADED
) != 0;
14152 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode
)
14154 return rs6000_builtin_info
[(int)fncode
].name
;
14157 /* Expand an expression EXP that calls a builtin without arguments. */
14159 rs6000_expand_zeroop_builtin (enum insn_code icode
, rtx target
)
14162 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14164 if (icode
== CODE_FOR_nothing
)
14165 /* Builtin not supported on this processor. */
14169 || GET_MODE (target
) != tmode
14170 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14171 target
= gen_reg_rtx (tmode
);
14173 pat
= GEN_FCN (icode
) (target
);
14183 rs6000_expand_mtfsf_builtin (enum insn_code icode
, tree exp
)
14186 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14187 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14188 rtx op0
= expand_normal (arg0
);
14189 rtx op1
= expand_normal (arg1
);
14190 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
14191 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
14193 if (icode
== CODE_FOR_nothing
)
14194 /* Builtin not supported on this processor. */
14197 /* If we got invalid arguments bail out before generating bad rtl. */
14198 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14201 if (GET_CODE (op0
) != CONST_INT
14202 || INTVAL (op0
) > 255
14203 || INTVAL (op0
) < 0)
14205 error ("argument 1 must be an 8-bit field value");
14209 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
14210 op0
= copy_to_mode_reg (mode0
, op0
);
14212 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
14213 op1
= copy_to_mode_reg (mode1
, op1
);
14215 pat
= GEN_FCN (icode
) (op0
, op1
);
14224 rs6000_expand_unop_builtin (enum insn_code icode
, tree exp
, rtx target
)
14227 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14228 rtx op0
= expand_normal (arg0
);
14229 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14230 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14232 if (icode
== CODE_FOR_nothing
)
14233 /* Builtin not supported on this processor. */
14236 /* If we got invalid arguments bail out before generating bad rtl. */
14237 if (arg0
== error_mark_node
)
14240 if (icode
== CODE_FOR_altivec_vspltisb
14241 || icode
== CODE_FOR_altivec_vspltish
14242 || icode
== CODE_FOR_altivec_vspltisw
)
14244 /* Only allow 5-bit *signed* literals. */
14245 if (GET_CODE (op0
) != CONST_INT
14246 || INTVAL (op0
) > 15
14247 || INTVAL (op0
) < -16)
14249 error ("argument 1 must be a 5-bit signed literal");
14250 return CONST0_RTX (tmode
);
14255 || GET_MODE (target
) != tmode
14256 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14257 target
= gen_reg_rtx (tmode
);
14259 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14260 op0
= copy_to_mode_reg (mode0
, op0
);
14262 pat
= GEN_FCN (icode
) (target
, op0
);
14271 altivec_expand_abs_builtin (enum insn_code icode
, tree exp
, rtx target
)
14273 rtx pat
, scratch1
, scratch2
;
14274 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14275 rtx op0
= expand_normal (arg0
);
14276 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14277 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14279 /* If we have invalid arguments, bail out before generating bad rtl. */
14280 if (arg0
== error_mark_node
)
14284 || GET_MODE (target
) != tmode
14285 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14286 target
= gen_reg_rtx (tmode
);
14288 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14289 op0
= copy_to_mode_reg (mode0
, op0
);
14291 scratch1
= gen_reg_rtx (mode0
);
14292 scratch2
= gen_reg_rtx (mode0
);
14294 pat
= GEN_FCN (icode
) (target
, op0
, scratch1
, scratch2
);
14303 rs6000_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
14306 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14307 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14308 rtx op0
= expand_normal (arg0
);
14309 rtx op1
= expand_normal (arg1
);
14310 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14311 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14312 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
14314 if (icode
== CODE_FOR_nothing
)
14315 /* Builtin not supported on this processor. */
14318 /* If we got invalid arguments bail out before generating bad rtl. */
14319 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14322 if (icode
== CODE_FOR_altivec_vcfux
14323 || icode
== CODE_FOR_altivec_vcfsx
14324 || icode
== CODE_FOR_altivec_vctsxs
14325 || icode
== CODE_FOR_altivec_vctuxs
14326 || icode
== CODE_FOR_altivec_vspltb
14327 || icode
== CODE_FOR_altivec_vsplth
14328 || icode
== CODE_FOR_altivec_vspltw
)
14330 /* Only allow 5-bit unsigned literals. */
14332 if (TREE_CODE (arg1
) != INTEGER_CST
14333 || TREE_INT_CST_LOW (arg1
) & ~0x1f)
14335 error ("argument 2 must be a 5-bit unsigned literal");
14336 return CONST0_RTX (tmode
);
14339 else if (icode
== CODE_FOR_dfptstsfi_eq_dd
14340 || icode
== CODE_FOR_dfptstsfi_lt_dd
14341 || icode
== CODE_FOR_dfptstsfi_gt_dd
14342 || icode
== CODE_FOR_dfptstsfi_unordered_dd
14343 || icode
== CODE_FOR_dfptstsfi_eq_td
14344 || icode
== CODE_FOR_dfptstsfi_lt_td
14345 || icode
== CODE_FOR_dfptstsfi_gt_td
14346 || icode
== CODE_FOR_dfptstsfi_unordered_td
)
14348 /* Only allow 6-bit unsigned literals. */
14350 if (TREE_CODE (arg0
) != INTEGER_CST
14351 || !IN_RANGE (TREE_INT_CST_LOW (arg0
), 0, 63))
14353 error ("argument 1 must be a 6-bit unsigned literal");
14354 return CONST0_RTX (tmode
);
14357 else if (icode
== CODE_FOR_xststdcdp
14358 || icode
== CODE_FOR_xststdcsp
14359 || icode
== CODE_FOR_xvtstdcdp
14360 || icode
== CODE_FOR_xvtstdcsp
)
14362 /* Only allow 7-bit unsigned literals. */
14364 if (TREE_CODE (arg1
) != INTEGER_CST
14365 || !IN_RANGE (TREE_INT_CST_LOW (arg1
), 0, 127))
14367 error ("argument 2 must be a 7-bit unsigned literal");
14368 return CONST0_RTX (tmode
);
14373 || GET_MODE (target
) != tmode
14374 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14375 target
= gen_reg_rtx (tmode
);
14377 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14378 op0
= copy_to_mode_reg (mode0
, op0
);
14379 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14380 op1
= copy_to_mode_reg (mode1
, op1
);
14382 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
14391 altivec_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
14394 tree cr6_form
= CALL_EXPR_ARG (exp
, 0);
14395 tree arg0
= CALL_EXPR_ARG (exp
, 1);
14396 tree arg1
= CALL_EXPR_ARG (exp
, 2);
14397 rtx op0
= expand_normal (arg0
);
14398 rtx op1
= expand_normal (arg1
);
14399 machine_mode tmode
= SImode
;
14400 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
14401 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
14404 if (TREE_CODE (cr6_form
) != INTEGER_CST
)
14406 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14410 cr6_form_int
= TREE_INT_CST_LOW (cr6_form
);
14412 gcc_assert (mode0
== mode1
);
14414 /* If we have invalid arguments, bail out before generating bad rtl. */
14415 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14419 || GET_MODE (target
) != tmode
14420 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14421 target
= gen_reg_rtx (tmode
);
14423 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14424 op0
= copy_to_mode_reg (mode0
, op0
);
14425 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14426 op1
= copy_to_mode_reg (mode1
, op1
);
14428 /* Note that for many of the relevant operations (e.g. cmpne or
14429 cmpeq) with float or double operands, it makes more sense for the
14430 mode of the allocated scratch register to select a vector of
14431 integer. But the choice to copy the mode of operand 0 was made
14432 long ago and there are no plans to change it. */
14433 scratch
= gen_reg_rtx (mode0
);
14435 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
14440 /* The vec_any* and vec_all* predicates use the same opcodes for two
14441 different operations, but the bits in CR6 will be different
14442 depending on what information we want. So we have to play tricks
14443 with CR6 to get the right bits out.
14445 If you think this is disgusting, look at the specs for the
14446 AltiVec predicates. */
14448 switch (cr6_form_int
)
14451 emit_insn (gen_cr6_test_for_zero (target
));
14454 emit_insn (gen_cr6_test_for_zero_reverse (target
));
14457 emit_insn (gen_cr6_test_for_lt (target
));
14460 emit_insn (gen_cr6_test_for_lt_reverse (target
));
14463 error ("argument 1 of __builtin_altivec_predicate is out of range");
14471 paired_expand_lv_builtin (enum insn_code icode
, tree exp
, rtx target
)
14474 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14475 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14476 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14477 machine_mode mode0
= Pmode
;
14478 machine_mode mode1
= Pmode
;
14479 rtx op0
= expand_normal (arg0
);
14480 rtx op1
= expand_normal (arg1
);
14482 if (icode
== CODE_FOR_nothing
)
14483 /* Builtin not supported on this processor. */
14486 /* If we got invalid arguments bail out before generating bad rtl. */
14487 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14491 || GET_MODE (target
) != tmode
14492 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14493 target
= gen_reg_rtx (tmode
);
14495 op1
= copy_to_mode_reg (mode1
, op1
);
14497 if (op0
== const0_rtx
)
14499 addr
= gen_rtx_MEM (tmode
, op1
);
14503 op0
= copy_to_mode_reg (mode0
, op0
);
14504 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op0
, op1
));
14507 pat
= GEN_FCN (icode
) (target
, addr
);
14516 /* Return a constant vector for use as a little-endian permute control vector
14517 to reverse the order of elements of the given vector mode. */
14519 swap_selector_for_mode (machine_mode mode
)
14521 /* These are little endian vectors, so their elements are reversed
14522 from what you would normally expect for a permute control vector. */
14523 unsigned int swap2
[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14524 unsigned int swap4
[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14525 unsigned int swap8
[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14526 unsigned int swap16
[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14527 unsigned int *swaparray
, i
;
14544 swaparray
= swap16
;
14547 gcc_unreachable ();
14550 for (i
= 0; i
< 16; ++i
)
14551 perm
[i
] = GEN_INT (swaparray
[i
]);
14553 return force_reg (V16QImode
, gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
)));
14556 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
14557 with -maltivec=be specified. Issue the load followed by an element-
14558 reversing permute. */
14560 altivec_expand_lvx_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
14562 rtx tmp
= gen_reg_rtx (mode
);
14563 rtx load
= gen_rtx_SET (tmp
, op1
);
14564 rtx lvx
= gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), unspec
);
14565 rtx par
= gen_rtx_PARALLEL (mode
, gen_rtvec (2, load
, lvx
));
14566 rtx sel
= swap_selector_for_mode (mode
);
14567 rtx vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, tmp
, tmp
, sel
), UNSPEC_VPERM
);
14569 gcc_assert (REG_P (op0
));
14571 emit_insn (gen_rtx_SET (op0
, vperm
));
14574 /* Generate code for a "stvxl" built-in for a little endian target with
14575 -maltivec=be specified. Issue the store preceded by an element-reversing
14578 altivec_expand_stvx_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
14580 rtx tmp
= gen_reg_rtx (mode
);
14581 rtx store
= gen_rtx_SET (op0
, tmp
);
14582 rtx stvx
= gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), unspec
);
14583 rtx par
= gen_rtx_PARALLEL (mode
, gen_rtvec (2, store
, stvx
));
14584 rtx sel
= swap_selector_for_mode (mode
);
14587 gcc_assert (REG_P (op1
));
14588 vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op1
, sel
), UNSPEC_VPERM
);
14589 emit_insn (gen_rtx_SET (tmp
, vperm
));
14593 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14594 specified. Issue the store preceded by an element-reversing permute. */
14596 altivec_expand_stvex_be (rtx op0
, rtx op1
, machine_mode mode
, unsigned unspec
)
14598 machine_mode inner_mode
= GET_MODE_INNER (mode
);
14599 rtx tmp
= gen_reg_rtx (mode
);
14600 rtx stvx
= gen_rtx_UNSPEC (inner_mode
, gen_rtvec (1, tmp
), unspec
);
14601 rtx sel
= swap_selector_for_mode (mode
);
14604 gcc_assert (REG_P (op1
));
14605 vperm
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op1
, sel
), UNSPEC_VPERM
);
14606 emit_insn (gen_rtx_SET (tmp
, vperm
));
14607 emit_insn (gen_rtx_SET (op0
, stvx
));
14611 altivec_expand_lv_builtin (enum insn_code icode
, tree exp
, rtx target
, bool blk
)
14614 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14615 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14616 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14617 machine_mode mode0
= Pmode
;
14618 machine_mode mode1
= Pmode
;
14619 rtx op0
= expand_normal (arg0
);
14620 rtx op1
= expand_normal (arg1
);
14622 if (icode
== CODE_FOR_nothing
)
14623 /* Builtin not supported on this processor. */
14626 /* If we got invalid arguments bail out before generating bad rtl. */
14627 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
14631 || GET_MODE (target
) != tmode
14632 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
14633 target
= gen_reg_rtx (tmode
);
14635 op1
= copy_to_mode_reg (mode1
, op1
);
14637 /* For LVX, express the RTL accurately by ANDing the address with -16.
14638 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14639 so the raw address is fine. */
14640 if (icode
== CODE_FOR_altivec_lvx_v2df_2op
14641 || icode
== CODE_FOR_altivec_lvx_v2di_2op
14642 || icode
== CODE_FOR_altivec_lvx_v4sf_2op
14643 || icode
== CODE_FOR_altivec_lvx_v4si_2op
14644 || icode
== CODE_FOR_altivec_lvx_v8hi_2op
14645 || icode
== CODE_FOR_altivec_lvx_v16qi_2op
)
14648 if (op0
== const0_rtx
)
14652 op0
= copy_to_mode_reg (mode0
, op0
);
14653 rawaddr
= gen_rtx_PLUS (Pmode
, op1
, op0
);
14655 addr
= gen_rtx_AND (Pmode
, rawaddr
, gen_rtx_CONST_INT (Pmode
, -16));
14656 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
, addr
);
14658 /* For -maltivec=be, emit the load and follow it up with a
14659 permute to swap the elements. */
14660 if (!BYTES_BIG_ENDIAN
&& VECTOR_ELT_ORDER_BIG
)
14662 rtx temp
= gen_reg_rtx (tmode
);
14663 emit_insn (gen_rtx_SET (temp
, addr
));
14665 rtx sel
= swap_selector_for_mode (tmode
);
14666 rtx vperm
= gen_rtx_UNSPEC (tmode
, gen_rtvec (3, temp
, temp
, sel
),
14668 emit_insn (gen_rtx_SET (target
, vperm
));
14671 emit_insn (gen_rtx_SET (target
, addr
));
14675 if (op0
== const0_rtx
)
14676 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
, op1
);
14679 op0
= copy_to_mode_reg (mode0
, op0
);
14680 addr
= gen_rtx_MEM (blk
? BLKmode
: tmode
,
14681 gen_rtx_PLUS (Pmode
, op1
, op0
));
14684 pat
= GEN_FCN (icode
) (target
, addr
);
14694 paired_expand_stv_builtin (enum insn_code icode
, tree exp
)
14696 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14697 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14698 tree arg2
= CALL_EXPR_ARG (exp
, 2);
14699 rtx op0
= expand_normal (arg0
);
14700 rtx op1
= expand_normal (arg1
);
14701 rtx op2
= expand_normal (arg2
);
14703 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14704 machine_mode mode1
= Pmode
;
14705 machine_mode mode2
= Pmode
;
14707 /* Invalid arguments. Bail before doing anything stoopid! */
14708 if (arg0
== error_mark_node
14709 || arg1
== error_mark_node
14710 || arg2
== error_mark_node
)
14713 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, tmode
))
14714 op0
= copy_to_mode_reg (tmode
, op0
);
14716 op2
= copy_to_mode_reg (mode2
, op2
);
14718 if (op1
== const0_rtx
)
14720 addr
= gen_rtx_MEM (tmode
, op2
);
14724 op1
= copy_to_mode_reg (mode1
, op1
);
14725 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op1
, op2
));
14728 pat
= GEN_FCN (icode
) (addr
, op0
);
14735 altivec_expand_stxvl_builtin (enum insn_code icode
, tree exp
)
14738 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14739 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14740 tree arg2
= CALL_EXPR_ARG (exp
, 2);
14741 rtx op0
= expand_normal (arg0
);
14742 rtx op1
= expand_normal (arg1
);
14743 rtx op2
= expand_normal (arg2
);
14744 machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
14745 machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
14746 machine_mode mode2
= insn_data
[icode
].operand
[2].mode
;
14748 if (icode
== CODE_FOR_nothing
)
14749 /* Builtin not supported on this processor. */
14752 /* If we got invalid arguments bail out before generating bad rtl. */
14753 if (arg0
== error_mark_node
14754 || arg1
== error_mark_node
14755 || arg2
== error_mark_node
)
14758 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
14759 op0
= copy_to_mode_reg (mode0
, op0
);
14760 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
14761 op1
= copy_to_mode_reg (mode1
, op1
);
14762 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
14763 op2
= copy_to_mode_reg (mode2
, op2
);
14765 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
14773 altivec_expand_stv_builtin (enum insn_code icode
, tree exp
)
14775 tree arg0
= CALL_EXPR_ARG (exp
, 0);
14776 tree arg1
= CALL_EXPR_ARG (exp
, 1);
14777 tree arg2
= CALL_EXPR_ARG (exp
, 2);
14778 rtx op0
= expand_normal (arg0
);
14779 rtx op1
= expand_normal (arg1
);
14780 rtx op2
= expand_normal (arg2
);
14781 rtx pat
, addr
, rawaddr
;
14782 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
14783 machine_mode smode
= insn_data
[icode
].operand
[1].mode
;
14784 machine_mode mode1
= Pmode
;
14785 machine_mode mode2
= Pmode
;
14787 /* Invalid arguments. Bail before doing anything stoopid! */
14788 if (arg0
== error_mark_node
14789 || arg1
== error_mark_node
14790 || arg2
== error_mark_node
)
14793 op2
= copy_to_mode_reg (mode2
, op2
);
14795 /* For STVX, express the RTL accurately by ANDing the address with -16.
14796 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14797 so the raw address is fine. */
14798 if (icode
== CODE_FOR_altivec_stvx_v2df_2op
14799 || icode
== CODE_FOR_altivec_stvx_v2di_2op
14800 || icode
== CODE_FOR_altivec_stvx_v4sf_2op
14801 || icode
== CODE_FOR_altivec_stvx_v4si_2op
14802 || icode
== CODE_FOR_altivec_stvx_v8hi_2op
14803 || icode
== CODE_FOR_altivec_stvx_v16qi_2op
)
14805 if (op1
== const0_rtx
)
14809 op1
= copy_to_mode_reg (mode1
, op1
);
14810 rawaddr
= gen_rtx_PLUS (Pmode
, op2
, op1
);
14813 addr
= gen_rtx_AND (Pmode
, rawaddr
, gen_rtx_CONST_INT (Pmode
, -16));
14814 addr
= gen_rtx_MEM (tmode
, addr
);
14816 op0
= copy_to_mode_reg (tmode
, op0
);
14818 /* For -maltivec=be, emit a permute to swap the elements, followed
14820 if (!BYTES_BIG_ENDIAN
&& VECTOR_ELT_ORDER_BIG
)
14822 rtx temp
= gen_reg_rtx (tmode
);
14823 rtx sel
= swap_selector_for_mode (tmode
);
14824 rtx vperm
= gen_rtx_UNSPEC (tmode
, gen_rtvec (3, op0
, op0
, sel
),
14826 emit_insn (gen_rtx_SET (temp
, vperm
));
14827 emit_insn (gen_rtx_SET (addr
, temp
));
14830 emit_insn (gen_rtx_SET (addr
, op0
));
14834 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, smode
))
14835 op0
= copy_to_mode_reg (smode
, op0
);
14837 if (op1
== const0_rtx
)
14838 addr
= gen_rtx_MEM (tmode
, op2
);
14841 op1
= copy_to_mode_reg (mode1
, op1
);
14842 addr
= gen_rtx_MEM (tmode
, gen_rtx_PLUS (Pmode
, op2
, op1
));
14845 pat
= GEN_FCN (icode
) (addr
, op0
);
14853 /* Return the appropriate SPR number associated with the given builtin. */
14854 static inline HOST_WIDE_INT
14855 htm_spr_num (enum rs6000_builtins code
)
14857 if (code
== HTM_BUILTIN_GET_TFHAR
14858 || code
== HTM_BUILTIN_SET_TFHAR
)
14860 else if (code
== HTM_BUILTIN_GET_TFIAR
14861 || code
== HTM_BUILTIN_SET_TFIAR
)
14863 else if (code
== HTM_BUILTIN_GET_TEXASR
14864 || code
== HTM_BUILTIN_SET_TEXASR
)
14866 gcc_assert (code
== HTM_BUILTIN_GET_TEXASRU
14867 || code
== HTM_BUILTIN_SET_TEXASRU
);
14868 return TEXASRU_SPR
;
14871 /* Return the appropriate SPR regno associated with the given builtin. */
14872 static inline HOST_WIDE_INT
14873 htm_spr_regno (enum rs6000_builtins code
)
14875 if (code
== HTM_BUILTIN_GET_TFHAR
14876 || code
== HTM_BUILTIN_SET_TFHAR
)
14877 return TFHAR_REGNO
;
14878 else if (code
== HTM_BUILTIN_GET_TFIAR
14879 || code
== HTM_BUILTIN_SET_TFIAR
)
14880 return TFIAR_REGNO
;
14881 gcc_assert (code
== HTM_BUILTIN_GET_TEXASR
14882 || code
== HTM_BUILTIN_SET_TEXASR
14883 || code
== HTM_BUILTIN_GET_TEXASRU
14884 || code
== HTM_BUILTIN_SET_TEXASRU
);
14885 return TEXASR_REGNO
;
14888 /* Return the correct ICODE value depending on whether we are
14889 setting or reading the HTM SPRs. */
14890 static inline enum insn_code
14891 rs6000_htm_spr_icode (bool nonvoid
)
14894 return (TARGET_POWERPC64
) ? CODE_FOR_htm_mfspr_di
: CODE_FOR_htm_mfspr_si
;
14896 return (TARGET_POWERPC64
) ? CODE_FOR_htm_mtspr_di
: CODE_FOR_htm_mtspr_si
;
14899 /* Expand the HTM builtin in EXP and store the result in TARGET.
14900 Store true in *EXPANDEDP if we found a builtin to expand. */
14902 htm_expand_builtin (tree exp
, rtx target
, bool * expandedp
)
14904 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
14905 bool nonvoid
= TREE_TYPE (TREE_TYPE (fndecl
)) != void_type_node
;
14906 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
14907 const struct builtin_description
*d
;
14912 if (!TARGET_POWERPC64
14913 && (fcode
== HTM_BUILTIN_TABORTDC
14914 || fcode
== HTM_BUILTIN_TABORTDCI
))
14916 size_t uns_fcode
= (size_t)fcode
;
14917 const char *name
= rs6000_builtin_info
[uns_fcode
].name
;
14918 error ("builtin %s is only valid in 64-bit mode", name
);
14922 /* Expand the HTM builtins. */
14924 for (i
= 0; i
< ARRAY_SIZE (bdesc_htm
); i
++, d
++)
14925 if (d
->code
== fcode
)
14927 rtx op
[MAX_HTM_OPERANDS
], pat
;
14930 call_expr_arg_iterator iter
;
14931 unsigned attr
= rs6000_builtin_info
[fcode
].attr
;
14932 enum insn_code icode
= d
->icode
;
14933 const struct insn_operand_data
*insn_op
;
14934 bool uses_spr
= (attr
& RS6000_BTC_SPR
);
14938 icode
= rs6000_htm_spr_icode (nonvoid
);
14939 insn_op
= &insn_data
[icode
].operand
[0];
14943 machine_mode tmode
= (uses_spr
) ? insn_op
->mode
: SImode
;
14945 || GET_MODE (target
) != tmode
14946 || (uses_spr
&& !(*insn_op
->predicate
) (target
, tmode
)))
14947 target
= gen_reg_rtx (tmode
);
14949 op
[nopnds
++] = target
;
14952 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, exp
)
14954 if (arg
== error_mark_node
|| nopnds
>= MAX_HTM_OPERANDS
)
14957 insn_op
= &insn_data
[icode
].operand
[nopnds
];
14959 op
[nopnds
] = expand_normal (arg
);
14961 if (!(*insn_op
->predicate
) (op
[nopnds
], insn_op
->mode
))
14963 if (!strcmp (insn_op
->constraint
, "n"))
14965 int arg_num
= (nonvoid
) ? nopnds
: nopnds
+ 1;
14966 if (!CONST_INT_P (op
[nopnds
]))
14967 error ("argument %d must be an unsigned literal", arg_num
);
14969 error ("argument %d is an unsigned literal that is "
14970 "out of range", arg_num
);
14973 op
[nopnds
] = copy_to_mode_reg (insn_op
->mode
, op
[nopnds
]);
14979 /* Handle the builtins for extended mnemonics. These accept
14980 no arguments, but map to builtins that take arguments. */
14983 case HTM_BUILTIN_TENDALL
: /* Alias for: tend. 1 */
14984 case HTM_BUILTIN_TRESUME
: /* Alias for: tsr. 1 */
14985 op
[nopnds
++] = GEN_INT (1);
14987 attr
|= RS6000_BTC_UNARY
;
14989 case HTM_BUILTIN_TSUSPEND
: /* Alias for: tsr. 0 */
14990 op
[nopnds
++] = GEN_INT (0);
14992 attr
|= RS6000_BTC_UNARY
;
14998 /* If this builtin accesses SPRs, then pass in the appropriate
14999 SPR number and SPR regno as the last two operands. */
15002 machine_mode mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
15003 op
[nopnds
++] = gen_rtx_CONST_INT (mode
, htm_spr_num (fcode
));
15004 op
[nopnds
++] = gen_rtx_REG (mode
, htm_spr_regno (fcode
));
15006 /* If this builtin accesses a CR, then pass in a scratch
15007 CR as the last operand. */
15008 else if (attr
& RS6000_BTC_CR
)
15009 { cr
= gen_reg_rtx (CCmode
);
15015 int expected_nopnds
= 0;
15016 if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_UNARY
)
15017 expected_nopnds
= 1;
15018 else if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_BINARY
)
15019 expected_nopnds
= 2;
15020 else if ((attr
& RS6000_BTC_TYPE_MASK
) == RS6000_BTC_TERNARY
)
15021 expected_nopnds
= 3;
15022 if (!(attr
& RS6000_BTC_VOID
))
15023 expected_nopnds
+= 1;
15025 expected_nopnds
+= 2;
15027 gcc_assert (nopnds
== expected_nopnds
15028 && nopnds
<= MAX_HTM_OPERANDS
);
15034 pat
= GEN_FCN (icode
) (op
[0]);
15037 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
15040 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
15043 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
15046 gcc_unreachable ();
15052 if (attr
& RS6000_BTC_CR
)
15054 if (fcode
== HTM_BUILTIN_TBEGIN
)
15056 /* Emit code to set TARGET to true or false depending on
15057 whether the tbegin. instruction successfully or failed
15058 to start a transaction. We do this by placing the 1's
15059 complement of CR's EQ bit into TARGET. */
15060 rtx scratch
= gen_reg_rtx (SImode
);
15061 emit_insn (gen_rtx_SET (scratch
,
15062 gen_rtx_EQ (SImode
, cr
,
15064 emit_insn (gen_rtx_SET (target
,
15065 gen_rtx_XOR (SImode
, scratch
,
15070 /* Emit code to copy the 4-bit condition register field
15071 CR into the least significant end of register TARGET. */
15072 rtx scratch1
= gen_reg_rtx (SImode
);
15073 rtx scratch2
= gen_reg_rtx (SImode
);
15074 rtx subreg
= simplify_gen_subreg (CCmode
, scratch1
, SImode
, 0);
15075 emit_insn (gen_movcc (subreg
, cr
));
15076 emit_insn (gen_lshrsi3 (scratch2
, scratch1
, GEN_INT (28)));
15077 emit_insn (gen_andsi3 (target
, scratch2
, GEN_INT (0xf)));
15086 *expandedp
= false;
15090 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15093 cpu_expand_builtin (enum rs6000_builtins fcode
, tree exp ATTRIBUTE_UNUSED
,
15096 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15097 if (fcode
== RS6000_BUILTIN_CPU_INIT
)
15100 if (target
== 0 || GET_MODE (target
) != SImode
)
15101 target
= gen_reg_rtx (SImode
);
15103 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15104 tree arg
= TREE_OPERAND (CALL_EXPR_ARG (exp
, 0), 0);
15105 /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back
15106 to a STRING_CST. */
15107 if (TREE_CODE (arg
) == ARRAY_REF
15108 && TREE_CODE (TREE_OPERAND (arg
, 0)) == STRING_CST
15109 && TREE_CODE (TREE_OPERAND (arg
, 1)) == INTEGER_CST
15110 && compare_tree_int (TREE_OPERAND (arg
, 1), 0) == 0)
15111 arg
= TREE_OPERAND (arg
, 0);
15113 if (TREE_CODE (arg
) != STRING_CST
)
15115 error ("builtin %s only accepts a string argument",
15116 rs6000_builtin_info
[(size_t) fcode
].name
);
15120 if (fcode
== RS6000_BUILTIN_CPU_IS
)
15122 const char *cpu
= TREE_STRING_POINTER (arg
);
15123 rtx cpuid
= NULL_RTX
;
15124 for (size_t i
= 0; i
< ARRAY_SIZE (cpu_is_info
); i
++)
15125 if (strcmp (cpu
, cpu_is_info
[i
].cpu
) == 0)
15127 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15128 cpuid
= GEN_INT (cpu_is_info
[i
].cpuid
+ _DL_FIRST_PLATFORM
);
15131 if (cpuid
== NULL_RTX
)
15133 /* Invalid CPU argument. */
15134 error ("cpu %s is an invalid argument to builtin %s",
15135 cpu
, rs6000_builtin_info
[(size_t) fcode
].name
);
15139 rtx platform
= gen_reg_rtx (SImode
);
15140 rtx tcbmem
= gen_const_mem (SImode
,
15141 gen_rtx_PLUS (Pmode
,
15142 gen_rtx_REG (Pmode
, TLS_REGNUM
),
15143 GEN_INT (TCB_PLATFORM_OFFSET
)));
15144 emit_move_insn (platform
, tcbmem
);
15145 emit_insn (gen_eqsi3 (target
, platform
, cpuid
));
15147 else if (fcode
== RS6000_BUILTIN_CPU_SUPPORTS
)
15149 const char *hwcap
= TREE_STRING_POINTER (arg
);
15150 rtx mask
= NULL_RTX
;
15152 for (size_t i
= 0; i
< ARRAY_SIZE (cpu_supports_info
); i
++)
15153 if (strcmp (hwcap
, cpu_supports_info
[i
].hwcap
) == 0)
15155 mask
= GEN_INT (cpu_supports_info
[i
].mask
);
15156 hwcap_offset
= TCB_HWCAP_OFFSET (cpu_supports_info
[i
].id
);
15159 if (mask
== NULL_RTX
)
15161 /* Invalid HWCAP argument. */
15162 error ("hwcap %s is an invalid argument to builtin %s",
15163 hwcap
, rs6000_builtin_info
[(size_t) fcode
].name
);
15167 rtx tcb_hwcap
= gen_reg_rtx (SImode
);
15168 rtx tcbmem
= gen_const_mem (SImode
,
15169 gen_rtx_PLUS (Pmode
,
15170 gen_rtx_REG (Pmode
, TLS_REGNUM
),
15171 GEN_INT (hwcap_offset
)));
15172 emit_move_insn (tcb_hwcap
, tcbmem
);
15173 rtx scratch1
= gen_reg_rtx (SImode
);
15174 emit_insn (gen_rtx_SET (scratch1
, gen_rtx_AND (SImode
, tcb_hwcap
, mask
)));
15175 rtx scratch2
= gen_reg_rtx (SImode
);
15176 emit_insn (gen_eqsi3 (scratch2
, scratch1
, const0_rtx
));
15177 emit_insn (gen_rtx_SET (target
, gen_rtx_XOR (SImode
, scratch2
, const1_rtx
)));
15180 /* Record that we have expanded a CPU builtin, so that we can later
15181 emit a reference to the special symbol exported by LIBC to ensure we
15182 do not link against an old LIBC that doesn't support this feature. */
15183 cpu_builtin_p
= true;
15186 /* For old LIBCs, always return FALSE. */
15187 emit_move_insn (target
, GEN_INT (0));
15188 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15194 rs6000_expand_ternop_builtin (enum insn_code icode
, tree exp
, rtx target
)
15197 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15198 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15199 tree arg2
= CALL_EXPR_ARG (exp
, 2);
15200 rtx op0
= expand_normal (arg0
);
15201 rtx op1
= expand_normal (arg1
);
15202 rtx op2
= expand_normal (arg2
);
15203 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15204 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15205 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15206 machine_mode mode2
= insn_data
[icode
].operand
[3].mode
;
15208 if (icode
== CODE_FOR_nothing
)
15209 /* Builtin not supported on this processor. */
15212 /* If we got invalid arguments bail out before generating bad rtl. */
15213 if (arg0
== error_mark_node
15214 || arg1
== error_mark_node
15215 || arg2
== error_mark_node
)
15218 /* Check and prepare argument depending on the instruction code.
15220 Note that a switch statement instead of the sequence of tests
15221 would be incorrect as many of the CODE_FOR values could be
15222 CODE_FOR_nothing and that would yield multiple alternatives
15223 with identical values. We'd never reach here at runtime in
15225 if (icode
== CODE_FOR_altivec_vsldoi_v4sf
15226 || icode
== CODE_FOR_altivec_vsldoi_v2df
15227 || icode
== CODE_FOR_altivec_vsldoi_v4si
15228 || icode
== CODE_FOR_altivec_vsldoi_v8hi
15229 || icode
== CODE_FOR_altivec_vsldoi_v16qi
)
15231 /* Only allow 4-bit unsigned literals. */
15233 if (TREE_CODE (arg2
) != INTEGER_CST
15234 || TREE_INT_CST_LOW (arg2
) & ~0xf)
15236 error ("argument 3 must be a 4-bit unsigned literal");
15237 return CONST0_RTX (tmode
);
15240 else if (icode
== CODE_FOR_vsx_xxpermdi_v2df
15241 || icode
== CODE_FOR_vsx_xxpermdi_v2di
15242 || icode
== CODE_FOR_vsx_xxpermdi_v2df_be
15243 || icode
== CODE_FOR_vsx_xxpermdi_v2di_be
15244 || icode
== CODE_FOR_vsx_xxpermdi_v1ti
15245 || icode
== CODE_FOR_vsx_xxpermdi_v4sf
15246 || icode
== CODE_FOR_vsx_xxpermdi_v4si
15247 || icode
== CODE_FOR_vsx_xxpermdi_v8hi
15248 || icode
== CODE_FOR_vsx_xxpermdi_v16qi
15249 || icode
== CODE_FOR_vsx_xxsldwi_v16qi
15250 || icode
== CODE_FOR_vsx_xxsldwi_v8hi
15251 || icode
== CODE_FOR_vsx_xxsldwi_v4si
15252 || icode
== CODE_FOR_vsx_xxsldwi_v4sf
15253 || icode
== CODE_FOR_vsx_xxsldwi_v2di
15254 || icode
== CODE_FOR_vsx_xxsldwi_v2df
)
15256 /* Only allow 2-bit unsigned literals. */
15258 if (TREE_CODE (arg2
) != INTEGER_CST
15259 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15261 error ("argument 3 must be a 2-bit unsigned literal");
15262 return CONST0_RTX (tmode
);
15265 else if (icode
== CODE_FOR_vsx_set_v2df
15266 || icode
== CODE_FOR_vsx_set_v2di
15267 || icode
== CODE_FOR_bcdadd
15268 || icode
== CODE_FOR_bcdadd_lt
15269 || icode
== CODE_FOR_bcdadd_eq
15270 || icode
== CODE_FOR_bcdadd_gt
15271 || icode
== CODE_FOR_bcdsub
15272 || icode
== CODE_FOR_bcdsub_lt
15273 || icode
== CODE_FOR_bcdsub_eq
15274 || icode
== CODE_FOR_bcdsub_gt
)
15276 /* Only allow 1-bit unsigned literals. */
15278 if (TREE_CODE (arg2
) != INTEGER_CST
15279 || TREE_INT_CST_LOW (arg2
) & ~0x1)
15281 error ("argument 3 must be a 1-bit unsigned literal");
15282 return CONST0_RTX (tmode
);
15285 else if (icode
== CODE_FOR_dfp_ddedpd_dd
15286 || icode
== CODE_FOR_dfp_ddedpd_td
)
15288 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15290 if (TREE_CODE (arg0
) != INTEGER_CST
15291 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15293 error ("argument 1 must be 0 or 2");
15294 return CONST0_RTX (tmode
);
15297 else if (icode
== CODE_FOR_dfp_denbcd_dd
15298 || icode
== CODE_FOR_dfp_denbcd_td
)
15300 /* Only allow 1-bit unsigned literals. */
15302 if (TREE_CODE (arg0
) != INTEGER_CST
15303 || TREE_INT_CST_LOW (arg0
) & ~0x1)
15305 error ("argument 1 must be a 1-bit unsigned literal");
15306 return CONST0_RTX (tmode
);
15309 else if (icode
== CODE_FOR_dfp_dscli_dd
15310 || icode
== CODE_FOR_dfp_dscli_td
15311 || icode
== CODE_FOR_dfp_dscri_dd
15312 || icode
== CODE_FOR_dfp_dscri_td
)
15314 /* Only allow 6-bit unsigned literals. */
15316 if (TREE_CODE (arg1
) != INTEGER_CST
15317 || TREE_INT_CST_LOW (arg1
) & ~0x3f)
15319 error ("argument 2 must be a 6-bit unsigned literal");
15320 return CONST0_RTX (tmode
);
15323 else if (icode
== CODE_FOR_crypto_vshasigmaw
15324 || icode
== CODE_FOR_crypto_vshasigmad
)
15326 /* Check whether the 2nd and 3rd arguments are integer constants and in
15327 range and prepare arguments. */
15329 if (TREE_CODE (arg1
) != INTEGER_CST
|| wi::geu_p (arg1
, 2))
15331 error ("argument 2 must be 0 or 1");
15332 return CONST0_RTX (tmode
);
15336 if (TREE_CODE (arg2
) != INTEGER_CST
|| wi::geu_p (arg2
, 16))
15338 error ("argument 3 must be in the range 0..15");
15339 return CONST0_RTX (tmode
);
15344 || GET_MODE (target
) != tmode
15345 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15346 target
= gen_reg_rtx (tmode
);
15348 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15349 op0
= copy_to_mode_reg (mode0
, op0
);
15350 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15351 op1
= copy_to_mode_reg (mode1
, op1
);
15352 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
15353 op2
= copy_to_mode_reg (mode2
, op2
);
15355 if (TARGET_PAIRED_FLOAT
&& icode
== CODE_FOR_selv2sf4
)
15356 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, CONST0_RTX (SFmode
));
15358 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
15366 /* Expand the lvx builtins. */
15368 altivec_expand_ld_builtin (tree exp
, rtx target
, bool *expandedp
)
15370 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15371 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15373 machine_mode tmode
, mode0
;
15375 enum insn_code icode
;
15379 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi
:
15380 icode
= CODE_FOR_vector_altivec_load_v16qi
;
15382 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi
:
15383 icode
= CODE_FOR_vector_altivec_load_v8hi
;
15385 case ALTIVEC_BUILTIN_LD_INTERNAL_4si
:
15386 icode
= CODE_FOR_vector_altivec_load_v4si
;
15388 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf
:
15389 icode
= CODE_FOR_vector_altivec_load_v4sf
;
15391 case ALTIVEC_BUILTIN_LD_INTERNAL_2df
:
15392 icode
= CODE_FOR_vector_altivec_load_v2df
;
15394 case ALTIVEC_BUILTIN_LD_INTERNAL_2di
:
15395 icode
= CODE_FOR_vector_altivec_load_v2di
;
15397 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti
:
15398 icode
= CODE_FOR_vector_altivec_load_v1ti
;
15401 *expandedp
= false;
15407 arg0
= CALL_EXPR_ARG (exp
, 0);
15408 op0
= expand_normal (arg0
);
15409 tmode
= insn_data
[icode
].operand
[0].mode
;
15410 mode0
= insn_data
[icode
].operand
[1].mode
;
15413 || GET_MODE (target
) != tmode
15414 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15415 target
= gen_reg_rtx (tmode
);
15417 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15418 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15420 pat
= GEN_FCN (icode
) (target
, op0
);
15427 /* Expand the stvx builtins. */
15429 altivec_expand_st_builtin (tree exp
, rtx target ATTRIBUTE_UNUSED
,
15432 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15433 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
15435 machine_mode mode0
, mode1
;
15437 enum insn_code icode
;
15441 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi
:
15442 icode
= CODE_FOR_vector_altivec_store_v16qi
;
15444 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi
:
15445 icode
= CODE_FOR_vector_altivec_store_v8hi
;
15447 case ALTIVEC_BUILTIN_ST_INTERNAL_4si
:
15448 icode
= CODE_FOR_vector_altivec_store_v4si
;
15450 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf
:
15451 icode
= CODE_FOR_vector_altivec_store_v4sf
;
15453 case ALTIVEC_BUILTIN_ST_INTERNAL_2df
:
15454 icode
= CODE_FOR_vector_altivec_store_v2df
;
15456 case ALTIVEC_BUILTIN_ST_INTERNAL_2di
:
15457 icode
= CODE_FOR_vector_altivec_store_v2di
;
15459 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti
:
15460 icode
= CODE_FOR_vector_altivec_store_v1ti
;
15463 *expandedp
= false;
15467 arg0
= CALL_EXPR_ARG (exp
, 0);
15468 arg1
= CALL_EXPR_ARG (exp
, 1);
15469 op0
= expand_normal (arg0
);
15470 op1
= expand_normal (arg1
);
15471 mode0
= insn_data
[icode
].operand
[0].mode
;
15472 mode1
= insn_data
[icode
].operand
[1].mode
;
15474 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
15475 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15476 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
15477 op1
= copy_to_mode_reg (mode1
, op1
);
15479 pat
= GEN_FCN (icode
) (op0
, op1
);
15487 /* Expand the dst builtins. */
15489 altivec_expand_dst_builtin (tree exp
, rtx target ATTRIBUTE_UNUSED
,
15492 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15493 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
15494 tree arg0
, arg1
, arg2
;
15495 machine_mode mode0
, mode1
;
15496 rtx pat
, op0
, op1
, op2
;
15497 const struct builtin_description
*d
;
15500 *expandedp
= false;
15502 /* Handle DST variants. */
15504 for (i
= 0; i
< ARRAY_SIZE (bdesc_dst
); i
++, d
++)
15505 if (d
->code
== fcode
)
15507 arg0
= CALL_EXPR_ARG (exp
, 0);
15508 arg1
= CALL_EXPR_ARG (exp
, 1);
15509 arg2
= CALL_EXPR_ARG (exp
, 2);
15510 op0
= expand_normal (arg0
);
15511 op1
= expand_normal (arg1
);
15512 op2
= expand_normal (arg2
);
15513 mode0
= insn_data
[d
->icode
].operand
[0].mode
;
15514 mode1
= insn_data
[d
->icode
].operand
[1].mode
;
15516 /* Invalid arguments, bail out before generating bad rtl. */
15517 if (arg0
== error_mark_node
15518 || arg1
== error_mark_node
15519 || arg2
== error_mark_node
)
15524 if (TREE_CODE (arg2
) != INTEGER_CST
15525 || TREE_INT_CST_LOW (arg2
) & ~0x3)
15527 error ("argument to %qs must be a 2-bit unsigned literal", d
->name
);
15531 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
15532 op0
= copy_to_mode_reg (Pmode
, op0
);
15533 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
15534 op1
= copy_to_mode_reg (mode1
, op1
);
15536 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
15546 /* Expand vec_init builtin. */
15548 altivec_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
15550 machine_mode tmode
= TYPE_MODE (type
);
15551 machine_mode inner_mode
= GET_MODE_INNER (tmode
);
15552 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
15554 gcc_assert (VECTOR_MODE_P (tmode
));
15555 gcc_assert (n_elt
== call_expr_nargs (exp
));
15557 if (!target
|| !register_operand (target
, tmode
))
15558 target
= gen_reg_rtx (tmode
);
15560 /* If we have a vector compromised of a single element, such as V1TImode, do
15561 the initialization directly. */
15562 if (n_elt
== 1 && GET_MODE_SIZE (tmode
) == GET_MODE_SIZE (inner_mode
))
15564 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, 0));
15565 emit_move_insn (target
, gen_lowpart (tmode
, x
));
15569 rtvec v
= rtvec_alloc (n_elt
);
15571 for (i
= 0; i
< n_elt
; ++i
)
15573 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
15574 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
15577 rs6000_expand_vector_init (target
, gen_rtx_PARALLEL (tmode
, v
));
15583 /* Return the integer constant in ARG. Constrain it to be in the range
15584 of the subparts of VEC_TYPE; issue an error if not. */
15587 get_element_number (tree vec_type
, tree arg
)
15589 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
15591 if (!tree_fits_uhwi_p (arg
)
15592 || (elt
= tree_to_uhwi (arg
), elt
> max
))
15594 error ("selector must be an integer constant in the range 0..%wi", max
);
15601 /* Expand vec_set builtin. */
15603 altivec_expand_vec_set_builtin (tree exp
)
15605 machine_mode tmode
, mode1
;
15606 tree arg0
, arg1
, arg2
;
15610 arg0
= CALL_EXPR_ARG (exp
, 0);
15611 arg1
= CALL_EXPR_ARG (exp
, 1);
15612 arg2
= CALL_EXPR_ARG (exp
, 2);
15614 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
15615 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
15616 gcc_assert (VECTOR_MODE_P (tmode
));
15618 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
15619 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
15620 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
15622 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
15623 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
15625 op0
= force_reg (tmode
, op0
);
15626 op1
= force_reg (mode1
, op1
);
15628 rs6000_expand_vector_set (op0
, op1
, elt
);
15633 /* Expand vec_ext builtin. */
15635 altivec_expand_vec_ext_builtin (tree exp
, rtx target
)
15637 machine_mode tmode
, mode0
;
15642 arg0
= CALL_EXPR_ARG (exp
, 0);
15643 arg1
= CALL_EXPR_ARG (exp
, 1);
15645 op0
= expand_normal (arg0
);
15646 op1
= expand_normal (arg1
);
15648 /* Call get_element_number to validate arg1 if it is a constant. */
15649 if (TREE_CODE (arg1
) == INTEGER_CST
)
15650 (void) get_element_number (TREE_TYPE (arg0
), arg1
);
15652 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
15653 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
15654 gcc_assert (VECTOR_MODE_P (mode0
));
15656 op0
= force_reg (mode0
, op0
);
15658 if (optimize
|| !target
|| !register_operand (target
, tmode
))
15659 target
= gen_reg_rtx (tmode
);
15661 rs6000_expand_vector_extract (target
, op0
, op1
);
15666 /* Expand the builtin in EXP and store the result in TARGET. Store
15667 true in *EXPANDEDP if we found a builtin to expand. */
15669 altivec_expand_builtin (tree exp
, rtx target
, bool *expandedp
)
15671 const struct builtin_description
*d
;
15673 enum insn_code icode
;
15674 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15675 tree arg0
, arg1
, arg2
;
15677 machine_mode tmode
, mode0
;
15678 enum rs6000_builtins fcode
15679 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
15681 if (rs6000_overloaded_builtin_p (fcode
))
15684 error ("unresolved overload for Altivec builtin %qF", fndecl
);
15686 /* Given it is invalid, just generate a normal call. */
15687 return expand_call (exp
, target
, false);
15690 target
= altivec_expand_ld_builtin (exp
, target
, expandedp
);
15694 target
= altivec_expand_st_builtin (exp
, target
, expandedp
);
15698 target
= altivec_expand_dst_builtin (exp
, target
, expandedp
);
15706 case ALTIVEC_BUILTIN_STVX_V2DF
:
15707 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op
, exp
);
15708 case ALTIVEC_BUILTIN_STVX_V2DI
:
15709 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op
, exp
);
15710 case ALTIVEC_BUILTIN_STVX_V4SF
:
15711 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op
, exp
);
15712 case ALTIVEC_BUILTIN_STVX
:
15713 case ALTIVEC_BUILTIN_STVX_V4SI
:
15714 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op
, exp
);
15715 case ALTIVEC_BUILTIN_STVX_V8HI
:
15716 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op
, exp
);
15717 case ALTIVEC_BUILTIN_STVX_V16QI
:
15718 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op
, exp
);
15719 case ALTIVEC_BUILTIN_STVEBX
:
15720 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx
, exp
);
15721 case ALTIVEC_BUILTIN_STVEHX
:
15722 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx
, exp
);
15723 case ALTIVEC_BUILTIN_STVEWX
:
15724 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx
, exp
);
15725 case ALTIVEC_BUILTIN_STVXL_V2DF
:
15726 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df
, exp
);
15727 case ALTIVEC_BUILTIN_STVXL_V2DI
:
15728 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di
, exp
);
15729 case ALTIVEC_BUILTIN_STVXL_V4SF
:
15730 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf
, exp
);
15731 case ALTIVEC_BUILTIN_STVXL
:
15732 case ALTIVEC_BUILTIN_STVXL_V4SI
:
15733 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si
, exp
);
15734 case ALTIVEC_BUILTIN_STVXL_V8HI
:
15735 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi
, exp
);
15736 case ALTIVEC_BUILTIN_STVXL_V16QI
:
15737 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi
, exp
);
15739 case ALTIVEC_BUILTIN_STVLX
:
15740 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx
, exp
);
15741 case ALTIVEC_BUILTIN_STVLXL
:
15742 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl
, exp
);
15743 case ALTIVEC_BUILTIN_STVRX
:
15744 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx
, exp
);
15745 case ALTIVEC_BUILTIN_STVRXL
:
15746 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl
, exp
);
15748 case P9V_BUILTIN_STXVL
:
15749 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl
, exp
);
15751 case VSX_BUILTIN_STXVD2X_V1TI
:
15752 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti
, exp
);
15753 case VSX_BUILTIN_STXVD2X_V2DF
:
15754 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df
, exp
);
15755 case VSX_BUILTIN_STXVD2X_V2DI
:
15756 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di
, exp
);
15757 case VSX_BUILTIN_STXVW4X_V4SF
:
15758 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf
, exp
);
15759 case VSX_BUILTIN_STXVW4X_V4SI
:
15760 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si
, exp
);
15761 case VSX_BUILTIN_STXVW4X_V8HI
:
15762 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi
, exp
);
15763 case VSX_BUILTIN_STXVW4X_V16QI
:
15764 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi
, exp
);
15766 /* For the following on big endian, it's ok to use any appropriate
15767 unaligned-supporting store, so use a generic expander. For
15768 little-endian, the exact element-reversing instruction must
15770 case VSX_BUILTIN_ST_ELEMREV_V2DF
:
15772 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v2df
15773 : CODE_FOR_vsx_st_elemrev_v2df
);
15774 return altivec_expand_stv_builtin (code
, exp
);
15776 case VSX_BUILTIN_ST_ELEMREV_V2DI
:
15778 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v2di
15779 : CODE_FOR_vsx_st_elemrev_v2di
);
15780 return altivec_expand_stv_builtin (code
, exp
);
15782 case VSX_BUILTIN_ST_ELEMREV_V4SF
:
15784 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v4sf
15785 : CODE_FOR_vsx_st_elemrev_v4sf
);
15786 return altivec_expand_stv_builtin (code
, exp
);
15788 case VSX_BUILTIN_ST_ELEMREV_V4SI
:
15790 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v4si
15791 : CODE_FOR_vsx_st_elemrev_v4si
);
15792 return altivec_expand_stv_builtin (code
, exp
);
15794 case VSX_BUILTIN_ST_ELEMREV_V8HI
:
15796 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v8hi
15797 : CODE_FOR_vsx_st_elemrev_v8hi
);
15798 return altivec_expand_stv_builtin (code
, exp
);
15800 case VSX_BUILTIN_ST_ELEMREV_V16QI
:
15802 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_store_v16qi
15803 : CODE_FOR_vsx_st_elemrev_v16qi
);
15804 return altivec_expand_stv_builtin (code
, exp
);
15807 case ALTIVEC_BUILTIN_MFVSCR
:
15808 icode
= CODE_FOR_altivec_mfvscr
;
15809 tmode
= insn_data
[icode
].operand
[0].mode
;
15812 || GET_MODE (target
) != tmode
15813 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15814 target
= gen_reg_rtx (tmode
);
15816 pat
= GEN_FCN (icode
) (target
);
15822 case ALTIVEC_BUILTIN_MTVSCR
:
15823 icode
= CODE_FOR_altivec_mtvscr
;
15824 arg0
= CALL_EXPR_ARG (exp
, 0);
15825 op0
= expand_normal (arg0
);
15826 mode0
= insn_data
[icode
].operand
[0].mode
;
15828 /* If we got invalid arguments bail out before generating bad rtl. */
15829 if (arg0
== error_mark_node
)
15832 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
15833 op0
= copy_to_mode_reg (mode0
, op0
);
15835 pat
= GEN_FCN (icode
) (op0
);
15840 case ALTIVEC_BUILTIN_DSSALL
:
15841 emit_insn (gen_altivec_dssall ());
15844 case ALTIVEC_BUILTIN_DSS
:
15845 icode
= CODE_FOR_altivec_dss
;
15846 arg0
= CALL_EXPR_ARG (exp
, 0);
15848 op0
= expand_normal (arg0
);
15849 mode0
= insn_data
[icode
].operand
[0].mode
;
15851 /* If we got invalid arguments bail out before generating bad rtl. */
15852 if (arg0
== error_mark_node
)
15855 if (TREE_CODE (arg0
) != INTEGER_CST
15856 || TREE_INT_CST_LOW (arg0
) & ~0x3)
15858 error ("argument to dss must be a 2-bit unsigned literal");
15862 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
15863 op0
= copy_to_mode_reg (mode0
, op0
);
15865 emit_insn (gen_altivec_dss (op0
));
15868 case ALTIVEC_BUILTIN_VEC_INIT_V4SI
:
15869 case ALTIVEC_BUILTIN_VEC_INIT_V8HI
:
15870 case ALTIVEC_BUILTIN_VEC_INIT_V16QI
:
15871 case ALTIVEC_BUILTIN_VEC_INIT_V4SF
:
15872 case VSX_BUILTIN_VEC_INIT_V2DF
:
15873 case VSX_BUILTIN_VEC_INIT_V2DI
:
15874 case VSX_BUILTIN_VEC_INIT_V1TI
:
15875 return altivec_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
15877 case ALTIVEC_BUILTIN_VEC_SET_V4SI
:
15878 case ALTIVEC_BUILTIN_VEC_SET_V8HI
:
15879 case ALTIVEC_BUILTIN_VEC_SET_V16QI
:
15880 case ALTIVEC_BUILTIN_VEC_SET_V4SF
:
15881 case VSX_BUILTIN_VEC_SET_V2DF
:
15882 case VSX_BUILTIN_VEC_SET_V2DI
:
15883 case VSX_BUILTIN_VEC_SET_V1TI
:
15884 return altivec_expand_vec_set_builtin (exp
);
15886 case ALTIVEC_BUILTIN_VEC_EXT_V4SI
:
15887 case ALTIVEC_BUILTIN_VEC_EXT_V8HI
:
15888 case ALTIVEC_BUILTIN_VEC_EXT_V16QI
:
15889 case ALTIVEC_BUILTIN_VEC_EXT_V4SF
:
15890 case VSX_BUILTIN_VEC_EXT_V2DF
:
15891 case VSX_BUILTIN_VEC_EXT_V2DI
:
15892 case VSX_BUILTIN_VEC_EXT_V1TI
:
15893 return altivec_expand_vec_ext_builtin (exp
, target
);
15895 case P9V_BUILTIN_VEXTRACT4B
:
15896 case P9V_BUILTIN_VEC_VEXTRACT4B
:
15897 arg1
= CALL_EXPR_ARG (exp
, 1);
15900 /* Generate a normal call if it is invalid. */
15901 if (arg1
== error_mark_node
)
15902 return expand_call (exp
, target
, false);
15904 if (TREE_CODE (arg1
) != INTEGER_CST
|| TREE_INT_CST_LOW (arg1
) > 12)
15906 error ("second argument to vec_vextract4b must be 0..12");
15907 return expand_call (exp
, target
, false);
15911 case P9V_BUILTIN_VINSERT4B
:
15912 case P9V_BUILTIN_VINSERT4B_DI
:
15913 case P9V_BUILTIN_VEC_VINSERT4B
:
15914 arg2
= CALL_EXPR_ARG (exp
, 2);
15917 /* Generate a normal call if it is invalid. */
15918 if (arg2
== error_mark_node
)
15919 return expand_call (exp
, target
, false);
15921 if (TREE_CODE (arg2
) != INTEGER_CST
|| TREE_INT_CST_LOW (arg2
) > 12)
15923 error ("third argument to vec_vinsert4b must be 0..12");
15924 return expand_call (exp
, target
, false);
15930 /* Fall through. */
15933 /* Expand abs* operations. */
15935 for (i
= 0; i
< ARRAY_SIZE (bdesc_abs
); i
++, d
++)
15936 if (d
->code
== fcode
)
15937 return altivec_expand_abs_builtin (d
->icode
, exp
, target
);
15939 /* Expand the AltiVec predicates. */
15940 d
= bdesc_altivec_preds
;
15941 for (i
= 0; i
< ARRAY_SIZE (bdesc_altivec_preds
); i
++, d
++)
15942 if (d
->code
== fcode
)
15943 return altivec_expand_predicate_builtin (d
->icode
, exp
, target
);
15945 /* LV* are funky. We initialized them differently. */
15948 case ALTIVEC_BUILTIN_LVSL
:
15949 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl
,
15950 exp
, target
, false);
15951 case ALTIVEC_BUILTIN_LVSR
:
15952 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr
,
15953 exp
, target
, false);
15954 case ALTIVEC_BUILTIN_LVEBX
:
15955 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx
,
15956 exp
, target
, false);
15957 case ALTIVEC_BUILTIN_LVEHX
:
15958 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx
,
15959 exp
, target
, false);
15960 case ALTIVEC_BUILTIN_LVEWX
:
15961 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx
,
15962 exp
, target
, false);
15963 case ALTIVEC_BUILTIN_LVXL_V2DF
:
15964 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df
,
15965 exp
, target
, false);
15966 case ALTIVEC_BUILTIN_LVXL_V2DI
:
15967 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di
,
15968 exp
, target
, false);
15969 case ALTIVEC_BUILTIN_LVXL_V4SF
:
15970 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf
,
15971 exp
, target
, false);
15972 case ALTIVEC_BUILTIN_LVXL
:
15973 case ALTIVEC_BUILTIN_LVXL_V4SI
:
15974 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si
,
15975 exp
, target
, false);
15976 case ALTIVEC_BUILTIN_LVXL_V8HI
:
15977 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi
,
15978 exp
, target
, false);
15979 case ALTIVEC_BUILTIN_LVXL_V16QI
:
15980 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi
,
15981 exp
, target
, false);
15982 case ALTIVEC_BUILTIN_LVX_V2DF
:
15983 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op
,
15984 exp
, target
, false);
15985 case ALTIVEC_BUILTIN_LVX_V2DI
:
15986 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op
,
15987 exp
, target
, false);
15988 case ALTIVEC_BUILTIN_LVX_V4SF
:
15989 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op
,
15990 exp
, target
, false);
15991 case ALTIVEC_BUILTIN_LVX
:
15992 case ALTIVEC_BUILTIN_LVX_V4SI
:
15993 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op
,
15994 exp
, target
, false);
15995 case ALTIVEC_BUILTIN_LVX_V8HI
:
15996 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op
,
15997 exp
, target
, false);
15998 case ALTIVEC_BUILTIN_LVX_V16QI
:
15999 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op
,
16000 exp
, target
, false);
16001 case ALTIVEC_BUILTIN_LVLX
:
16002 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx
,
16003 exp
, target
, true);
16004 case ALTIVEC_BUILTIN_LVLXL
:
16005 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl
,
16006 exp
, target
, true);
16007 case ALTIVEC_BUILTIN_LVRX
:
16008 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx
,
16009 exp
, target
, true);
16010 case ALTIVEC_BUILTIN_LVRXL
:
16011 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl
,
16012 exp
, target
, true);
16013 case VSX_BUILTIN_LXVD2X_V1TI
:
16014 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti
,
16015 exp
, target
, false);
16016 case VSX_BUILTIN_LXVD2X_V2DF
:
16017 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df
,
16018 exp
, target
, false);
16019 case VSX_BUILTIN_LXVD2X_V2DI
:
16020 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di
,
16021 exp
, target
, false);
16022 case VSX_BUILTIN_LXVW4X_V4SF
:
16023 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf
,
16024 exp
, target
, false);
16025 case VSX_BUILTIN_LXVW4X_V4SI
:
16026 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si
,
16027 exp
, target
, false);
16028 case VSX_BUILTIN_LXVW4X_V8HI
:
16029 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi
,
16030 exp
, target
, false);
16031 case VSX_BUILTIN_LXVW4X_V16QI
:
16032 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi
,
16033 exp
, target
, false);
16034 /* For the following on big endian, it's ok to use any appropriate
16035 unaligned-supporting load, so use a generic expander. For
16036 little-endian, the exact element-reversing instruction must
16038 case VSX_BUILTIN_LD_ELEMREV_V2DF
:
16040 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v2df
16041 : CODE_FOR_vsx_ld_elemrev_v2df
);
16042 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16044 case VSX_BUILTIN_LD_ELEMREV_V2DI
:
16046 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v2di
16047 : CODE_FOR_vsx_ld_elemrev_v2di
);
16048 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16050 case VSX_BUILTIN_LD_ELEMREV_V4SF
:
16052 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v4sf
16053 : CODE_FOR_vsx_ld_elemrev_v4sf
);
16054 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16056 case VSX_BUILTIN_LD_ELEMREV_V4SI
:
16058 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v4si
16059 : CODE_FOR_vsx_ld_elemrev_v4si
);
16060 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16062 case VSX_BUILTIN_LD_ELEMREV_V8HI
:
16064 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v8hi
16065 : CODE_FOR_vsx_ld_elemrev_v8hi
);
16066 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16068 case VSX_BUILTIN_LD_ELEMREV_V16QI
:
16070 enum insn_code code
= (BYTES_BIG_ENDIAN
? CODE_FOR_vsx_load_v16qi
16071 : CODE_FOR_vsx_ld_elemrev_v16qi
);
16072 return altivec_expand_lv_builtin (code
, exp
, target
, false);
16077 /* Fall through. */
16080 *expandedp
= false;
16084 /* Expand the builtin in EXP and store the result in TARGET. Store
16085 true in *EXPANDEDP if we found a builtin to expand. */
16087 paired_expand_builtin (tree exp
, rtx target
, bool * expandedp
)
16089 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16090 enum rs6000_builtins fcode
= (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16091 const struct builtin_description
*d
;
16098 case PAIRED_BUILTIN_STX
:
16099 return paired_expand_stv_builtin (CODE_FOR_paired_stx
, exp
);
16100 case PAIRED_BUILTIN_LX
:
16101 return paired_expand_lv_builtin (CODE_FOR_paired_lx
, exp
, target
);
16104 /* Fall through. */
16107 /* Expand the paired predicates. */
16108 d
= bdesc_paired_preds
;
16109 for (i
= 0; i
< ARRAY_SIZE (bdesc_paired_preds
); i
++, d
++)
16110 if (d
->code
== fcode
)
16111 return paired_expand_predicate_builtin (d
->icode
, exp
, target
);
16113 *expandedp
= false;
16118 paired_expand_predicate_builtin (enum insn_code icode
, tree exp
, rtx target
)
16120 rtx pat
, scratch
, tmp
;
16121 tree form
= CALL_EXPR_ARG (exp
, 0);
16122 tree arg0
= CALL_EXPR_ARG (exp
, 1);
16123 tree arg1
= CALL_EXPR_ARG (exp
, 2);
16124 rtx op0
= expand_normal (arg0
);
16125 rtx op1
= expand_normal (arg1
);
16126 machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
16127 machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
16129 enum rtx_code code
;
16131 if (TREE_CODE (form
) != INTEGER_CST
)
16133 error ("argument 1 of __builtin_paired_predicate must be a constant");
16137 form_int
= TREE_INT_CST_LOW (form
);
16139 gcc_assert (mode0
== mode1
);
16141 if (arg0
== error_mark_node
|| arg1
== error_mark_node
)
16145 || GET_MODE (target
) != SImode
16146 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, SImode
))
16147 target
= gen_reg_rtx (SImode
);
16148 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16149 op0
= copy_to_mode_reg (mode0
, op0
);
16150 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16151 op1
= copy_to_mode_reg (mode1
, op1
);
16153 scratch
= gen_reg_rtx (CCFPmode
);
16155 pat
= GEN_FCN (icode
) (scratch
, op0
, op1
);
16177 emit_insn (gen_move_from_CR_ov_bit (target
, scratch
));
16180 error ("argument 1 of __builtin_paired_predicate is out of range");
16184 tmp
= gen_rtx_fmt_ee (code
, SImode
, scratch
, const0_rtx
);
16185 emit_move_insn (target
, tmp
);
16189 /* Raise an error message for a builtin function that is called without the
16190 appropriate target options being set. */
16193 rs6000_invalid_builtin (enum rs6000_builtins fncode
)
16195 size_t uns_fncode
= (size_t)fncode
;
16196 const char *name
= rs6000_builtin_info
[uns_fncode
].name
;
16197 HOST_WIDE_INT fnmask
= rs6000_builtin_info
[uns_fncode
].mask
;
16199 gcc_assert (name
!= NULL
);
16200 if ((fnmask
& RS6000_BTM_CELL
) != 0)
16201 error ("Builtin function %s is only valid for the cell processor", name
);
16202 else if ((fnmask
& RS6000_BTM_VSX
) != 0)
16203 error ("Builtin function %s requires the -mvsx option", name
);
16204 else if ((fnmask
& RS6000_BTM_HTM
) != 0)
16205 error ("Builtin function %s requires the -mhtm option", name
);
16206 else if ((fnmask
& RS6000_BTM_ALTIVEC
) != 0)
16207 error ("Builtin function %s requires the -maltivec option", name
);
16208 else if ((fnmask
& RS6000_BTM_PAIRED
) != 0)
16209 error ("Builtin function %s requires the -mpaired option", name
);
16210 else if ((fnmask
& (RS6000_BTM_DFP
| RS6000_BTM_P8_VECTOR
))
16211 == (RS6000_BTM_DFP
| RS6000_BTM_P8_VECTOR
))
16212 error ("Builtin function %s requires the -mhard-dfp and"
16213 " -mpower8-vector options", name
);
16214 else if ((fnmask
& RS6000_BTM_DFP
) != 0)
16215 error ("Builtin function %s requires the -mhard-dfp option", name
);
16216 else if ((fnmask
& RS6000_BTM_P8_VECTOR
) != 0)
16217 error ("Builtin function %s requires the -mpower8-vector option", name
);
16218 else if ((fnmask
& (RS6000_BTM_P9_VECTOR
| RS6000_BTM_64BIT
))
16219 == (RS6000_BTM_P9_VECTOR
| RS6000_BTM_64BIT
))
16220 error ("Builtin function %s requires the -mcpu=power9 and"
16221 " -m64 options", name
);
16222 else if ((fnmask
& RS6000_BTM_P9_VECTOR
) != 0)
16223 error ("Builtin function %s requires the -mcpu=power9 option", name
);
16224 else if ((fnmask
& (RS6000_BTM_P9_MISC
| RS6000_BTM_64BIT
))
16225 == (RS6000_BTM_P9_MISC
| RS6000_BTM_64BIT
))
16226 error ("Builtin function %s requires the -mcpu=power9 and"
16227 " -m64 options", name
);
16228 else if ((fnmask
& RS6000_BTM_P9_MISC
) == RS6000_BTM_P9_MISC
)
16229 error ("Builtin function %s requires the -mcpu=power9 option", name
);
16230 else if ((fnmask
& (RS6000_BTM_HARD_FLOAT
| RS6000_BTM_LDBL128
))
16231 == (RS6000_BTM_HARD_FLOAT
| RS6000_BTM_LDBL128
))
16232 error ("Builtin function %s requires the -mhard-float and"
16233 " -mlong-double-128 options", name
);
16234 else if ((fnmask
& RS6000_BTM_HARD_FLOAT
) != 0)
16235 error ("Builtin function %s requires the -mhard-float option", name
);
16236 else if ((fnmask
& RS6000_BTM_FLOAT128
) != 0)
16237 error ("Builtin function %s requires the -mfloat128 option", name
);
16239 error ("Builtin function %s is not supported with the current options",
16243 /* Target hook for early folding of built-ins, shamelessly stolen
16247 rs6000_fold_builtin (tree fndecl
, int n_args ATTRIBUTE_UNUSED
,
16248 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
16250 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
16252 enum rs6000_builtins fn_code
16253 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16256 case RS6000_BUILTIN_NANQ
:
16257 case RS6000_BUILTIN_NANSQ
:
16259 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16260 const char *str
= c_getstr (*args
);
16261 int quiet
= fn_code
== RS6000_BUILTIN_NANQ
;
16262 REAL_VALUE_TYPE real
;
16264 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
16265 return build_real (type
, real
);
16268 case RS6000_BUILTIN_INFQ
:
16269 case RS6000_BUILTIN_HUGE_VALQ
:
16271 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16272 REAL_VALUE_TYPE inf
;
16274 return build_real (type
, inf
);
16280 #ifdef SUBTARGET_FOLD_BUILTIN
16281 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
16287 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16288 a constant, use rs6000_fold_builtin.) */
16291 rs6000_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
16293 gimple
*stmt
= gsi_stmt (*gsi
);
16294 tree fndecl
= gimple_call_fndecl (stmt
);
16295 gcc_checking_assert (fndecl
&& DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
);
16296 enum rs6000_builtins fn_code
16297 = (enum rs6000_builtins
) DECL_FUNCTION_CODE (fndecl
);
16298 tree arg0
, arg1
, lhs
;
16302 /* Flavors of vec_add. We deliberately don't expand
16303 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16304 TImode, resulting in much poorer code generation. */
16305 case ALTIVEC_BUILTIN_VADDUBM
:
16306 case ALTIVEC_BUILTIN_VADDUHM
:
16307 case ALTIVEC_BUILTIN_VADDUWM
:
16308 case P8V_BUILTIN_VADDUDM
:
16309 case ALTIVEC_BUILTIN_VADDFP
:
16310 case VSX_BUILTIN_XVADDDP
:
16312 arg0
= gimple_call_arg (stmt
, 0);
16313 arg1
= gimple_call_arg (stmt
, 1);
16314 lhs
= gimple_call_lhs (stmt
);
16315 gimple
*g
= gimple_build_assign (lhs
, PLUS_EXPR
, arg0
, arg1
);
16316 gimple_set_location (g
, gimple_location (stmt
));
16317 gsi_replace (gsi
, g
, true);
16320 /* Flavors of vec_sub. We deliberately don't expand
16321 P8V_BUILTIN_VSUBUQM. */
16322 case ALTIVEC_BUILTIN_VSUBUBM
:
16323 case ALTIVEC_BUILTIN_VSUBUHM
:
16324 case ALTIVEC_BUILTIN_VSUBUWM
:
16325 case P8V_BUILTIN_VSUBUDM
:
16326 case ALTIVEC_BUILTIN_VSUBFP
:
16327 case VSX_BUILTIN_XVSUBDP
:
16329 arg0
= gimple_call_arg (stmt
, 0);
16330 arg1
= gimple_call_arg (stmt
, 1);
16331 lhs
= gimple_call_lhs (stmt
);
16332 gimple
*g
= gimple_build_assign (lhs
, MINUS_EXPR
, arg0
, arg1
);
16333 gimple_set_location (g
, gimple_location (stmt
));
16334 gsi_replace (gsi
, g
, true);
16337 case VSX_BUILTIN_XVMULSP
:
16338 case VSX_BUILTIN_XVMULDP
:
16340 arg0
= gimple_call_arg (stmt
, 0);
16341 arg1
= gimple_call_arg (stmt
, 1);
16342 lhs
= gimple_call_lhs (stmt
);
16343 gimple
*g
= gimple_build_assign (lhs
, MULT_EXPR
, arg0
, arg1
);
16344 gimple_set_location (g
, gimple_location (stmt
));
16345 gsi_replace (gsi
, g
, true);
16348 /* Even element flavors of vec_mul (signed). */
16349 case ALTIVEC_BUILTIN_VMULESB
:
16350 case ALTIVEC_BUILTIN_VMULESH
:
16351 case ALTIVEC_BUILTIN_VMULESW
:
16352 /* Even element flavors of vec_mul (unsigned). */
16353 case ALTIVEC_BUILTIN_VMULEUB
:
16354 case ALTIVEC_BUILTIN_VMULEUH
:
16355 case ALTIVEC_BUILTIN_VMULEUW
:
16357 arg0
= gimple_call_arg (stmt
, 0);
16358 arg1
= gimple_call_arg (stmt
, 1);
16359 lhs
= gimple_call_lhs (stmt
);
16360 gimple
*g
= gimple_build_assign (lhs
, VEC_WIDEN_MULT_EVEN_EXPR
, arg0
, arg1
);
16361 gimple_set_location (g
, gimple_location (stmt
));
16362 gsi_replace (gsi
, g
, true);
16365 /* Odd element flavors of vec_mul (signed). */
16366 case ALTIVEC_BUILTIN_VMULOSB
:
16367 case ALTIVEC_BUILTIN_VMULOSH
:
16368 case ALTIVEC_BUILTIN_VMULOSW
:
16369 /* Odd element flavors of vec_mul (unsigned). */
16370 case ALTIVEC_BUILTIN_VMULOUB
:
16371 case ALTIVEC_BUILTIN_VMULOUH
:
16372 case ALTIVEC_BUILTIN_VMULOUW
:
16374 arg0
= gimple_call_arg (stmt
, 0);
16375 arg1
= gimple_call_arg (stmt
, 1);
16376 lhs
= gimple_call_lhs (stmt
);
16377 gimple
*g
= gimple_build_assign (lhs
, VEC_WIDEN_MULT_ODD_EXPR
, arg0
, arg1
);
16378 gimple_set_location (g
, gimple_location (stmt
));
16379 gsi_replace (gsi
, g
, true);
16382 /* Flavors of vec_div (Integer). */
16383 case VSX_BUILTIN_DIV_V2DI
:
16384 case VSX_BUILTIN_UDIV_V2DI
:
16386 arg0
= gimple_call_arg (stmt
, 0);
16387 arg1
= gimple_call_arg (stmt
, 1);
16388 lhs
= gimple_call_lhs (stmt
);
16389 gimple
*g
= gimple_build_assign (lhs
, TRUNC_DIV_EXPR
, arg0
, arg1
);
16390 gimple_set_location (g
, gimple_location (stmt
));
16391 gsi_replace (gsi
, g
, true);
16394 /* Flavors of vec_div (Float). */
16395 case VSX_BUILTIN_XVDIVSP
:
16396 case VSX_BUILTIN_XVDIVDP
:
16398 arg0
= gimple_call_arg (stmt
, 0);
16399 arg1
= gimple_call_arg (stmt
, 1);
16400 lhs
= gimple_call_lhs (stmt
);
16401 gimple
*g
= gimple_build_assign (lhs
, RDIV_EXPR
, arg0
, arg1
);
16402 gimple_set_location (g
, gimple_location (stmt
));
16403 gsi_replace (gsi
, g
, true);
16406 /* Flavors of vec_and. */
16407 case ALTIVEC_BUILTIN_VAND
:
16409 arg0
= gimple_call_arg (stmt
, 0);
16410 arg1
= gimple_call_arg (stmt
, 1);
16411 lhs
= gimple_call_lhs (stmt
);
16412 gimple
*g
= gimple_build_assign (lhs
, BIT_AND_EXPR
, arg0
, arg1
);
16413 gimple_set_location (g
, gimple_location (stmt
));
16414 gsi_replace (gsi
, g
, true);
16417 /* Flavors of vec_andc. */
16418 case ALTIVEC_BUILTIN_VANDC
:
16420 arg0
= gimple_call_arg (stmt
, 0);
16421 arg1
= gimple_call_arg (stmt
, 1);
16422 lhs
= gimple_call_lhs (stmt
);
16423 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
16424 gimple
*g
= gimple_build_assign(temp
, BIT_NOT_EXPR
, arg1
);
16425 gimple_set_location (g
, gimple_location (stmt
));
16426 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
16427 g
= gimple_build_assign (lhs
, BIT_AND_EXPR
, arg0
, temp
);
16428 gimple_set_location (g
, gimple_location (stmt
));
16429 gsi_replace (gsi
, g
, true);
16432 /* Flavors of vec_nand. */
16433 case P8V_BUILTIN_VEC_NAND
:
16434 case P8V_BUILTIN_NAND_V16QI
:
16435 case P8V_BUILTIN_NAND_V8HI
:
16436 case P8V_BUILTIN_NAND_V4SI
:
16437 case P8V_BUILTIN_NAND_V4SF
:
16438 case P8V_BUILTIN_NAND_V2DF
:
16439 case P8V_BUILTIN_NAND_V2DI
:
16441 arg0
= gimple_call_arg (stmt
, 0);
16442 arg1
= gimple_call_arg (stmt
, 1);
16443 lhs
= gimple_call_lhs (stmt
);
16444 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
16445 gimple
*g
= gimple_build_assign(temp
, BIT_AND_EXPR
, arg0
, arg1
);
16446 gimple_set_location (g
, gimple_location (stmt
));
16447 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
16448 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
16449 gimple_set_location (g
, gimple_location (stmt
));
16450 gsi_replace (gsi
, g
, true);
16453 /* Flavors of vec_or. */
16454 case ALTIVEC_BUILTIN_VOR
:
16456 arg0
= gimple_call_arg (stmt
, 0);
16457 arg1
= gimple_call_arg (stmt
, 1);
16458 lhs
= gimple_call_lhs (stmt
);
16459 gimple
*g
= gimple_build_assign (lhs
, BIT_IOR_EXPR
, arg0
, arg1
);
16460 gimple_set_location (g
, gimple_location (stmt
));
16461 gsi_replace (gsi
, g
, true);
16464 /* flavors of vec_orc. */
16465 case P8V_BUILTIN_ORC_V16QI
:
16466 case P8V_BUILTIN_ORC_V8HI
:
16467 case P8V_BUILTIN_ORC_V4SI
:
16468 case P8V_BUILTIN_ORC_V4SF
:
16469 case P8V_BUILTIN_ORC_V2DF
:
16470 case P8V_BUILTIN_ORC_V2DI
:
16472 arg0
= gimple_call_arg (stmt
, 0);
16473 arg1
= gimple_call_arg (stmt
, 1);
16474 lhs
= gimple_call_lhs (stmt
);
16475 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
16476 gimple
*g
= gimple_build_assign(temp
, BIT_NOT_EXPR
, arg1
);
16477 gimple_set_location (g
, gimple_location (stmt
));
16478 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
16479 g
= gimple_build_assign (lhs
, BIT_IOR_EXPR
, arg0
, temp
);
16480 gimple_set_location (g
, gimple_location (stmt
));
16481 gsi_replace (gsi
, g
, true);
16484 /* Flavors of vec_xor. */
16485 case ALTIVEC_BUILTIN_VXOR
:
16487 arg0
= gimple_call_arg (stmt
, 0);
16488 arg1
= gimple_call_arg (stmt
, 1);
16489 lhs
= gimple_call_lhs (stmt
);
16490 gimple
*g
= gimple_build_assign (lhs
, BIT_XOR_EXPR
, arg0
, arg1
);
16491 gimple_set_location (g
, gimple_location (stmt
));
16492 gsi_replace (gsi
, g
, true);
16495 /* Flavors of vec_nor. */
16496 case ALTIVEC_BUILTIN_VNOR
:
16498 arg0
= gimple_call_arg (stmt
, 0);
16499 arg1
= gimple_call_arg (stmt
, 1);
16500 lhs
= gimple_call_lhs (stmt
);
16501 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
16502 gimple
*g
= gimple_build_assign (temp
, BIT_IOR_EXPR
, arg0
, arg1
);
16503 gimple_set_location (g
, gimple_location (stmt
));
16504 gsi_insert_before(gsi
, g
, GSI_SAME_STMT
);
16505 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
16506 gimple_set_location (g
, gimple_location (stmt
));
16507 gsi_replace (gsi
, g
, true);
16510 /* flavors of vec_abs. */
16511 case ALTIVEC_BUILTIN_ABS_V16QI
:
16512 case ALTIVEC_BUILTIN_ABS_V8HI
:
16513 case ALTIVEC_BUILTIN_ABS_V4SI
:
16514 case ALTIVEC_BUILTIN_ABS_V4SF
:
16515 case P8V_BUILTIN_ABS_V2DI
:
16516 case VSX_BUILTIN_XVABSDP
:
16518 arg0
= gimple_call_arg (stmt
, 0);
16519 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0
)))
16520 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0
))))
16522 lhs
= gimple_call_lhs (stmt
);
16523 gimple
*g
= gimple_build_assign (lhs
, ABS_EXPR
, arg0
);
16524 gimple_set_location (g
, gimple_location (stmt
));
16525 gsi_replace (gsi
, g
, true);
16528 /* flavors of vec_min. */
16529 case VSX_BUILTIN_XVMINDP
:
16530 case P8V_BUILTIN_VMINSD
:
16531 case P8V_BUILTIN_VMINUD
:
16532 case ALTIVEC_BUILTIN_VMINSB
:
16533 case ALTIVEC_BUILTIN_VMINSH
:
16534 case ALTIVEC_BUILTIN_VMINSW
:
16535 case ALTIVEC_BUILTIN_VMINUB
:
16536 case ALTIVEC_BUILTIN_VMINUH
:
16537 case ALTIVEC_BUILTIN_VMINUW
:
16538 case ALTIVEC_BUILTIN_VMINFP
:
16540 arg0
= gimple_call_arg (stmt
, 0);
16541 arg1
= gimple_call_arg (stmt
, 1);
16542 lhs
= gimple_call_lhs (stmt
);
16543 gimple
*g
= gimple_build_assign (lhs
, MIN_EXPR
, arg0
, arg1
);
16544 gimple_set_location (g
, gimple_location (stmt
));
16545 gsi_replace (gsi
, g
, true);
16548 /* flavors of vec_max. */
16549 case VSX_BUILTIN_XVMAXDP
:
16550 case P8V_BUILTIN_VMAXSD
:
16551 case P8V_BUILTIN_VMAXUD
:
16552 case ALTIVEC_BUILTIN_VMAXSB
:
16553 case ALTIVEC_BUILTIN_VMAXSH
:
16554 case ALTIVEC_BUILTIN_VMAXSW
:
16555 case ALTIVEC_BUILTIN_VMAXUB
:
16556 case ALTIVEC_BUILTIN_VMAXUH
:
16557 case ALTIVEC_BUILTIN_VMAXUW
:
16558 case ALTIVEC_BUILTIN_VMAXFP
:
16560 arg0
= gimple_call_arg (stmt
, 0);
16561 arg1
= gimple_call_arg (stmt
, 1);
16562 lhs
= gimple_call_lhs (stmt
);
16563 gimple
*g
= gimple_build_assign (lhs
, MAX_EXPR
, arg0
, arg1
);
16564 gimple_set_location (g
, gimple_location (stmt
));
16565 gsi_replace (gsi
, g
, true);
16568 /* Flavors of vec_eqv. */
16569 case P8V_BUILTIN_EQV_V16QI
:
16570 case P8V_BUILTIN_EQV_V8HI
:
16571 case P8V_BUILTIN_EQV_V4SI
:
16572 case P8V_BUILTIN_EQV_V4SF
:
16573 case P8V_BUILTIN_EQV_V2DF
:
16574 case P8V_BUILTIN_EQV_V2DI
:
16576 arg0
= gimple_call_arg (stmt
, 0);
16577 arg1
= gimple_call_arg (stmt
, 1);
16578 lhs
= gimple_call_lhs (stmt
);
16579 tree temp
= create_tmp_reg_or_ssa_name (TREE_TYPE (arg1
));
16580 gimple
*g
= gimple_build_assign (temp
, BIT_XOR_EXPR
, arg0
, arg1
);
16581 gimple_set_location (g
, gimple_location (stmt
));
16582 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
16583 g
= gimple_build_assign (lhs
, BIT_NOT_EXPR
, temp
);
16584 gimple_set_location (g
, gimple_location (stmt
));
16585 gsi_replace (gsi
, g
, true);
16588 /* Flavors of vec_rotate_left. */
16589 case ALTIVEC_BUILTIN_VRLB
:
16590 case ALTIVEC_BUILTIN_VRLH
:
16591 case ALTIVEC_BUILTIN_VRLW
:
16592 case P8V_BUILTIN_VRLD
:
16594 arg0
= gimple_call_arg (stmt
, 0);
16595 arg1
= gimple_call_arg (stmt
, 1);
16596 lhs
= gimple_call_lhs (stmt
);
16597 gimple
*g
= gimple_build_assign (lhs
, LROTATE_EXPR
, arg0
, arg1
);
16598 gimple_set_location (g
, gimple_location (stmt
));
16599 gsi_replace (gsi
, g
, true);
16602 /* Flavors of vector shift right algebraic.
16603 vec_sra{b,h,w} -> vsra{b,h,w}. */
16604 case ALTIVEC_BUILTIN_VSRAB
:
16605 case ALTIVEC_BUILTIN_VSRAH
:
16606 case ALTIVEC_BUILTIN_VSRAW
:
16607 case P8V_BUILTIN_VSRAD
:
16609 arg0
= gimple_call_arg (stmt
, 0);
16610 arg1
= gimple_call_arg (stmt
, 1);
16611 lhs
= gimple_call_lhs (stmt
);
16612 gimple
*g
= gimple_build_assign (lhs
, RSHIFT_EXPR
, arg0
, arg1
);
16613 gimple_set_location (g
, gimple_location (stmt
));
16614 gsi_replace (gsi
, g
, true);
16617 /* Flavors of vector shift left.
16618 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
16619 case ALTIVEC_BUILTIN_VSLB
:
16620 case ALTIVEC_BUILTIN_VSLH
:
16621 case ALTIVEC_BUILTIN_VSLW
:
16622 case P8V_BUILTIN_VSLD
:
16624 arg0
= gimple_call_arg (stmt
, 0);
16625 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0
)))
16626 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0
))))
16628 arg1
= gimple_call_arg (stmt
, 1);
16629 lhs
= gimple_call_lhs (stmt
);
16630 gimple
*g
= gimple_build_assign (lhs
, LSHIFT_EXPR
, arg0
, arg1
);
16631 gimple_set_location (g
, gimple_location (stmt
));
16632 gsi_replace (gsi
, g
, true);
16635 /* Flavors of vector shift right. */
16636 case ALTIVEC_BUILTIN_VSRB
:
16637 case ALTIVEC_BUILTIN_VSRH
:
16638 case ALTIVEC_BUILTIN_VSRW
:
16639 case P8V_BUILTIN_VSRD
:
16641 arg0
= gimple_call_arg (stmt
, 0);
16642 arg1
= gimple_call_arg (stmt
, 1);
16643 lhs
= gimple_call_lhs (stmt
);
16644 gimple_seq stmts
= NULL
;
16645 /* Convert arg0 to unsigned. */
16647 = gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
16648 unsigned_type_for (TREE_TYPE (arg0
)), arg0
);
16650 = gimple_build (&stmts
, RSHIFT_EXPR
,
16651 TREE_TYPE (arg0_unsigned
), arg0_unsigned
, arg1
);
16652 /* Convert result back to the lhs type. */
16653 res
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
), res
);
16654 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
16655 update_call_from_tree (gsi
, res
);
16665 /* Expand an expression EXP that calls a built-in function,
16666 with result going to TARGET if that's convenient
16667 (and in mode MODE if that's convenient).
16668 SUBTARGET may be used as the target for computing one of EXP's operands.
16669 IGNORE is nonzero if the value is to be ignored. */
16672 rs6000_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
16673 machine_mode mode ATTRIBUTE_UNUSED
,
16674 int ignore ATTRIBUTE_UNUSED
)
16676 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
16677 enum rs6000_builtins fcode
16678 = (enum rs6000_builtins
)DECL_FUNCTION_CODE (fndecl
);
16679 size_t uns_fcode
= (size_t)fcode
;
16680 const struct builtin_description
*d
;
16684 HOST_WIDE_INT mask
= rs6000_builtin_info
[uns_fcode
].mask
;
16685 bool func_valid_p
= ((rs6000_builtin_mask
& mask
) == mask
);
16687 if (TARGET_DEBUG_BUILTIN
)
16689 enum insn_code icode
= rs6000_builtin_info
[uns_fcode
].icode
;
16690 const char *name1
= rs6000_builtin_info
[uns_fcode
].name
;
16691 const char *name2
= ((icode
!= CODE_FOR_nothing
)
16692 ? get_insn_name ((int)icode
)
16696 switch (rs6000_builtin_info
[uns_fcode
].attr
& RS6000_BTC_TYPE_MASK
)
16698 default: name3
= "unknown"; break;
16699 case RS6000_BTC_SPECIAL
: name3
= "special"; break;
16700 case RS6000_BTC_UNARY
: name3
= "unary"; break;
16701 case RS6000_BTC_BINARY
: name3
= "binary"; break;
16702 case RS6000_BTC_TERNARY
: name3
= "ternary"; break;
16703 case RS6000_BTC_PREDICATE
: name3
= "predicate"; break;
16704 case RS6000_BTC_ABS
: name3
= "abs"; break;
16705 case RS6000_BTC_DST
: name3
= "dst"; break;
16710 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16711 (name1
) ? name1
: "---", fcode
,
16712 (name2
) ? name2
: "---", (int)icode
,
16714 func_valid_p
? "" : ", not valid");
16719 rs6000_invalid_builtin (fcode
);
16721 /* Given it is invalid, just generate a normal call. */
16722 return expand_call (exp
, target
, ignore
);
16727 case RS6000_BUILTIN_RECIP
:
16728 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3
, exp
, target
);
16730 case RS6000_BUILTIN_RECIPF
:
16731 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3
, exp
, target
);
16733 case RS6000_BUILTIN_RSQRTF
:
16734 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2
, exp
, target
);
16736 case RS6000_BUILTIN_RSQRT
:
16737 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2
, exp
, target
);
16739 case POWER7_BUILTIN_BPERMD
:
16740 return rs6000_expand_binop_builtin (((TARGET_64BIT
)
16741 ? CODE_FOR_bpermd_di
16742 : CODE_FOR_bpermd_si
), exp
, target
);
16744 case RS6000_BUILTIN_GET_TB
:
16745 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase
,
16748 case RS6000_BUILTIN_MFTB
:
16749 return rs6000_expand_zeroop_builtin (((TARGET_64BIT
)
16750 ? CODE_FOR_rs6000_mftb_di
16751 : CODE_FOR_rs6000_mftb_si
),
16754 case RS6000_BUILTIN_MFFS
:
16755 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs
, target
);
16757 case RS6000_BUILTIN_MTFSF
:
16758 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf
, exp
);
16760 case RS6000_BUILTIN_CPU_INIT
:
16761 case RS6000_BUILTIN_CPU_IS
:
16762 case RS6000_BUILTIN_CPU_SUPPORTS
:
16763 return cpu_expand_builtin (fcode
, exp
, target
);
16765 case ALTIVEC_BUILTIN_MASK_FOR_LOAD
:
16766 case ALTIVEC_BUILTIN_MASK_FOR_STORE
:
16768 int icode
= (BYTES_BIG_ENDIAN
? (int) CODE_FOR_altivec_lvsr_direct
16769 : (int) CODE_FOR_altivec_lvsl_direct
);
16770 machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
16771 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
16775 gcc_assert (TARGET_ALTIVEC
);
16777 arg
= CALL_EXPR_ARG (exp
, 0);
16778 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
16779 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
16780 addr
= memory_address (mode
, op
);
16781 if (fcode
== ALTIVEC_BUILTIN_MASK_FOR_STORE
)
16785 /* For the load case need to negate the address. */
16786 op
= gen_reg_rtx (GET_MODE (addr
));
16787 emit_insn (gen_rtx_SET (op
, gen_rtx_NEG (GET_MODE (addr
), addr
)));
16789 op
= gen_rtx_MEM (mode
, op
);
16792 || GET_MODE (target
) != tmode
16793 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16794 target
= gen_reg_rtx (tmode
);
16796 pat
= GEN_FCN (icode
) (target
, op
);
16804 case ALTIVEC_BUILTIN_VCFUX
:
16805 case ALTIVEC_BUILTIN_VCFSX
:
16806 case ALTIVEC_BUILTIN_VCTUXS
:
16807 case ALTIVEC_BUILTIN_VCTSXS
:
16808 /* FIXME: There's got to be a nicer way to handle this case than
16809 constructing a new CALL_EXPR. */
16810 if (call_expr_nargs (exp
) == 1)
16812 exp
= build_call_nary (TREE_TYPE (exp
), CALL_EXPR_FN (exp
),
16813 2, CALL_EXPR_ARG (exp
, 0), integer_zero_node
);
16821 if (TARGET_ALTIVEC
)
16823 ret
= altivec_expand_builtin (exp
, target
, &success
);
16828 if (TARGET_PAIRED_FLOAT
)
16830 ret
= paired_expand_builtin (exp
, target
, &success
);
16837 ret
= htm_expand_builtin (exp
, target
, &success
);
16843 unsigned attr
= rs6000_builtin_info
[uns_fcode
].attr
& RS6000_BTC_TYPE_MASK
;
16844 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16845 gcc_assert (attr
== RS6000_BTC_UNARY
16846 || attr
== RS6000_BTC_BINARY
16847 || attr
== RS6000_BTC_TERNARY
16848 || attr
== RS6000_BTC_SPECIAL
);
16850 /* Handle simple unary operations. */
16852 for (i
= 0; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16853 if (d
->code
== fcode
)
16854 return rs6000_expand_unop_builtin (d
->icode
, exp
, target
);
16856 /* Handle simple binary operations. */
16858 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16859 if (d
->code
== fcode
)
16860 return rs6000_expand_binop_builtin (d
->icode
, exp
, target
);
16862 /* Handle simple ternary operations. */
16864 for (i
= 0; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
16865 if (d
->code
== fcode
)
16866 return rs6000_expand_ternop_builtin (d
->icode
, exp
, target
);
16868 /* Handle simple no-argument operations. */
16870 for (i
= 0; i
< ARRAY_SIZE (bdesc_0arg
); i
++, d
++)
16871 if (d
->code
== fcode
)
16872 return rs6000_expand_zeroop_builtin (d
->icode
, target
);
16874 gcc_unreachable ();
16877 /* Create a builtin vector type with a name. Taking care not to give
16878 the canonical type a name. */
16881 rs6000_vector_type (const char *name
, tree elt_type
, unsigned num_elts
)
16883 tree result
= build_vector_type (elt_type
, num_elts
);
16885 /* Copy so we don't give the canonical type a name. */
16886 result
= build_variant_type_copy (result
);
16888 add_builtin_type (name
, result
);
16894 rs6000_init_builtins (void)
16900 if (TARGET_DEBUG_BUILTIN
)
16901 fprintf (stderr
, "rs6000_init_builtins%s%s%s\n",
16902 (TARGET_PAIRED_FLOAT
) ? ", paired" : "",
16903 (TARGET_ALTIVEC
) ? ", altivec" : "",
16904 (TARGET_VSX
) ? ", vsx" : "");
16906 V2SI_type_node
= build_vector_type (intSI_type_node
, 2);
16907 V2SF_type_node
= build_vector_type (float_type_node
, 2);
16908 V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
? "__vector long"
16909 : "__vector long long",
16910 intDI_type_node
, 2);
16911 V2DF_type_node
= rs6000_vector_type ("__vector double", double_type_node
, 2);
16912 V4SI_type_node
= rs6000_vector_type ("__vector signed int",
16913 intSI_type_node
, 4);
16914 V4SF_type_node
= rs6000_vector_type ("__vector float", float_type_node
, 4);
16915 V8HI_type_node
= rs6000_vector_type ("__vector signed short",
16916 intHI_type_node
, 8);
16917 V16QI_type_node
= rs6000_vector_type ("__vector signed char",
16918 intQI_type_node
, 16);
16920 unsigned_V16QI_type_node
= rs6000_vector_type ("__vector unsigned char",
16921 unsigned_intQI_type_node
, 16);
16922 unsigned_V8HI_type_node
= rs6000_vector_type ("__vector unsigned short",
16923 unsigned_intHI_type_node
, 8);
16924 unsigned_V4SI_type_node
= rs6000_vector_type ("__vector unsigned int",
16925 unsigned_intSI_type_node
, 4);
16926 unsigned_V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
16927 ? "__vector unsigned long"
16928 : "__vector unsigned long long",
16929 unsigned_intDI_type_node
, 2);
16931 opaque_V2SF_type_node
= build_opaque_vector_type (float_type_node
, 2);
16932 opaque_V2SI_type_node
= build_opaque_vector_type (intSI_type_node
, 2);
16933 opaque_p_V2SI_type_node
= build_pointer_type (opaque_V2SI_type_node
);
16934 opaque_V4SI_type_node
= build_opaque_vector_type (intSI_type_node
, 4);
16936 const_str_type_node
16937 = build_pointer_type (build_qualified_type (char_type_node
,
16940 /* We use V1TI mode as a special container to hold __int128_t items that
16941 must live in VSX registers. */
16942 if (intTI_type_node
)
16944 V1TI_type_node
= rs6000_vector_type ("__vector __int128",
16945 intTI_type_node
, 1);
16946 unsigned_V1TI_type_node
16947 = rs6000_vector_type ("__vector unsigned __int128",
16948 unsigned_intTI_type_node
, 1);
16951 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16952 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16953 'vector unsigned short'. */
16955 bool_char_type_node
= build_distinct_type_copy (unsigned_intQI_type_node
);
16956 bool_short_type_node
= build_distinct_type_copy (unsigned_intHI_type_node
);
16957 bool_int_type_node
= build_distinct_type_copy (unsigned_intSI_type_node
);
16958 bool_long_type_node
= build_distinct_type_copy (unsigned_intDI_type_node
);
16959 pixel_type_node
= build_distinct_type_copy (unsigned_intHI_type_node
);
16961 long_integer_type_internal_node
= long_integer_type_node
;
16962 long_unsigned_type_internal_node
= long_unsigned_type_node
;
16963 long_long_integer_type_internal_node
= long_long_integer_type_node
;
16964 long_long_unsigned_type_internal_node
= long_long_unsigned_type_node
;
16965 intQI_type_internal_node
= intQI_type_node
;
16966 uintQI_type_internal_node
= unsigned_intQI_type_node
;
16967 intHI_type_internal_node
= intHI_type_node
;
16968 uintHI_type_internal_node
= unsigned_intHI_type_node
;
16969 intSI_type_internal_node
= intSI_type_node
;
16970 uintSI_type_internal_node
= unsigned_intSI_type_node
;
16971 intDI_type_internal_node
= intDI_type_node
;
16972 uintDI_type_internal_node
= unsigned_intDI_type_node
;
16973 intTI_type_internal_node
= intTI_type_node
;
16974 uintTI_type_internal_node
= unsigned_intTI_type_node
;
16975 float_type_internal_node
= float_type_node
;
16976 double_type_internal_node
= double_type_node
;
16977 long_double_type_internal_node
= long_double_type_node
;
16978 dfloat64_type_internal_node
= dfloat64_type_node
;
16979 dfloat128_type_internal_node
= dfloat128_type_node
;
16980 void_type_internal_node
= void_type_node
;
16982 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16983 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16984 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16985 format that uses a pair of doubles, depending on the switches and
16988 We do not enable the actual __float128 keyword unless the user explicitly
16989 asks for it, because the library support is not yet complete.
16991 If we don't support for either 128-bit IBM double double or IEEE 128-bit
16992 floating point, we need make sure the type is non-zero or else self-test
16993 fails during bootstrap.
16995 We don't register a built-in type for __ibm128 if the type is the same as
16996 long double. Instead we add a #define for __ibm128 in
16997 rs6000_cpu_cpp_builtins to long double. */
16998 if (TARGET_LONG_DOUBLE_128
&& FLOAT128_IEEE_P (TFmode
))
17000 ibm128_float_type_node
= make_node (REAL_TYPE
);
17001 TYPE_PRECISION (ibm128_float_type_node
) = 128;
17002 SET_TYPE_MODE (ibm128_float_type_node
, IFmode
);
17003 layout_type (ibm128_float_type_node
);
17005 lang_hooks
.types
.register_builtin_type (ibm128_float_type_node
,
17009 ibm128_float_type_node
= long_double_type_node
;
17011 if (TARGET_FLOAT128_KEYWORD
)
17013 ieee128_float_type_node
= float128_type_node
;
17014 lang_hooks
.types
.register_builtin_type (ieee128_float_type_node
,
17018 else if (TARGET_FLOAT128_TYPE
)
17020 ieee128_float_type_node
= make_node (REAL_TYPE
);
17021 TYPE_PRECISION (ibm128_float_type_node
) = 128;
17022 SET_TYPE_MODE (ieee128_float_type_node
, KFmode
);
17023 layout_type (ieee128_float_type_node
);
17025 /* If we are not exporting the __float128/_Float128 keywords, we need a
17026 keyword to get the types created. Use __ieee128 as the dummy
17028 lang_hooks
.types
.register_builtin_type (ieee128_float_type_node
,
17033 ieee128_float_type_node
= long_double_type_node
;
17035 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17037 builtin_mode_to_type
[QImode
][0] = integer_type_node
;
17038 builtin_mode_to_type
[HImode
][0] = integer_type_node
;
17039 builtin_mode_to_type
[SImode
][0] = intSI_type_node
;
17040 builtin_mode_to_type
[SImode
][1] = unsigned_intSI_type_node
;
17041 builtin_mode_to_type
[DImode
][0] = intDI_type_node
;
17042 builtin_mode_to_type
[DImode
][1] = unsigned_intDI_type_node
;
17043 builtin_mode_to_type
[TImode
][0] = intTI_type_node
;
17044 builtin_mode_to_type
[TImode
][1] = unsigned_intTI_type_node
;
17045 builtin_mode_to_type
[SFmode
][0] = float_type_node
;
17046 builtin_mode_to_type
[DFmode
][0] = double_type_node
;
17047 builtin_mode_to_type
[IFmode
][0] = ibm128_float_type_node
;
17048 builtin_mode_to_type
[KFmode
][0] = ieee128_float_type_node
;
17049 builtin_mode_to_type
[TFmode
][0] = long_double_type_node
;
17050 builtin_mode_to_type
[DDmode
][0] = dfloat64_type_node
;
17051 builtin_mode_to_type
[TDmode
][0] = dfloat128_type_node
;
17052 builtin_mode_to_type
[V1TImode
][0] = V1TI_type_node
;
17053 builtin_mode_to_type
[V1TImode
][1] = unsigned_V1TI_type_node
;
17054 builtin_mode_to_type
[V2SImode
][0] = V2SI_type_node
;
17055 builtin_mode_to_type
[V2SFmode
][0] = V2SF_type_node
;
17056 builtin_mode_to_type
[V2DImode
][0] = V2DI_type_node
;
17057 builtin_mode_to_type
[V2DImode
][1] = unsigned_V2DI_type_node
;
17058 builtin_mode_to_type
[V2DFmode
][0] = V2DF_type_node
;
17059 builtin_mode_to_type
[V4SImode
][0] = V4SI_type_node
;
17060 builtin_mode_to_type
[V4SImode
][1] = unsigned_V4SI_type_node
;
17061 builtin_mode_to_type
[V4SFmode
][0] = V4SF_type_node
;
17062 builtin_mode_to_type
[V8HImode
][0] = V8HI_type_node
;
17063 builtin_mode_to_type
[V8HImode
][1] = unsigned_V8HI_type_node
;
17064 builtin_mode_to_type
[V16QImode
][0] = V16QI_type_node
;
17065 builtin_mode_to_type
[V16QImode
][1] = unsigned_V16QI_type_node
;
17067 tdecl
= add_builtin_type ("__bool char", bool_char_type_node
);
17068 TYPE_NAME (bool_char_type_node
) = tdecl
;
17070 tdecl
= add_builtin_type ("__bool short", bool_short_type_node
);
17071 TYPE_NAME (bool_short_type_node
) = tdecl
;
17073 tdecl
= add_builtin_type ("__bool int", bool_int_type_node
);
17074 TYPE_NAME (bool_int_type_node
) = tdecl
;
17076 tdecl
= add_builtin_type ("__pixel", pixel_type_node
);
17077 TYPE_NAME (pixel_type_node
) = tdecl
;
17079 bool_V16QI_type_node
= rs6000_vector_type ("__vector __bool char",
17080 bool_char_type_node
, 16);
17081 bool_V8HI_type_node
= rs6000_vector_type ("__vector __bool short",
17082 bool_short_type_node
, 8);
17083 bool_V4SI_type_node
= rs6000_vector_type ("__vector __bool int",
17084 bool_int_type_node
, 4);
17085 bool_V2DI_type_node
= rs6000_vector_type (TARGET_POWERPC64
17086 ? "__vector __bool long"
17087 : "__vector __bool long long",
17088 bool_long_type_node
, 2);
17089 pixel_V8HI_type_node
= rs6000_vector_type ("__vector __pixel",
17090 pixel_type_node
, 8);
17092 /* Paired builtins are only available if you build a compiler with the
17093 appropriate options, so only create those builtins with the appropriate
17094 compiler option. Create Altivec and VSX builtins on machines with at
17095 least the general purpose extensions (970 and newer) to allow the use of
17096 the target attribute. */
17097 if (TARGET_PAIRED_FLOAT
)
17098 paired_init_builtins ();
17099 if (TARGET_EXTRA_BUILTINS
)
17100 altivec_init_builtins ();
17102 htm_init_builtins ();
17104 if (TARGET_EXTRA_BUILTINS
|| TARGET_PAIRED_FLOAT
)
17105 rs6000_common_init_builtins ();
17107 ftype
= build_function_type_list (ieee128_float_type_node
,
17108 const_str_type_node
, NULL_TREE
);
17109 def_builtin ("__builtin_nanq", ftype
, RS6000_BUILTIN_NANQ
);
17110 def_builtin ("__builtin_nansq", ftype
, RS6000_BUILTIN_NANSQ
);
17112 ftype
= build_function_type_list (ieee128_float_type_node
, NULL_TREE
);
17113 def_builtin ("__builtin_infq", ftype
, RS6000_BUILTIN_INFQ
);
17114 def_builtin ("__builtin_huge_valq", ftype
, RS6000_BUILTIN_HUGE_VALQ
);
17116 ftype
= builtin_function_type (DFmode
, DFmode
, DFmode
, VOIDmode
,
17117 RS6000_BUILTIN_RECIP
, "__builtin_recipdiv");
17118 def_builtin ("__builtin_recipdiv", ftype
, RS6000_BUILTIN_RECIP
);
17120 ftype
= builtin_function_type (SFmode
, SFmode
, SFmode
, VOIDmode
,
17121 RS6000_BUILTIN_RECIPF
, "__builtin_recipdivf");
17122 def_builtin ("__builtin_recipdivf", ftype
, RS6000_BUILTIN_RECIPF
);
17124 ftype
= builtin_function_type (DFmode
, DFmode
, VOIDmode
, VOIDmode
,
17125 RS6000_BUILTIN_RSQRT
, "__builtin_rsqrt");
17126 def_builtin ("__builtin_rsqrt", ftype
, RS6000_BUILTIN_RSQRT
);
17128 ftype
= builtin_function_type (SFmode
, SFmode
, VOIDmode
, VOIDmode
,
17129 RS6000_BUILTIN_RSQRTF
, "__builtin_rsqrtf");
17130 def_builtin ("__builtin_rsqrtf", ftype
, RS6000_BUILTIN_RSQRTF
);
17132 mode
= (TARGET_64BIT
) ? DImode
: SImode
;
17133 ftype
= builtin_function_type (mode
, mode
, mode
, VOIDmode
,
17134 POWER7_BUILTIN_BPERMD
, "__builtin_bpermd");
17135 def_builtin ("__builtin_bpermd", ftype
, POWER7_BUILTIN_BPERMD
);
17137 ftype
= build_function_type_list (unsigned_intDI_type_node
,
17139 def_builtin ("__builtin_ppc_get_timebase", ftype
, RS6000_BUILTIN_GET_TB
);
17142 ftype
= build_function_type_list (unsigned_intDI_type_node
,
17145 ftype
= build_function_type_list (unsigned_intSI_type_node
,
17147 def_builtin ("__builtin_ppc_mftb", ftype
, RS6000_BUILTIN_MFTB
);
17149 ftype
= build_function_type_list (double_type_node
, NULL_TREE
);
17150 def_builtin ("__builtin_mffs", ftype
, RS6000_BUILTIN_MFFS
);
17152 ftype
= build_function_type_list (void_type_node
,
17153 intSI_type_node
, double_type_node
,
17155 def_builtin ("__builtin_mtfsf", ftype
, RS6000_BUILTIN_MTFSF
);
17157 ftype
= build_function_type_list (void_type_node
, NULL_TREE
);
17158 def_builtin ("__builtin_cpu_init", ftype
, RS6000_BUILTIN_CPU_INIT
);
17160 ftype
= build_function_type_list (bool_int_type_node
, const_ptr_type_node
,
17162 def_builtin ("__builtin_cpu_is", ftype
, RS6000_BUILTIN_CPU_IS
);
17163 def_builtin ("__builtin_cpu_supports", ftype
, RS6000_BUILTIN_CPU_SUPPORTS
);
17165 /* AIX libm provides clog as __clog. */
17166 if (TARGET_XCOFF
&&
17167 (tdecl
= builtin_decl_explicit (BUILT_IN_CLOG
)) != NULL_TREE
)
17168 set_user_assembler_name (tdecl
, "__clog");
17170 #ifdef SUBTARGET_INIT_BUILTINS
17171 SUBTARGET_INIT_BUILTINS
;
17175 /* Returns the rs6000 builtin decl for CODE. */
17178 rs6000_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
17180 HOST_WIDE_INT fnmask
;
17182 if (code
>= RS6000_BUILTIN_COUNT
)
17183 return error_mark_node
;
17185 fnmask
= rs6000_builtin_info
[code
].mask
;
17186 if ((fnmask
& rs6000_builtin_mask
) != fnmask
)
17188 rs6000_invalid_builtin ((enum rs6000_builtins
)code
);
17189 return error_mark_node
;
17192 return rs6000_builtin_decls
[code
];
17196 paired_init_builtins (void)
17198 const struct builtin_description
*d
;
17200 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
17202 tree int_ftype_int_v2sf_v2sf
17203 = build_function_type_list (integer_type_node
,
17208 tree pcfloat_type_node
=
17209 build_pointer_type (build_qualified_type
17210 (float_type_node
, TYPE_QUAL_CONST
));
17212 tree v2sf_ftype_long_pcfloat
= build_function_type_list (V2SF_type_node
,
17213 long_integer_type_node
,
17216 tree void_ftype_v2sf_long_pcfloat
=
17217 build_function_type_list (void_type_node
,
17219 long_integer_type_node
,
17224 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat
,
17225 PAIRED_BUILTIN_LX
);
17228 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat
,
17229 PAIRED_BUILTIN_STX
);
17232 d
= bdesc_paired_preds
;
17233 for (i
= 0; i
< ARRAY_SIZE (bdesc_paired_preds
); ++i
, d
++)
17236 HOST_WIDE_INT mask
= d
->mask
;
17238 if ((mask
& builtin_mask
) != mask
)
17240 if (TARGET_DEBUG_BUILTIN
)
17241 fprintf (stderr
, "paired_init_builtins, skip predicate %s\n",
17246 /* Cannot define builtin if the instruction is disabled. */
17247 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
17249 if (TARGET_DEBUG_BUILTIN
)
17250 fprintf (stderr
, "paired pred #%d, insn = %s [%d], mode = %s\n",
17251 (int)i
, get_insn_name (d
->icode
), (int)d
->icode
,
17252 GET_MODE_NAME (insn_data
[d
->icode
].operand
[1].mode
));
17254 switch (insn_data
[d
->icode
].operand
[1].mode
)
17257 type
= int_ftype_int_v2sf_v2sf
;
17260 gcc_unreachable ();
17263 def_builtin (d
->name
, type
, d
->code
);
17268 altivec_init_builtins (void)
17270 const struct builtin_description
*d
;
17274 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
17276 tree pvoid_type_node
= build_pointer_type (void_type_node
);
17278 tree pcvoid_type_node
17279 = build_pointer_type (build_qualified_type (void_type_node
,
17282 tree int_ftype_opaque
17283 = build_function_type_list (integer_type_node
,
17284 opaque_V4SI_type_node
, NULL_TREE
);
17285 tree opaque_ftype_opaque
17286 = build_function_type_list (integer_type_node
, NULL_TREE
);
17287 tree opaque_ftype_opaque_int
17288 = build_function_type_list (opaque_V4SI_type_node
,
17289 opaque_V4SI_type_node
, integer_type_node
, NULL_TREE
);
17290 tree opaque_ftype_opaque_opaque_int
17291 = build_function_type_list (opaque_V4SI_type_node
,
17292 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
17293 integer_type_node
, NULL_TREE
);
17294 tree opaque_ftype_opaque_opaque_opaque
17295 = build_function_type_list (opaque_V4SI_type_node
,
17296 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
17297 opaque_V4SI_type_node
, NULL_TREE
);
17298 tree opaque_ftype_opaque_opaque
17299 = build_function_type_list (opaque_V4SI_type_node
,
17300 opaque_V4SI_type_node
, opaque_V4SI_type_node
,
17302 tree int_ftype_int_opaque_opaque
17303 = build_function_type_list (integer_type_node
,
17304 integer_type_node
, opaque_V4SI_type_node
,
17305 opaque_V4SI_type_node
, NULL_TREE
);
17306 tree int_ftype_int_v4si_v4si
17307 = build_function_type_list (integer_type_node
,
17308 integer_type_node
, V4SI_type_node
,
17309 V4SI_type_node
, NULL_TREE
);
17310 tree int_ftype_int_v2di_v2di
17311 = build_function_type_list (integer_type_node
,
17312 integer_type_node
, V2DI_type_node
,
17313 V2DI_type_node
, NULL_TREE
);
17314 tree void_ftype_v4si
17315 = build_function_type_list (void_type_node
, V4SI_type_node
, NULL_TREE
);
17316 tree v8hi_ftype_void
17317 = build_function_type_list (V8HI_type_node
, NULL_TREE
);
17318 tree void_ftype_void
17319 = build_function_type_list (void_type_node
, NULL_TREE
);
17320 tree void_ftype_int
17321 = build_function_type_list (void_type_node
, integer_type_node
, NULL_TREE
);
17323 tree opaque_ftype_long_pcvoid
17324 = build_function_type_list (opaque_V4SI_type_node
,
17325 long_integer_type_node
, pcvoid_type_node
,
17327 tree v16qi_ftype_long_pcvoid
17328 = build_function_type_list (V16QI_type_node
,
17329 long_integer_type_node
, pcvoid_type_node
,
17331 tree v8hi_ftype_long_pcvoid
17332 = build_function_type_list (V8HI_type_node
,
17333 long_integer_type_node
, pcvoid_type_node
,
17335 tree v4si_ftype_long_pcvoid
17336 = build_function_type_list (V4SI_type_node
,
17337 long_integer_type_node
, pcvoid_type_node
,
17339 tree v4sf_ftype_long_pcvoid
17340 = build_function_type_list (V4SF_type_node
,
17341 long_integer_type_node
, pcvoid_type_node
,
17343 tree v2df_ftype_long_pcvoid
17344 = build_function_type_list (V2DF_type_node
,
17345 long_integer_type_node
, pcvoid_type_node
,
17347 tree v2di_ftype_long_pcvoid
17348 = build_function_type_list (V2DI_type_node
,
17349 long_integer_type_node
, pcvoid_type_node
,
17352 tree void_ftype_opaque_long_pvoid
17353 = build_function_type_list (void_type_node
,
17354 opaque_V4SI_type_node
, long_integer_type_node
,
17355 pvoid_type_node
, NULL_TREE
);
17356 tree void_ftype_v4si_long_pvoid
17357 = build_function_type_list (void_type_node
,
17358 V4SI_type_node
, long_integer_type_node
,
17359 pvoid_type_node
, NULL_TREE
);
17360 tree void_ftype_v16qi_long_pvoid
17361 = build_function_type_list (void_type_node
,
17362 V16QI_type_node
, long_integer_type_node
,
17363 pvoid_type_node
, NULL_TREE
);
17365 tree void_ftype_v16qi_pvoid_long
17366 = build_function_type_list (void_type_node
,
17367 V16QI_type_node
, pvoid_type_node
,
17368 long_integer_type_node
, NULL_TREE
);
17370 tree void_ftype_v8hi_long_pvoid
17371 = build_function_type_list (void_type_node
,
17372 V8HI_type_node
, long_integer_type_node
,
17373 pvoid_type_node
, NULL_TREE
);
17374 tree void_ftype_v4sf_long_pvoid
17375 = build_function_type_list (void_type_node
,
17376 V4SF_type_node
, long_integer_type_node
,
17377 pvoid_type_node
, NULL_TREE
);
17378 tree void_ftype_v2df_long_pvoid
17379 = build_function_type_list (void_type_node
,
17380 V2DF_type_node
, long_integer_type_node
,
17381 pvoid_type_node
, NULL_TREE
);
17382 tree void_ftype_v2di_long_pvoid
17383 = build_function_type_list (void_type_node
,
17384 V2DI_type_node
, long_integer_type_node
,
17385 pvoid_type_node
, NULL_TREE
);
17386 tree int_ftype_int_v8hi_v8hi
17387 = build_function_type_list (integer_type_node
,
17388 integer_type_node
, V8HI_type_node
,
17389 V8HI_type_node
, NULL_TREE
);
17390 tree int_ftype_int_v16qi_v16qi
17391 = build_function_type_list (integer_type_node
,
17392 integer_type_node
, V16QI_type_node
,
17393 V16QI_type_node
, NULL_TREE
);
17394 tree int_ftype_int_v4sf_v4sf
17395 = build_function_type_list (integer_type_node
,
17396 integer_type_node
, V4SF_type_node
,
17397 V4SF_type_node
, NULL_TREE
);
17398 tree int_ftype_int_v2df_v2df
17399 = build_function_type_list (integer_type_node
,
17400 integer_type_node
, V2DF_type_node
,
17401 V2DF_type_node
, NULL_TREE
);
17402 tree v2di_ftype_v2di
17403 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
17404 tree v4si_ftype_v4si
17405 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17406 tree v8hi_ftype_v8hi
17407 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17408 tree v16qi_ftype_v16qi
17409 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17410 tree v4sf_ftype_v4sf
17411 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
17412 tree v2df_ftype_v2df
17413 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
17414 tree void_ftype_pcvoid_int_int
17415 = build_function_type_list (void_type_node
,
17416 pcvoid_type_node
, integer_type_node
,
17417 integer_type_node
, NULL_TREE
);
17419 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si
, ALTIVEC_BUILTIN_MTVSCR
);
17420 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void
, ALTIVEC_BUILTIN_MFVSCR
);
17421 def_builtin ("__builtin_altivec_dssall", void_ftype_void
, ALTIVEC_BUILTIN_DSSALL
);
17422 def_builtin ("__builtin_altivec_dss", void_ftype_int
, ALTIVEC_BUILTIN_DSS
);
17423 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVSL
);
17424 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVSR
);
17425 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEBX
);
17426 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEHX
);
17427 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVEWX
);
17428 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVXL
);
17429 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid
,
17430 ALTIVEC_BUILTIN_LVXL_V2DF
);
17431 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid
,
17432 ALTIVEC_BUILTIN_LVXL_V2DI
);
17433 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid
,
17434 ALTIVEC_BUILTIN_LVXL_V4SF
);
17435 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid
,
17436 ALTIVEC_BUILTIN_LVXL_V4SI
);
17437 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid
,
17438 ALTIVEC_BUILTIN_LVXL_V8HI
);
17439 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid
,
17440 ALTIVEC_BUILTIN_LVXL_V16QI
);
17441 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVX
);
17442 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid
,
17443 ALTIVEC_BUILTIN_LVX_V2DF
);
17444 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid
,
17445 ALTIVEC_BUILTIN_LVX_V2DI
);
17446 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid
,
17447 ALTIVEC_BUILTIN_LVX_V4SF
);
17448 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid
,
17449 ALTIVEC_BUILTIN_LVX_V4SI
);
17450 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid
,
17451 ALTIVEC_BUILTIN_LVX_V8HI
);
17452 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid
,
17453 ALTIVEC_BUILTIN_LVX_V16QI
);
17454 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVX
);
17455 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid
,
17456 ALTIVEC_BUILTIN_STVX_V2DF
);
17457 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid
,
17458 ALTIVEC_BUILTIN_STVX_V2DI
);
17459 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid
,
17460 ALTIVEC_BUILTIN_STVX_V4SF
);
17461 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid
,
17462 ALTIVEC_BUILTIN_STVX_V4SI
);
17463 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid
,
17464 ALTIVEC_BUILTIN_STVX_V8HI
);
17465 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid
,
17466 ALTIVEC_BUILTIN_STVX_V16QI
);
17467 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVEWX
);
17468 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid
, ALTIVEC_BUILTIN_STVXL
);
17469 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid
,
17470 ALTIVEC_BUILTIN_STVXL_V2DF
);
17471 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid
,
17472 ALTIVEC_BUILTIN_STVXL_V2DI
);
17473 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid
,
17474 ALTIVEC_BUILTIN_STVXL_V4SF
);
17475 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid
,
17476 ALTIVEC_BUILTIN_STVXL_V4SI
);
17477 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid
,
17478 ALTIVEC_BUILTIN_STVXL_V8HI
);
17479 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid
,
17480 ALTIVEC_BUILTIN_STVXL_V16QI
);
17481 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVEBX
);
17482 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid
, ALTIVEC_BUILTIN_STVEHX
);
17483 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LD
);
17484 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LDE
);
17485 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LDL
);
17486 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVSL
);
17487 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVSR
);
17488 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEBX
);
17489 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEHX
);
17490 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVEWX
);
17491 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_ST
);
17492 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STE
);
17493 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STL
);
17494 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEWX
);
17495 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEBX
);
17496 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVEHX
);
17498 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid
,
17499 VSX_BUILTIN_LXVD2X_V2DF
);
17500 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid
,
17501 VSX_BUILTIN_LXVD2X_V2DI
);
17502 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid
,
17503 VSX_BUILTIN_LXVW4X_V4SF
);
17504 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid
,
17505 VSX_BUILTIN_LXVW4X_V4SI
);
17506 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid
,
17507 VSX_BUILTIN_LXVW4X_V8HI
);
17508 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid
,
17509 VSX_BUILTIN_LXVW4X_V16QI
);
17510 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid
,
17511 VSX_BUILTIN_STXVD2X_V2DF
);
17512 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid
,
17513 VSX_BUILTIN_STXVD2X_V2DI
);
17514 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid
,
17515 VSX_BUILTIN_STXVW4X_V4SF
);
17516 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid
,
17517 VSX_BUILTIN_STXVW4X_V4SI
);
17518 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid
,
17519 VSX_BUILTIN_STXVW4X_V8HI
);
17520 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid
,
17521 VSX_BUILTIN_STXVW4X_V16QI
);
17523 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid
,
17524 VSX_BUILTIN_LD_ELEMREV_V2DF
);
17525 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid
,
17526 VSX_BUILTIN_LD_ELEMREV_V2DI
);
17527 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid
,
17528 VSX_BUILTIN_LD_ELEMREV_V4SF
);
17529 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid
,
17530 VSX_BUILTIN_LD_ELEMREV_V4SI
);
17531 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid
,
17532 VSX_BUILTIN_ST_ELEMREV_V2DF
);
17533 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid
,
17534 VSX_BUILTIN_ST_ELEMREV_V2DI
);
17535 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid
,
17536 VSX_BUILTIN_ST_ELEMREV_V4SF
);
17537 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid
,
17538 VSX_BUILTIN_ST_ELEMREV_V4SI
);
17540 if (TARGET_P9_VECTOR
)
17542 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid
,
17543 VSX_BUILTIN_LD_ELEMREV_V8HI
);
17544 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid
,
17545 VSX_BUILTIN_LD_ELEMREV_V16QI
);
17546 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17547 void_ftype_v8hi_long_pvoid
, VSX_BUILTIN_ST_ELEMREV_V8HI
);
17548 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17549 void_ftype_v16qi_long_pvoid
, VSX_BUILTIN_ST_ELEMREV_V16QI
);
17553 rs6000_builtin_decls
[(int) VSX_BUILTIN_LD_ELEMREV_V8HI
]
17554 = rs6000_builtin_decls
[(int) VSX_BUILTIN_LXVW4X_V8HI
];
17555 rs6000_builtin_decls
[(int) VSX_BUILTIN_LD_ELEMREV_V16QI
]
17556 = rs6000_builtin_decls
[(int) VSX_BUILTIN_LXVW4X_V16QI
];
17557 rs6000_builtin_decls
[(int) VSX_BUILTIN_ST_ELEMREV_V8HI
]
17558 = rs6000_builtin_decls
[(int) VSX_BUILTIN_STXVW4X_V8HI
];
17559 rs6000_builtin_decls
[(int) VSX_BUILTIN_ST_ELEMREV_V16QI
]
17560 = rs6000_builtin_decls
[(int) VSX_BUILTIN_STXVW4X_V16QI
];
17563 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid
,
17564 VSX_BUILTIN_VEC_LD
);
17565 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid
,
17566 VSX_BUILTIN_VEC_ST
);
17567 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid
,
17568 VSX_BUILTIN_VEC_XL
);
17569 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid
,
17570 VSX_BUILTIN_VEC_XST
);
17572 def_builtin ("__builtin_vec_step", int_ftype_opaque
, ALTIVEC_BUILTIN_VEC_STEP
);
17573 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque
, ALTIVEC_BUILTIN_VEC_SPLATS
);
17574 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque
, ALTIVEC_BUILTIN_VEC_PROMOTE
);
17576 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int
, ALTIVEC_BUILTIN_VEC_SLD
);
17577 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_SPLAT
);
17578 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_EXTRACT
);
17579 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int
, ALTIVEC_BUILTIN_VEC_INSERT
);
17580 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTW
);
17581 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTH
);
17582 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VSPLTB
);
17583 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTF
);
17584 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VCFSX
);
17585 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_VCFUX
);
17586 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTS
);
17587 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int
, ALTIVEC_BUILTIN_VEC_CTU
);
17589 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque
,
17590 ALTIVEC_BUILTIN_VEC_ADDE
);
17591 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque
,
17592 ALTIVEC_BUILTIN_VEC_ADDEC
);
17593 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque
,
17594 ALTIVEC_BUILTIN_VEC_CMPNE
);
17595 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque
,
17596 ALTIVEC_BUILTIN_VEC_MUL
);
17598 /* Cell builtins. */
17599 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVLX
);
17600 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVLXL
);
17601 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVRX
);
17602 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_LVRXL
);
17604 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVLX
);
17605 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVLXL
);
17606 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVRX
);
17607 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid
, ALTIVEC_BUILTIN_VEC_LVRXL
);
17609 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVLX
);
17610 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVLXL
);
17611 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVRX
);
17612 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_STVRXL
);
17614 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVLX
);
17615 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVLXL
);
17616 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVRX
);
17617 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid
, ALTIVEC_BUILTIN_VEC_STVRXL
);
17619 if (TARGET_P9_VECTOR
)
17620 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long
,
17621 P9V_BUILTIN_STXVL
);
17623 /* Add the DST variants. */
17625 for (i
= 0; i
< ARRAY_SIZE (bdesc_dst
); i
++, d
++)
17627 HOST_WIDE_INT mask
= d
->mask
;
17629 /* It is expected that these dst built-in functions may have
17630 d->icode equal to CODE_FOR_nothing. */
17631 if ((mask
& builtin_mask
) != mask
)
17633 if (TARGET_DEBUG_BUILTIN
)
17634 fprintf (stderr
, "altivec_init_builtins, skip dst %s\n",
17638 def_builtin (d
->name
, void_ftype_pcvoid_int_int
, d
->code
);
17641 /* Initialize the predicates. */
17642 d
= bdesc_altivec_preds
;
17643 for (i
= 0; i
< ARRAY_SIZE (bdesc_altivec_preds
); i
++, d
++)
17645 machine_mode mode1
;
17647 HOST_WIDE_INT mask
= d
->mask
;
17649 if ((mask
& builtin_mask
) != mask
)
17651 if (TARGET_DEBUG_BUILTIN
)
17652 fprintf (stderr
, "altivec_init_builtins, skip predicate %s\n",
17657 if (rs6000_overloaded_builtin_p (d
->code
))
17661 /* Cannot define builtin if the instruction is disabled. */
17662 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
17663 mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17669 type
= int_ftype_int_opaque_opaque
;
17672 type
= int_ftype_int_v2di_v2di
;
17675 type
= int_ftype_int_v4si_v4si
;
17678 type
= int_ftype_int_v8hi_v8hi
;
17681 type
= int_ftype_int_v16qi_v16qi
;
17684 type
= int_ftype_int_v4sf_v4sf
;
17687 type
= int_ftype_int_v2df_v2df
;
17690 gcc_unreachable ();
17693 def_builtin (d
->name
, type
, d
->code
);
17696 /* Initialize the abs* operators. */
17698 for (i
= 0; i
< ARRAY_SIZE (bdesc_abs
); i
++, d
++)
17700 machine_mode mode0
;
17702 HOST_WIDE_INT mask
= d
->mask
;
17704 if ((mask
& builtin_mask
) != mask
)
17706 if (TARGET_DEBUG_BUILTIN
)
17707 fprintf (stderr
, "altivec_init_builtins, skip abs %s\n",
17712 /* Cannot define builtin if the instruction is disabled. */
17713 gcc_assert (d
->icode
!= CODE_FOR_nothing
);
17714 mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17719 type
= v2di_ftype_v2di
;
17722 type
= v4si_ftype_v4si
;
17725 type
= v8hi_ftype_v8hi
;
17728 type
= v16qi_ftype_v16qi
;
17731 type
= v4sf_ftype_v4sf
;
17734 type
= v2df_ftype_v2df
;
17737 gcc_unreachable ();
17740 def_builtin (d
->name
, type
, d
->code
);
17743 /* Initialize target builtin that implements
17744 targetm.vectorize.builtin_mask_for_load. */
17746 decl
= add_builtin_function ("__builtin_altivec_mask_for_load",
17747 v16qi_ftype_long_pcvoid
,
17748 ALTIVEC_BUILTIN_MASK_FOR_LOAD
,
17749 BUILT_IN_MD
, NULL
, NULL_TREE
);
17750 TREE_READONLY (decl
) = 1;
17751 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17752 altivec_builtin_mask_for_load
= decl
;
17754 /* Access to the vec_init patterns. */
17755 ftype
= build_function_type_list (V4SI_type_node
, integer_type_node
,
17756 integer_type_node
, integer_type_node
,
17757 integer_type_node
, NULL_TREE
);
17758 def_builtin ("__builtin_vec_init_v4si", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V4SI
);
17760 ftype
= build_function_type_list (V8HI_type_node
, short_integer_type_node
,
17761 short_integer_type_node
,
17762 short_integer_type_node
,
17763 short_integer_type_node
,
17764 short_integer_type_node
,
17765 short_integer_type_node
,
17766 short_integer_type_node
,
17767 short_integer_type_node
, NULL_TREE
);
17768 def_builtin ("__builtin_vec_init_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V8HI
);
17770 ftype
= build_function_type_list (V16QI_type_node
, char_type_node
,
17771 char_type_node
, char_type_node
,
17772 char_type_node
, char_type_node
,
17773 char_type_node
, char_type_node
,
17774 char_type_node
, char_type_node
,
17775 char_type_node
, char_type_node
,
17776 char_type_node
, char_type_node
,
17777 char_type_node
, char_type_node
,
17778 char_type_node
, NULL_TREE
);
17779 def_builtin ("__builtin_vec_init_v16qi", ftype
,
17780 ALTIVEC_BUILTIN_VEC_INIT_V16QI
);
17782 ftype
= build_function_type_list (V4SF_type_node
, float_type_node
,
17783 float_type_node
, float_type_node
,
17784 float_type_node
, NULL_TREE
);
17785 def_builtin ("__builtin_vec_init_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_INIT_V4SF
);
17787 /* VSX builtins. */
17788 ftype
= build_function_type_list (V2DF_type_node
, double_type_node
,
17789 double_type_node
, NULL_TREE
);
17790 def_builtin ("__builtin_vec_init_v2df", ftype
, VSX_BUILTIN_VEC_INIT_V2DF
);
17792 ftype
= build_function_type_list (V2DI_type_node
, intDI_type_node
,
17793 intDI_type_node
, NULL_TREE
);
17794 def_builtin ("__builtin_vec_init_v2di", ftype
, VSX_BUILTIN_VEC_INIT_V2DI
);
17796 /* Access to the vec_set patterns. */
17797 ftype
= build_function_type_list (V4SI_type_node
, V4SI_type_node
,
17799 integer_type_node
, NULL_TREE
);
17800 def_builtin ("__builtin_vec_set_v4si", ftype
, ALTIVEC_BUILTIN_VEC_SET_V4SI
);
17802 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17804 integer_type_node
, NULL_TREE
);
17805 def_builtin ("__builtin_vec_set_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_SET_V8HI
);
17807 ftype
= build_function_type_list (V16QI_type_node
, V16QI_type_node
,
17809 integer_type_node
, NULL_TREE
);
17810 def_builtin ("__builtin_vec_set_v16qi", ftype
, ALTIVEC_BUILTIN_VEC_SET_V16QI
);
17812 ftype
= build_function_type_list (V4SF_type_node
, V4SF_type_node
,
17814 integer_type_node
, NULL_TREE
);
17815 def_builtin ("__builtin_vec_set_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_SET_V4SF
);
17817 ftype
= build_function_type_list (V2DF_type_node
, V2DF_type_node
,
17819 integer_type_node
, NULL_TREE
);
17820 def_builtin ("__builtin_vec_set_v2df", ftype
, VSX_BUILTIN_VEC_SET_V2DF
);
17822 ftype
= build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17824 integer_type_node
, NULL_TREE
);
17825 def_builtin ("__builtin_vec_set_v2di", ftype
, VSX_BUILTIN_VEC_SET_V2DI
);
17827 /* Access to the vec_extract patterns. */
17828 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17829 integer_type_node
, NULL_TREE
);
17830 def_builtin ("__builtin_vec_ext_v4si", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V4SI
);
17832 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17833 integer_type_node
, NULL_TREE
);
17834 def_builtin ("__builtin_vec_ext_v8hi", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V8HI
);
17836 ftype
= build_function_type_list (intQI_type_node
, V16QI_type_node
,
17837 integer_type_node
, NULL_TREE
);
17838 def_builtin ("__builtin_vec_ext_v16qi", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V16QI
);
17840 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17841 integer_type_node
, NULL_TREE
);
17842 def_builtin ("__builtin_vec_ext_v4sf", ftype
, ALTIVEC_BUILTIN_VEC_EXT_V4SF
);
17844 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17845 integer_type_node
, NULL_TREE
);
17846 def_builtin ("__builtin_vec_ext_v2df", ftype
, VSX_BUILTIN_VEC_EXT_V2DF
);
17848 ftype
= build_function_type_list (intDI_type_node
, V2DI_type_node
,
17849 integer_type_node
, NULL_TREE
);
17850 def_builtin ("__builtin_vec_ext_v2di", ftype
, VSX_BUILTIN_VEC_EXT_V2DI
);
17853 if (V1TI_type_node
)
17855 tree v1ti_ftype_long_pcvoid
17856 = build_function_type_list (V1TI_type_node
,
17857 long_integer_type_node
, pcvoid_type_node
,
17859 tree void_ftype_v1ti_long_pvoid
17860 = build_function_type_list (void_type_node
,
17861 V1TI_type_node
, long_integer_type_node
,
17862 pvoid_type_node
, NULL_TREE
);
17863 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid
,
17864 VSX_BUILTIN_LXVD2X_V1TI
);
17865 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid
,
17866 VSX_BUILTIN_STXVD2X_V1TI
);
17867 ftype
= build_function_type_list (V1TI_type_node
, intTI_type_node
,
17868 NULL_TREE
, NULL_TREE
);
17869 def_builtin ("__builtin_vec_init_v1ti", ftype
, VSX_BUILTIN_VEC_INIT_V1TI
);
17870 ftype
= build_function_type_list (V1TI_type_node
, V1TI_type_node
,
17872 integer_type_node
, NULL_TREE
);
17873 def_builtin ("__builtin_vec_set_v1ti", ftype
, VSX_BUILTIN_VEC_SET_V1TI
);
17874 ftype
= build_function_type_list (intTI_type_node
, V1TI_type_node
,
17875 integer_type_node
, NULL_TREE
);
17876 def_builtin ("__builtin_vec_ext_v1ti", ftype
, VSX_BUILTIN_VEC_EXT_V1TI
);
17882 htm_init_builtins (void)
17884 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
17885 const struct builtin_description
*d
;
17889 for (i
= 0; i
< ARRAY_SIZE (bdesc_htm
); i
++, d
++)
17891 tree op
[MAX_HTM_OPERANDS
], type
;
17892 HOST_WIDE_INT mask
= d
->mask
;
17893 unsigned attr
= rs6000_builtin_info
[d
->code
].attr
;
17894 bool void_func
= (attr
& RS6000_BTC_VOID
);
17895 int attr_args
= (attr
& RS6000_BTC_TYPE_MASK
);
17897 tree gpr_type_node
;
17901 /* It is expected that these htm built-in functions may have
17902 d->icode equal to CODE_FOR_nothing. */
17904 if (TARGET_32BIT
&& TARGET_POWERPC64
)
17905 gpr_type_node
= long_long_unsigned_type_node
;
17907 gpr_type_node
= long_unsigned_type_node
;
17909 if (attr
& RS6000_BTC_SPR
)
17911 rettype
= gpr_type_node
;
17912 argtype
= gpr_type_node
;
17914 else if (d
->code
== HTM_BUILTIN_TABORTDC
17915 || d
->code
== HTM_BUILTIN_TABORTDCI
)
17917 rettype
= unsigned_type_node
;
17918 argtype
= gpr_type_node
;
17922 rettype
= unsigned_type_node
;
17923 argtype
= unsigned_type_node
;
17926 if ((mask
& builtin_mask
) != mask
)
17928 if (TARGET_DEBUG_BUILTIN
)
17929 fprintf (stderr
, "htm_builtin, skip binary %s\n", d
->name
);
17935 if (TARGET_DEBUG_BUILTIN
)
17936 fprintf (stderr
, "htm_builtin, bdesc_htm[%ld] no name\n",
17937 (long unsigned) i
);
17941 op
[nopnds
++] = (void_func
) ? void_type_node
: rettype
;
17943 if (attr_args
== RS6000_BTC_UNARY
)
17944 op
[nopnds
++] = argtype
;
17945 else if (attr_args
== RS6000_BTC_BINARY
)
17947 op
[nopnds
++] = argtype
;
17948 op
[nopnds
++] = argtype
;
17950 else if (attr_args
== RS6000_BTC_TERNARY
)
17952 op
[nopnds
++] = argtype
;
17953 op
[nopnds
++] = argtype
;
17954 op
[nopnds
++] = argtype
;
17960 type
= build_function_type_list (op
[0], NULL_TREE
);
17963 type
= build_function_type_list (op
[0], op
[1], NULL_TREE
);
17966 type
= build_function_type_list (op
[0], op
[1], op
[2], NULL_TREE
);
17969 type
= build_function_type_list (op
[0], op
[1], op
[2], op
[3],
17973 gcc_unreachable ();
17976 def_builtin (d
->name
, type
, d
->code
);
17980 /* Hash function for builtin functions with up to 3 arguments and a return
17983 builtin_hasher::hash (builtin_hash_struct
*bh
)
17988 for (i
= 0; i
< 4; i
++)
17990 ret
= (ret
* (unsigned)MAX_MACHINE_MODE
) + ((unsigned)bh
->mode
[i
]);
17991 ret
= (ret
* 2) + bh
->uns_p
[i
];
17997 /* Compare builtin hash entries H1 and H2 for equivalence. */
17999 builtin_hasher::equal (builtin_hash_struct
*p1
, builtin_hash_struct
*p2
)
18001 return ((p1
->mode
[0] == p2
->mode
[0])
18002 && (p1
->mode
[1] == p2
->mode
[1])
18003 && (p1
->mode
[2] == p2
->mode
[2])
18004 && (p1
->mode
[3] == p2
->mode
[3])
18005 && (p1
->uns_p
[0] == p2
->uns_p
[0])
18006 && (p1
->uns_p
[1] == p2
->uns_p
[1])
18007 && (p1
->uns_p
[2] == p2
->uns_p
[2])
18008 && (p1
->uns_p
[3] == p2
->uns_p
[3]));
18011 /* Map types for builtin functions with an explicit return type and up to 3
18012 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
18013 of the argument. */
18015 builtin_function_type (machine_mode mode_ret
, machine_mode mode_arg0
,
18016 machine_mode mode_arg1
, machine_mode mode_arg2
,
18017 enum rs6000_builtins builtin
, const char *name
)
18019 struct builtin_hash_struct h
;
18020 struct builtin_hash_struct
*h2
;
18023 tree ret_type
= NULL_TREE
;
18024 tree arg_type
[3] = { NULL_TREE
, NULL_TREE
, NULL_TREE
};
18026 /* Create builtin_hash_table. */
18027 if (builtin_hash_table
== NULL
)
18028 builtin_hash_table
= hash_table
<builtin_hasher
>::create_ggc (1500);
18030 h
.type
= NULL_TREE
;
18031 h
.mode
[0] = mode_ret
;
18032 h
.mode
[1] = mode_arg0
;
18033 h
.mode
[2] = mode_arg1
;
18034 h
.mode
[3] = mode_arg2
;
18040 /* If the builtin is a type that produces unsigned results or takes unsigned
18041 arguments, and it is returned as a decl for the vectorizer (such as
18042 widening multiplies, permute), make sure the arguments and return value
18043 are type correct. */
18046 /* unsigned 1 argument functions. */
18047 case CRYPTO_BUILTIN_VSBOX
:
18048 case P8V_BUILTIN_VGBBD
:
18049 case MISC_BUILTIN_CDTBCD
:
18050 case MISC_BUILTIN_CBCDTD
:
18055 /* unsigned 2 argument functions. */
18056 case ALTIVEC_BUILTIN_VMULEUB
:
18057 case ALTIVEC_BUILTIN_VMULEUH
:
18058 case ALTIVEC_BUILTIN_VMULEUW
:
18059 case ALTIVEC_BUILTIN_VMULOUB
:
18060 case ALTIVEC_BUILTIN_VMULOUH
:
18061 case ALTIVEC_BUILTIN_VMULOUW
:
18062 case CRYPTO_BUILTIN_VCIPHER
:
18063 case CRYPTO_BUILTIN_VCIPHERLAST
:
18064 case CRYPTO_BUILTIN_VNCIPHER
:
18065 case CRYPTO_BUILTIN_VNCIPHERLAST
:
18066 case CRYPTO_BUILTIN_VPMSUMB
:
18067 case CRYPTO_BUILTIN_VPMSUMH
:
18068 case CRYPTO_BUILTIN_VPMSUMW
:
18069 case CRYPTO_BUILTIN_VPMSUMD
:
18070 case CRYPTO_BUILTIN_VPMSUM
:
18071 case MISC_BUILTIN_ADDG6S
:
18072 case MISC_BUILTIN_DIVWEU
:
18073 case MISC_BUILTIN_DIVWEUO
:
18074 case MISC_BUILTIN_DIVDEU
:
18075 case MISC_BUILTIN_DIVDEUO
:
18076 case VSX_BUILTIN_UDIV_V2DI
:
18077 case ALTIVEC_BUILTIN_VMAXUB
:
18078 case ALTIVEC_BUILTIN_VMINUB
:
18079 case ALTIVEC_BUILTIN_VMAXUH
:
18080 case ALTIVEC_BUILTIN_VMINUH
:
18081 case ALTIVEC_BUILTIN_VMAXUW
:
18082 case ALTIVEC_BUILTIN_VMINUW
:
18083 case P8V_BUILTIN_VMAXUD
:
18084 case P8V_BUILTIN_VMINUD
:
18090 /* unsigned 3 argument functions. */
18091 case ALTIVEC_BUILTIN_VPERM_16QI_UNS
:
18092 case ALTIVEC_BUILTIN_VPERM_8HI_UNS
:
18093 case ALTIVEC_BUILTIN_VPERM_4SI_UNS
:
18094 case ALTIVEC_BUILTIN_VPERM_2DI_UNS
:
18095 case ALTIVEC_BUILTIN_VSEL_16QI_UNS
:
18096 case ALTIVEC_BUILTIN_VSEL_8HI_UNS
:
18097 case ALTIVEC_BUILTIN_VSEL_4SI_UNS
:
18098 case ALTIVEC_BUILTIN_VSEL_2DI_UNS
:
18099 case VSX_BUILTIN_VPERM_16QI_UNS
:
18100 case VSX_BUILTIN_VPERM_8HI_UNS
:
18101 case VSX_BUILTIN_VPERM_4SI_UNS
:
18102 case VSX_BUILTIN_VPERM_2DI_UNS
:
18103 case VSX_BUILTIN_XXSEL_16QI_UNS
:
18104 case VSX_BUILTIN_XXSEL_8HI_UNS
:
18105 case VSX_BUILTIN_XXSEL_4SI_UNS
:
18106 case VSX_BUILTIN_XXSEL_2DI_UNS
:
18107 case CRYPTO_BUILTIN_VPERMXOR
:
18108 case CRYPTO_BUILTIN_VPERMXOR_V2DI
:
18109 case CRYPTO_BUILTIN_VPERMXOR_V4SI
:
18110 case CRYPTO_BUILTIN_VPERMXOR_V8HI
:
18111 case CRYPTO_BUILTIN_VPERMXOR_V16QI
:
18112 case CRYPTO_BUILTIN_VSHASIGMAW
:
18113 case CRYPTO_BUILTIN_VSHASIGMAD
:
18114 case CRYPTO_BUILTIN_VSHASIGMA
:
18121 /* signed permute functions with unsigned char mask. */
18122 case ALTIVEC_BUILTIN_VPERM_16QI
:
18123 case ALTIVEC_BUILTIN_VPERM_8HI
:
18124 case ALTIVEC_BUILTIN_VPERM_4SI
:
18125 case ALTIVEC_BUILTIN_VPERM_4SF
:
18126 case ALTIVEC_BUILTIN_VPERM_2DI
:
18127 case ALTIVEC_BUILTIN_VPERM_2DF
:
18128 case VSX_BUILTIN_VPERM_16QI
:
18129 case VSX_BUILTIN_VPERM_8HI
:
18130 case VSX_BUILTIN_VPERM_4SI
:
18131 case VSX_BUILTIN_VPERM_4SF
:
18132 case VSX_BUILTIN_VPERM_2DI
:
18133 case VSX_BUILTIN_VPERM_2DF
:
18137 /* unsigned args, signed return. */
18138 case VSX_BUILTIN_XVCVUXDSP
:
18139 case VSX_BUILTIN_XVCVUXDDP_UNS
:
18140 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF
:
18144 /* signed args, unsigned return. */
18145 case VSX_BUILTIN_XVCVDPUXDS_UNS
:
18146 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI
:
18147 case MISC_BUILTIN_UNPACK_TD
:
18148 case MISC_BUILTIN_UNPACK_V1TI
:
18152 /* unsigned arguments for 128-bit pack instructions. */
18153 case MISC_BUILTIN_PACK_TD
:
18154 case MISC_BUILTIN_PACK_V1TI
:
18159 /* unsigned second arguments (vector shift right). */
18160 case ALTIVEC_BUILTIN_VSRB
:
18161 case ALTIVEC_BUILTIN_VSRH
:
18162 case ALTIVEC_BUILTIN_VSRW
:
18163 case P8V_BUILTIN_VSRD
:
18171 /* Figure out how many args are present. */
18172 while (num_args
> 0 && h
.mode
[num_args
] == VOIDmode
)
18175 ret_type
= builtin_mode_to_type
[h
.mode
[0]][h
.uns_p
[0]];
18176 if (!ret_type
&& h
.uns_p
[0])
18177 ret_type
= builtin_mode_to_type
[h
.mode
[0]][0];
18180 fatal_error (input_location
,
18181 "internal error: builtin function %s had an unexpected "
18182 "return type %s", name
, GET_MODE_NAME (h
.mode
[0]));
18184 for (i
= 0; i
< (int) ARRAY_SIZE (arg_type
); i
++)
18185 arg_type
[i
] = NULL_TREE
;
18187 for (i
= 0; i
< num_args
; i
++)
18189 int m
= (int) h
.mode
[i
+1];
18190 int uns_p
= h
.uns_p
[i
+1];
18192 arg_type
[i
] = builtin_mode_to_type
[m
][uns_p
];
18193 if (!arg_type
[i
] && uns_p
)
18194 arg_type
[i
] = builtin_mode_to_type
[m
][0];
18197 fatal_error (input_location
,
18198 "internal error: builtin function %s, argument %d "
18199 "had unexpected argument type %s", name
, i
,
18200 GET_MODE_NAME (m
));
18203 builtin_hash_struct
**found
= builtin_hash_table
->find_slot (&h
, INSERT
);
18204 if (*found
== NULL
)
18206 h2
= ggc_alloc
<builtin_hash_struct
> ();
18210 h2
->type
= build_function_type_list (ret_type
, arg_type
[0], arg_type
[1],
18211 arg_type
[2], NULL_TREE
);
18214 return (*found
)->type
;
18218 rs6000_common_init_builtins (void)
18220 const struct builtin_description
*d
;
18223 tree opaque_ftype_opaque
= NULL_TREE
;
18224 tree opaque_ftype_opaque_opaque
= NULL_TREE
;
18225 tree opaque_ftype_opaque_opaque_opaque
= NULL_TREE
;
18226 tree v2si_ftype
= NULL_TREE
;
18227 tree v2si_ftype_qi
= NULL_TREE
;
18228 tree v2si_ftype_v2si_qi
= NULL_TREE
;
18229 tree v2si_ftype_int_qi
= NULL_TREE
;
18230 HOST_WIDE_INT builtin_mask
= rs6000_builtin_mask
;
18232 if (!TARGET_PAIRED_FLOAT
)
18234 builtin_mode_to_type
[V2SImode
][0] = opaque_V2SI_type_node
;
18235 builtin_mode_to_type
[V2SFmode
][0] = opaque_V2SF_type_node
;
18238 /* Paired builtins are only available if you build a compiler with the
18239 appropriate options, so only create those builtins with the appropriate
18240 compiler option. Create Altivec and VSX builtins on machines with at
18241 least the general purpose extensions (970 and newer) to allow the use of
18242 the target attribute.. */
18244 if (TARGET_EXTRA_BUILTINS
)
18245 builtin_mask
|= RS6000_BTM_COMMON
;
18247 /* Add the ternary operators. */
18249 for (i
= 0; i
< ARRAY_SIZE (bdesc_3arg
); i
++, d
++)
18252 HOST_WIDE_INT mask
= d
->mask
;
18254 if ((mask
& builtin_mask
) != mask
)
18256 if (TARGET_DEBUG_BUILTIN
)
18257 fprintf (stderr
, "rs6000_builtin, skip ternary %s\n", d
->name
);
18261 if (rs6000_overloaded_builtin_p (d
->code
))
18263 if (! (type
= opaque_ftype_opaque_opaque_opaque
))
18264 type
= opaque_ftype_opaque_opaque_opaque
18265 = build_function_type_list (opaque_V4SI_type_node
,
18266 opaque_V4SI_type_node
,
18267 opaque_V4SI_type_node
,
18268 opaque_V4SI_type_node
,
18273 enum insn_code icode
= d
->icode
;
18276 if (TARGET_DEBUG_BUILTIN
)
18277 fprintf (stderr
, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
18283 if (icode
== CODE_FOR_nothing
)
18285 if (TARGET_DEBUG_BUILTIN
)
18286 fprintf (stderr
, "rs6000_builtin, skip ternary %s (no code)\n",
18292 type
= builtin_function_type (insn_data
[icode
].operand
[0].mode
,
18293 insn_data
[icode
].operand
[1].mode
,
18294 insn_data
[icode
].operand
[2].mode
,
18295 insn_data
[icode
].operand
[3].mode
,
18299 def_builtin (d
->name
, type
, d
->code
);
18302 /* Add the binary operators. */
18304 for (i
= 0; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18306 machine_mode mode0
, mode1
, mode2
;
18308 HOST_WIDE_INT mask
= d
->mask
;
18310 if ((mask
& builtin_mask
) != mask
)
18312 if (TARGET_DEBUG_BUILTIN
)
18313 fprintf (stderr
, "rs6000_builtin, skip binary %s\n", d
->name
);
18317 if (rs6000_overloaded_builtin_p (d
->code
))
18319 if (! (type
= opaque_ftype_opaque_opaque
))
18320 type
= opaque_ftype_opaque_opaque
18321 = build_function_type_list (opaque_V4SI_type_node
,
18322 opaque_V4SI_type_node
,
18323 opaque_V4SI_type_node
,
18328 enum insn_code icode
= d
->icode
;
18331 if (TARGET_DEBUG_BUILTIN
)
18332 fprintf (stderr
, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
18338 if (icode
== CODE_FOR_nothing
)
18340 if (TARGET_DEBUG_BUILTIN
)
18341 fprintf (stderr
, "rs6000_builtin, skip binary %s (no code)\n",
18347 mode0
= insn_data
[icode
].operand
[0].mode
;
18348 mode1
= insn_data
[icode
].operand
[1].mode
;
18349 mode2
= insn_data
[icode
].operand
[2].mode
;
18351 if (mode0
== V2SImode
&& mode1
== V2SImode
&& mode2
== QImode
)
18353 if (! (type
= v2si_ftype_v2si_qi
))
18354 type
= v2si_ftype_v2si_qi
18355 = build_function_type_list (opaque_V2SI_type_node
,
18356 opaque_V2SI_type_node
,
18361 else if (mode0
== V2SImode
&& GET_MODE_CLASS (mode1
) == MODE_INT
18362 && mode2
== QImode
)
18364 if (! (type
= v2si_ftype_int_qi
))
18365 type
= v2si_ftype_int_qi
18366 = build_function_type_list (opaque_V2SI_type_node
,
18373 type
= builtin_function_type (mode0
, mode1
, mode2
, VOIDmode
,
18377 def_builtin (d
->name
, type
, d
->code
);
18380 /* Add the simple unary operators. */
18382 for (i
= 0; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18384 machine_mode mode0
, mode1
;
18386 HOST_WIDE_INT mask
= d
->mask
;
18388 if ((mask
& builtin_mask
) != mask
)
18390 if (TARGET_DEBUG_BUILTIN
)
18391 fprintf (stderr
, "rs6000_builtin, skip unary %s\n", d
->name
);
18395 if (rs6000_overloaded_builtin_p (d
->code
))
18397 if (! (type
= opaque_ftype_opaque
))
18398 type
= opaque_ftype_opaque
18399 = build_function_type_list (opaque_V4SI_type_node
,
18400 opaque_V4SI_type_node
,
18405 enum insn_code icode
= d
->icode
;
18408 if (TARGET_DEBUG_BUILTIN
)
18409 fprintf (stderr
, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
18415 if (icode
== CODE_FOR_nothing
)
18417 if (TARGET_DEBUG_BUILTIN
)
18418 fprintf (stderr
, "rs6000_builtin, skip unary %s (no code)\n",
18424 mode0
= insn_data
[icode
].operand
[0].mode
;
18425 mode1
= insn_data
[icode
].operand
[1].mode
;
18427 if (mode0
== V2SImode
&& mode1
== QImode
)
18429 if (! (type
= v2si_ftype_qi
))
18430 type
= v2si_ftype_qi
18431 = build_function_type_list (opaque_V2SI_type_node
,
18437 type
= builtin_function_type (mode0
, mode1
, VOIDmode
, VOIDmode
,
18441 def_builtin (d
->name
, type
, d
->code
);
18444 /* Add the simple no-argument operators. */
18446 for (i
= 0; i
< ARRAY_SIZE (bdesc_0arg
); i
++, d
++)
18448 machine_mode mode0
;
18450 HOST_WIDE_INT mask
= d
->mask
;
18452 if ((mask
& builtin_mask
) != mask
)
18454 if (TARGET_DEBUG_BUILTIN
)
18455 fprintf (stderr
, "rs6000_builtin, skip no-argument %s\n", d
->name
);
18458 if (rs6000_overloaded_builtin_p (d
->code
))
18460 if (!opaque_ftype_opaque
)
18461 opaque_ftype_opaque
18462 = build_function_type_list (opaque_V4SI_type_node
, NULL_TREE
);
18463 type
= opaque_ftype_opaque
;
18467 enum insn_code icode
= d
->icode
;
18470 if (TARGET_DEBUG_BUILTIN
)
18471 fprintf (stderr
, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18472 (long unsigned) i
);
18475 if (icode
== CODE_FOR_nothing
)
18477 if (TARGET_DEBUG_BUILTIN
)
18479 "rs6000_builtin, skip no-argument %s (no code)\n",
18483 mode0
= insn_data
[icode
].operand
[0].mode
;
18484 if (mode0
== V2SImode
)
18486 /* code for paired single */
18487 if (! (type
= v2si_ftype
))
18490 = build_function_type_list (opaque_V2SI_type_node
,
18496 type
= builtin_function_type (mode0
, VOIDmode
, VOIDmode
, VOIDmode
,
18499 def_builtin (d
->name
, type
, d
->code
);
18503 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18505 init_float128_ibm (machine_mode mode
)
18507 if (!TARGET_XL_COMPAT
)
18509 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
18510 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
18511 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
18512 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
18514 if (!TARGET_HARD_FLOAT
)
18516 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
18517 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
18518 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
18519 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
18520 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
18521 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
18522 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
18523 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
18525 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
18526 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
18527 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
18528 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
18529 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
18530 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
18531 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
18532 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
18537 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
18538 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
18539 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
18540 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
18543 /* Add various conversions for IFmode to use the traditional TFmode
18545 if (mode
== IFmode
)
18547 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf2");
18548 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf2");
18549 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctftd2");
18550 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd2");
18551 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd2");
18552 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtdtf2");
18554 if (TARGET_POWERPC64
)
18556 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
18557 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
18558 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
18559 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
18564 /* Set up IEEE 128-bit floating point routines. Use different names if the
18565 arguments can be passed in a vector register. The historical PowerPC
18566 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18567 continue to use that if we aren't using vector registers to pass IEEE
18568 128-bit floating point. */
18571 init_float128_ieee (machine_mode mode
)
18573 if (FLOAT128_VECTOR_P (mode
))
18575 set_optab_libfunc (add_optab
, mode
, "__addkf3");
18576 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
18577 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
18578 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
18579 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
18580 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
18581 set_optab_libfunc (abs_optab
, mode
, "__abstkf2");
18583 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
18584 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
18585 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
18586 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
18587 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
18588 set_optab_libfunc (le_optab
, mode
, "__lekf2");
18589 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
18591 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
18592 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
18593 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
18594 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
18596 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__extendtfkf2");
18597 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
18598 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__extendtfkf2");
18600 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__trunckftf2");
18601 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
18602 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__trunckftf2");
18604 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf2");
18605 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf2");
18606 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunckftd2");
18607 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd2");
18608 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd2");
18609 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtdkf2");
18611 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
18612 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
18613 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
18614 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
18616 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
18617 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
18618 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
18619 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
18621 if (TARGET_POWERPC64
)
18623 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti");
18624 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti");
18625 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf");
18626 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf");
18632 set_optab_libfunc (add_optab
, mode
, "_q_add");
18633 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
18634 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
18635 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
18636 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
18637 if (TARGET_PPC_GPOPT
)
18638 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
18640 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
18641 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
18642 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
18643 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
18644 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
18645 set_optab_libfunc (le_optab
, mode
, "_q_fle");
18647 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
18648 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
18649 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
18650 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
18651 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
18652 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
18653 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
18654 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
18659 rs6000_init_libfuncs (void)
18661 /* __float128 support. */
18662 if (TARGET_FLOAT128_TYPE
)
18664 init_float128_ibm (IFmode
);
18665 init_float128_ieee (KFmode
);
18668 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18669 if (TARGET_LONG_DOUBLE_128
)
18671 if (!TARGET_IEEEQUAD
)
18672 init_float128_ibm (TFmode
);
18674 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18676 init_float128_ieee (TFmode
);
18680 /* Emit a potentially record-form instruction, setting DST from SRC.
18681 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18682 signed comparison of DST with zero. If DOT is 1, the generated RTL
18683 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18684 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18685 a separate COMPARE. */
18688 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
18692 emit_move_insn (dst
, src
);
18696 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
18698 emit_move_insn (dst
, src
);
18699 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
18703 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
18706 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
18707 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
18711 rtx set
= gen_rtx_SET (dst
, src
);
18712 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
18717 /* A validation routine: say whether CODE, a condition code, and MODE
18718 match. The other alternatives either don't make sense or should
18719 never be generated. */
18722 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
18724 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
18725 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
18726 && GET_MODE_CLASS (mode
) == MODE_CC
);
18728 /* These don't make sense. */
18729 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
18730 || mode
!= CCUNSmode
);
18732 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
18733 || mode
== CCUNSmode
);
18735 gcc_assert (mode
== CCFPmode
18736 || (code
!= ORDERED
&& code
!= UNORDERED
18737 && code
!= UNEQ
&& code
!= LTGT
18738 && code
!= UNGT
&& code
!= UNLT
18739 && code
!= UNGE
&& code
!= UNLE
));
18741 /* These should never be generated except for
18742 flag_finite_math_only. */
18743 gcc_assert (mode
!= CCFPmode
18744 || flag_finite_math_only
18745 || (code
!= LE
&& code
!= GE
18746 && code
!= UNEQ
&& code
!= LTGT
18747 && code
!= UNGT
&& code
!= UNLT
));
18749 /* These are invalid; the information is not there. */
18750 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
18754 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18755 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18756 not zero, store there the bit offset (counted from the right) where
18757 the single stretch of 1 bits begins; and similarly for B, the bit
18758 offset where it ends. */
18761 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
18763 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
18764 unsigned HOST_WIDE_INT bit
;
18766 int n
= GET_MODE_PRECISION (mode
);
18768 if (mode
!= DImode
&& mode
!= SImode
)
18771 if (INTVAL (mask
) >= 0)
18774 ne
= exact_log2 (bit
);
18775 nb
= exact_log2 (val
+ bit
);
18777 else if (val
+ 1 == 0)
18786 nb
= exact_log2 (bit
);
18787 ne
= exact_log2 (val
+ bit
);
18792 ne
= exact_log2 (bit
);
18793 if (val
+ bit
== 0)
18801 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
18812 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18813 or rldicr instruction, to implement an AND with it in mode MODE. */
18816 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
18820 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
18823 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18825 if (mode
== DImode
)
18826 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
18828 /* For SImode, rlwinm can do everything. */
18829 if (mode
== SImode
)
18830 return (nb
< 32 && ne
< 32);
18835 /* Return the instruction template for an AND with mask in mode MODE, with
18836 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18839 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
18843 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
18844 gcc_unreachable ();
18846 if (mode
== DImode
&& ne
== 0)
18848 operands
[3] = GEN_INT (63 - nb
);
18850 return "rldicl. %0,%1,0,%3";
18851 return "rldicl %0,%1,0,%3";
18854 if (mode
== DImode
&& nb
== 63)
18856 operands
[3] = GEN_INT (63 - ne
);
18858 return "rldicr. %0,%1,0,%3";
18859 return "rldicr %0,%1,0,%3";
18862 if (nb
< 32 && ne
< 32)
18864 operands
[3] = GEN_INT (31 - nb
);
18865 operands
[4] = GEN_INT (31 - ne
);
18867 return "rlwinm. %0,%1,0,%3,%4";
18868 return "rlwinm %0,%1,0,%3,%4";
18871 gcc_unreachable ();
18874 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18875 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18876 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18879 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
18883 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
18886 int n
= GET_MODE_PRECISION (mode
);
18889 if (CONST_INT_P (XEXP (shift
, 1)))
18891 sh
= INTVAL (XEXP (shift
, 1));
18892 if (sh
< 0 || sh
>= n
)
18896 rtx_code code
= GET_CODE (shift
);
18898 /* Convert any shift by 0 to a rotate, to simplify below code. */
18902 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18903 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
18905 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
18911 /* DImode rotates need rld*. */
18912 if (mode
== DImode
&& code
== ROTATE
)
18913 return (nb
== 63 || ne
== 0 || ne
== sh
);
18915 /* SImode rotates need rlw*. */
18916 if (mode
== SImode
&& code
== ROTATE
)
18917 return (nb
< 32 && ne
< 32 && sh
< 32);
18919 /* Wrap-around masks are only okay for rotates. */
18923 /* Variable shifts are only okay for rotates. */
18927 /* Don't allow ASHIFT if the mask is wrong for that. */
18928 if (code
== ASHIFT
&& ne
< sh
)
18931 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18932 if the mask is wrong for that. */
18933 if (nb
< 32 && ne
< 32 && sh
< 32
18934 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
18937 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18938 if the mask is wrong for that. */
18939 if (code
== LSHIFTRT
)
18941 if (nb
== 63 || ne
== 0 || ne
== sh
)
18942 return !(code
== LSHIFTRT
&& nb
>= sh
);
18947 /* Return the instruction template for a shift with mask in mode MODE, with
18948 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18951 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
18955 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
18956 gcc_unreachable ();
18958 if (mode
== DImode
&& ne
== 0)
18960 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
18961 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
18962 operands
[3] = GEN_INT (63 - nb
);
18964 return "rld%I2cl. %0,%1,%2,%3";
18965 return "rld%I2cl %0,%1,%2,%3";
18968 if (mode
== DImode
&& nb
== 63)
18970 operands
[3] = GEN_INT (63 - ne
);
18972 return "rld%I2cr. %0,%1,%2,%3";
18973 return "rld%I2cr %0,%1,%2,%3";
18977 && GET_CODE (operands
[4]) != LSHIFTRT
18978 && CONST_INT_P (operands
[2])
18979 && ne
== INTVAL (operands
[2]))
18981 operands
[3] = GEN_INT (63 - nb
);
18983 return "rld%I2c. %0,%1,%2,%3";
18984 return "rld%I2c %0,%1,%2,%3";
18987 if (nb
< 32 && ne
< 32)
18989 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
18990 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
18991 operands
[3] = GEN_INT (31 - nb
);
18992 operands
[4] = GEN_INT (31 - ne
);
18993 /* This insn can also be a 64-bit rotate with mask that really makes
18994 it just a shift right (with mask); the %h below are to adjust for
18995 that situation (shift count is >= 32 in that case). */
18997 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18998 return "rlw%I2nm %0,%1,%h2,%3,%4";
19001 gcc_unreachable ();
19004 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19005 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19006 ASHIFT, or LSHIFTRT) in mode MODE. */
19009 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
19013 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
19016 int n
= GET_MODE_PRECISION (mode
);
19018 int sh
= INTVAL (XEXP (shift
, 1));
19019 if (sh
< 0 || sh
>= n
)
19022 rtx_code code
= GET_CODE (shift
);
19024 /* Convert any shift by 0 to a rotate, to simplify below code. */
19028 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19029 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
19031 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
19037 /* DImode rotates need rldimi. */
19038 if (mode
== DImode
&& code
== ROTATE
)
19041 /* SImode rotates need rlwimi. */
19042 if (mode
== SImode
&& code
== ROTATE
)
19043 return (nb
< 32 && ne
< 32 && sh
< 32);
19045 /* Wrap-around masks are only okay for rotates. */
19049 /* Don't allow ASHIFT if the mask is wrong for that. */
19050 if (code
== ASHIFT
&& ne
< sh
)
19053 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19054 if the mask is wrong for that. */
19055 if (nb
< 32 && ne
< 32 && sh
< 32
19056 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
19059 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19060 if the mask is wrong for that. */
19061 if (code
== LSHIFTRT
)
19064 return !(code
== LSHIFTRT
&& nb
>= sh
);
19069 /* Return the instruction template for an insert with mask in mode MODE, with
19070 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19073 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
19077 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
19078 gcc_unreachable ();
19080 /* Prefer rldimi because rlwimi is cracked. */
19081 if (TARGET_POWERPC64
19082 && (!dot
|| mode
== DImode
)
19083 && GET_CODE (operands
[4]) != LSHIFTRT
19084 && ne
== INTVAL (operands
[2]))
19086 operands
[3] = GEN_INT (63 - nb
);
19088 return "rldimi. %0,%1,%2,%3";
19089 return "rldimi %0,%1,%2,%3";
19092 if (nb
< 32 && ne
< 32)
19094 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
19095 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
19096 operands
[3] = GEN_INT (31 - nb
);
19097 operands
[4] = GEN_INT (31 - ne
);
19099 return "rlwimi. %0,%1,%2,%3,%4";
19100 return "rlwimi %0,%1,%2,%3,%4";
19103 gcc_unreachable ();
19106 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19107 using two machine instructions. */
19110 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
19112 /* There are two kinds of AND we can handle with two insns:
19113 1) those we can do with two rl* insn;
19116 We do not handle that last case yet. */
19118 /* If there is just one stretch of ones, we can do it. */
19119 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
19122 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19123 one insn, we can do the whole thing with two. */
19124 unsigned HOST_WIDE_INT val
= INTVAL (c
);
19125 unsigned HOST_WIDE_INT bit1
= val
& -val
;
19126 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
19127 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
19128 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
19129 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
19132 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19133 If EXPAND is true, split rotate-and-mask instructions we generate to
19134 their constituent parts as well (this is used during expand); if DOT
19135 is 1, make the last insn a record-form instruction clobbering the
19136 destination GPR and setting the CC reg (from operands[3]); if 2, set
19137 that GPR as well as the CC reg. */
19140 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
19142 gcc_assert (!(expand
&& dot
));
19144 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
19146 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19147 shift right. This generates better code than doing the masks without
19148 shifts, or shifting first right and then left. */
19150 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
19152 gcc_assert (mode
== DImode
);
19154 int shift
= 63 - nb
;
19157 rtx tmp1
= gen_reg_rtx (DImode
);
19158 rtx tmp2
= gen_reg_rtx (DImode
);
19159 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
19160 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
19161 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
19165 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
19166 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
19167 emit_move_insn (operands
[0], tmp
);
19168 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
19169 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
19174 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19175 that does the rest. */
19176 unsigned HOST_WIDE_INT bit1
= val
& -val
;
19177 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
19178 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
19179 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
19181 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
19182 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
19184 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
19186 /* Two "no-rotate"-and-mask instructions, for SImode. */
19187 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
19189 gcc_assert (mode
== SImode
);
19191 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
19192 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
19193 emit_move_insn (reg
, tmp
);
19194 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
19195 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
19199 gcc_assert (mode
== DImode
);
19201 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19202 insns; we have to do the first in SImode, because it wraps. */
19203 if (mask2
<= 0xffffffff
19204 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
19206 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
19207 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
19209 rtx reg_low
= gen_lowpart (SImode
, reg
);
19210 emit_move_insn (reg_low
, tmp
);
19211 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
19212 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
19216 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19217 at the top end), rotate back and clear the other hole. */
19218 int right
= exact_log2 (bit3
);
19219 int left
= 64 - right
;
19221 /* Rotate the mask too. */
19222 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
19226 rtx tmp1
= gen_reg_rtx (DImode
);
19227 rtx tmp2
= gen_reg_rtx (DImode
);
19228 rtx tmp3
= gen_reg_rtx (DImode
);
19229 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
19230 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
19231 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
19232 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
19236 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
19237 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
19238 emit_move_insn (operands
[0], tmp
);
19239 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
19240 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
19241 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
19245 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
19246 for lfq and stfq insns iff the registers are hard registers. */
19249 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
19251 /* We might have been passed a SUBREG. */
19252 if (GET_CODE (reg1
) != REG
|| GET_CODE (reg2
) != REG
)
19255 /* We might have been passed non floating point registers. */
19256 if (!FP_REGNO_P (REGNO (reg1
))
19257 || !FP_REGNO_P (REGNO (reg2
)))
19260 return (REGNO (reg1
) == REGNO (reg2
) - 1);
19263 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19264 addr1 and addr2 must be in consecutive memory locations
19265 (addr2 == addr1 + 8). */
19268 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
19271 unsigned int reg1
, reg2
;
19272 int offset1
, offset2
;
19274 /* The mems cannot be volatile. */
19275 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
19278 addr1
= XEXP (mem1
, 0);
19279 addr2
= XEXP (mem2
, 0);
19281 /* Extract an offset (if used) from the first addr. */
19282 if (GET_CODE (addr1
) == PLUS
)
19284 /* If not a REG, return zero. */
19285 if (GET_CODE (XEXP (addr1
, 0)) != REG
)
19289 reg1
= REGNO (XEXP (addr1
, 0));
19290 /* The offset must be constant! */
19291 if (GET_CODE (XEXP (addr1
, 1)) != CONST_INT
)
19293 offset1
= INTVAL (XEXP (addr1
, 1));
19296 else if (GET_CODE (addr1
) != REG
)
19300 reg1
= REGNO (addr1
);
19301 /* This was a simple (mem (reg)) expression. Offset is 0. */
19305 /* And now for the second addr. */
19306 if (GET_CODE (addr2
) == PLUS
)
19308 /* If not a REG, return zero. */
19309 if (GET_CODE (XEXP (addr2
, 0)) != REG
)
19313 reg2
= REGNO (XEXP (addr2
, 0));
19314 /* The offset must be constant. */
19315 if (GET_CODE (XEXP (addr2
, 1)) != CONST_INT
)
19317 offset2
= INTVAL (XEXP (addr2
, 1));
19320 else if (GET_CODE (addr2
) != REG
)
19324 reg2
= REGNO (addr2
);
19325 /* This was a simple (mem (reg)) expression. Offset is 0. */
19329 /* Both of these must have the same base register. */
19333 /* The offset for the second addr must be 8 more than the first addr. */
19334 if (offset2
!= offset1
+ 8)
19337 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19344 rs6000_secondary_memory_needed_rtx (machine_mode mode
)
19346 static bool eliminated
= false;
19349 if (mode
!= SDmode
|| TARGET_NO_SDMODE_STACK
)
19350 ret
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
19353 rtx mem
= cfun
->machine
->sdmode_stack_slot
;
19354 gcc_assert (mem
!= NULL_RTX
);
19358 mem
= eliminate_regs (mem
, VOIDmode
, NULL_RTX
);
19359 cfun
->machine
->sdmode_stack_slot
= mem
;
19365 if (TARGET_DEBUG_ADDR
)
19367 fprintf (stderr
, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
19368 GET_MODE_NAME (mode
));
19370 fprintf (stderr
, "\tNULL_RTX\n");
19378 /* Return the mode to be used for memory when a secondary memory
19379 location is needed. For SDmode values we need to use DDmode, in
19380 all other cases we can use the same mode. */
19382 rs6000_secondary_memory_needed_mode (machine_mode mode
)
19384 if (lra_in_progress
&& mode
== SDmode
)
19390 rs6000_check_sdmode (tree
*tp
, int *walk_subtrees
, void *data ATTRIBUTE_UNUSED
)
19392 /* Don't walk into types. */
19393 if (*tp
== NULL_TREE
|| *tp
== error_mark_node
|| TYPE_P (*tp
))
19395 *walk_subtrees
= 0;
19399 switch (TREE_CODE (*tp
))
19408 case VIEW_CONVERT_EXPR
:
19409 if (TYPE_MODE (TREE_TYPE (*tp
)) == SDmode
)
19419 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19420 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19421 only work on the traditional altivec registers, note if an altivec register
19424 static enum rs6000_reg_type
19425 register_to_reg_type (rtx reg
, bool *is_altivec
)
19427 HOST_WIDE_INT regno
;
19428 enum reg_class rclass
;
19430 if (GET_CODE (reg
) == SUBREG
)
19431 reg
= SUBREG_REG (reg
);
19434 return NO_REG_TYPE
;
19436 regno
= REGNO (reg
);
19437 if (regno
>= FIRST_PSEUDO_REGISTER
)
19439 if (!lra_in_progress
&& !reload_in_progress
&& !reload_completed
)
19440 return PSEUDO_REG_TYPE
;
19442 regno
= true_regnum (reg
);
19443 if (regno
< 0 || regno
>= FIRST_PSEUDO_REGISTER
)
19444 return PSEUDO_REG_TYPE
;
19447 gcc_assert (regno
>= 0);
19449 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
19450 *is_altivec
= true;
19452 rclass
= rs6000_regno_regclass
[regno
];
19453 return reg_class_to_reg_type
[(int)rclass
];
19456 /* Helper function to return the cost of adding a TOC entry address. */
19459 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
19463 if (TARGET_CMODEL
!= CMODEL_SMALL
)
19464 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
19467 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
19472 /* Helper function for rs6000_secondary_reload to determine whether the memory
19473 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19474 needs reloading. Return negative if the memory is not handled by the memory
19475 helper functions and to try a different reload method, 0 if no additional
19476 instructions are need, and positive to give the extra cost for the
19480 rs6000_secondary_reload_memory (rtx addr
,
19481 enum reg_class rclass
,
19484 int extra_cost
= 0;
19485 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
19486 addr_mask_type addr_mask
;
19487 const char *type
= NULL
;
19488 const char *fail_msg
= NULL
;
19490 if (GPR_REG_CLASS_P (rclass
))
19491 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
19493 else if (rclass
== FLOAT_REGS
)
19494 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
19496 else if (rclass
== ALTIVEC_REGS
)
19497 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
19499 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19500 else if (rclass
== VSX_REGS
)
19501 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
19502 & ~RELOAD_REG_AND_M16
);
19504 /* If the register allocator hasn't made up its mind yet on the register
19505 class to use, settle on defaults to use. */
19506 else if (rclass
== NO_REGS
)
19508 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
19509 & ~RELOAD_REG_AND_M16
);
19511 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
19512 addr_mask
&= ~(RELOAD_REG_INDEXED
19513 | RELOAD_REG_PRE_INCDEC
19514 | RELOAD_REG_PRE_MODIFY
);
19520 /* If the register isn't valid in this register class, just return now. */
19521 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
19523 if (TARGET_DEBUG_ADDR
)
19526 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19527 "not valid in class\n",
19528 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
19535 switch (GET_CODE (addr
))
19537 /* Does the register class supports auto update forms for this mode? We
19538 don't need a scratch register, since the powerpc only supports
19539 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19542 reg
= XEXP (addr
, 0);
19543 if (!base_reg_operand (addr
, GET_MODE (reg
)))
19545 fail_msg
= "no base register #1";
19549 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
19557 reg
= XEXP (addr
, 0);
19558 plus_arg1
= XEXP (addr
, 1);
19559 if (!base_reg_operand (reg
, GET_MODE (reg
))
19560 || GET_CODE (plus_arg1
) != PLUS
19561 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
19563 fail_msg
= "bad PRE_MODIFY";
19567 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
19574 /* Do we need to simulate AND -16 to clear the bottom address bits used
19575 in VMX load/stores? Only allow the AND for vector sizes. */
19577 and_arg
= XEXP (addr
, 0);
19578 if (GET_MODE_SIZE (mode
) != 16
19579 || GET_CODE (XEXP (addr
, 1)) != CONST_INT
19580 || INTVAL (XEXP (addr
, 1)) != -16)
19582 fail_msg
= "bad Altivec AND #1";
19586 if (rclass
!= ALTIVEC_REGS
)
19588 if (legitimate_indirect_address_p (and_arg
, false))
19591 else if (legitimate_indexed_address_p (and_arg
, false))
19596 fail_msg
= "bad Altivec AND #2";
19604 /* If this is an indirect address, make sure it is a base register. */
19607 if (!legitimate_indirect_address_p (addr
, false))
19614 /* If this is an indexed address, make sure the register class can handle
19615 indexed addresses for this mode. */
19617 plus_arg0
= XEXP (addr
, 0);
19618 plus_arg1
= XEXP (addr
, 1);
19620 /* (plus (plus (reg) (constant)) (constant)) is generated during
19621 push_reload processing, so handle it now. */
19622 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
19624 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
19631 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19632 push_reload processing, so handle it now. */
19633 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
19635 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
19638 type
= "indexed #2";
19642 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
19644 fail_msg
= "no base register #2";
19648 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
19650 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
19651 || !legitimate_indexed_address_p (addr
, false))
19658 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
19659 && CONST_INT_P (plus_arg1
))
19661 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
19664 type
= "vector d-form offset";
19668 /* Make sure the register class can handle offset addresses. */
19669 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
19671 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
19674 type
= "offset #2";
19680 fail_msg
= "bad PLUS";
19687 /* Quad offsets are restricted and can't handle normal addresses. */
19688 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
19691 type
= "vector d-form lo_sum";
19694 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
19696 fail_msg
= "bad LO_SUM";
19700 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
19707 /* Static addresses need to create a TOC entry. */
19711 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
19714 type
= "vector d-form lo_sum #2";
19720 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
19724 /* TOC references look like offsetable memory. */
19726 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
19728 fail_msg
= "bad UNSPEC";
19732 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
19735 type
= "vector d-form lo_sum #3";
19738 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
19741 type
= "toc reference";
19747 fail_msg
= "bad address";
19752 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
19754 if (extra_cost
< 0)
19756 "rs6000_secondary_reload_memory error: mode = %s, "
19757 "class = %s, addr_mask = '%s', %s\n",
19758 GET_MODE_NAME (mode
),
19759 reg_class_names
[rclass
],
19760 rs6000_debug_addr_mask (addr_mask
, false),
19761 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
19765 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19766 "addr_mask = '%s', extra cost = %d, %s\n",
19767 GET_MODE_NAME (mode
),
19768 reg_class_names
[rclass
],
19769 rs6000_debug_addr_mask (addr_mask
, false),
19771 (type
) ? type
: "<none>");
19779 /* Helper function for rs6000_secondary_reload to return true if a move to a
19780 different register classe is really a simple move. */
19783 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
19784 enum rs6000_reg_type from_type
,
19787 int size
= GET_MODE_SIZE (mode
);
19789 /* Add support for various direct moves available. In this function, we only
19790 look at cases where we don't need any extra registers, and one or more
19791 simple move insns are issued. Originally small integers are not allowed
19792 in FPR/VSX registers. Single precision binary floating is not a simple
19793 move because we need to convert to the single precision memory layout.
19794 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19795 need special direct move handling, which we do not support yet. */
19796 if (TARGET_DIRECT_MOVE
19797 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
19798 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
19800 if (TARGET_POWERPC64
)
19802 /* ISA 2.07: MTVSRD or MVFVSRD. */
19806 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19807 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
19811 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19812 if (TARGET_VSX_SMALL_INTEGER
)
19814 if (mode
== SImode
)
19817 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
19821 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19822 if (mode
== SDmode
)
19826 /* Power6+: MFTGPR or MFFGPR. */
19827 else if (TARGET_MFPGPR
&& TARGET_POWERPC64
&& size
== 8
19828 && ((to_type
== GPR_REG_TYPE
&& from_type
== FPR_REG_TYPE
)
19829 || (to_type
== FPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
19832 /* Move to/from SPR. */
19833 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
19834 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
19835 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
19841 /* Direct move helper function for rs6000_secondary_reload, handle all of the
19842 special direct moves that involve allocating an extra register, return the
19843 insn code of the helper function if there is such a function or
19844 CODE_FOR_nothing if not. */
19847 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
19848 enum rs6000_reg_type from_type
,
19850 secondary_reload_info
*sri
,
19854 enum insn_code icode
= CODE_FOR_nothing
;
19856 int size
= GET_MODE_SIZE (mode
);
19858 if (TARGET_POWERPC64
&& size
== 16)
19860 /* Handle moving 128-bit values from GPRs to VSX point registers on
19861 ISA 2.07 (power8, power9) when running in 64-bit mode using
19862 XXPERMDI to glue the two 64-bit values back together. */
19863 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
19865 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
19866 icode
= reg_addr
[mode
].reload_vsx_gpr
;
19869 /* Handle moving 128-bit values from VSX point registers to GPRs on
19870 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19871 bottom 64-bit value. */
19872 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
19874 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
19875 icode
= reg_addr
[mode
].reload_gpr_vsx
;
19879 else if (TARGET_POWERPC64
&& mode
== SFmode
)
19881 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
19883 cost
= 3; /* xscvdpspn, mfvsrd, and. */
19884 icode
= reg_addr
[mode
].reload_gpr_vsx
;
19887 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
19889 cost
= 2; /* mtvsrz, xscvspdpn. */
19890 icode
= reg_addr
[mode
].reload_vsx_gpr
;
19894 else if (!TARGET_POWERPC64
&& size
== 8)
19896 /* Handle moving 64-bit values from GPRs to floating point registers on
19897 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19898 32-bit values back together. Altivec register classes must be handled
19899 specially since a different instruction is used, and the secondary
19900 reload support requires a single instruction class in the scratch
19901 register constraint. However, right now TFmode is not allowed in
19902 Altivec registers, so the pattern will never match. */
19903 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
19905 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
19906 icode
= reg_addr
[mode
].reload_fpr_gpr
;
19910 if (icode
!= CODE_FOR_nothing
)
19915 sri
->icode
= icode
;
19916 sri
->extra_cost
= cost
;
19923 /* Return whether a move between two register classes can be done either
19924 directly (simple move) or via a pattern that uses a single extra temporary
19925 (using ISA 2.07's direct move in this case. */
19928 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
19929 enum rs6000_reg_type from_type
,
19931 secondary_reload_info
*sri
,
19934 /* Fall back to load/store reloads if either type is not a register. */
19935 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
19938 /* If we haven't allocated registers yet, assume the move can be done for the
19939 standard register types. */
19940 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
19941 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
19942 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
19945 /* Moves to the same set of registers is a simple move for non-specialized
19947 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
19950 /* Check whether a simple move can be done directly. */
19951 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
19955 sri
->icode
= CODE_FOR_nothing
;
19956 sri
->extra_cost
= 0;
19961 /* Now check if we can do it in a few steps. */
19962 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
19966 /* Inform reload about cases where moving X with a mode MODE to a register in
19967 RCLASS requires an extra scratch or immediate register. Return the class
19968 needed for the immediate register.
19970 For VSX and Altivec, we may need a register to convert sp+offset into
19973 For misaligned 64-bit gpr loads and stores we need a register to
19974 convert an offset address to indirect. */
19977 rs6000_secondary_reload (bool in_p
,
19979 reg_class_t rclass_i
,
19981 secondary_reload_info
*sri
)
19983 enum reg_class rclass
= (enum reg_class
) rclass_i
;
19984 reg_class_t ret
= ALL_REGS
;
19985 enum insn_code icode
;
19986 bool default_p
= false;
19987 bool done_p
= false;
19989 /* Allow subreg of memory before/during reload. */
19990 bool memory_p
= (MEM_P (x
)
19991 || (!reload_completed
&& GET_CODE (x
) == SUBREG
19992 && MEM_P (SUBREG_REG (x
))));
19994 sri
->icode
= CODE_FOR_nothing
;
19995 sri
->t_icode
= CODE_FOR_nothing
;
19996 sri
->extra_cost
= 0;
19998 ? reg_addr
[mode
].reload_load
19999 : reg_addr
[mode
].reload_store
);
20001 if (REG_P (x
) || register_operand (x
, mode
))
20003 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
20004 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
20005 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
20008 std::swap (to_type
, from_type
);
20010 /* Can we do a direct move of some sort? */
20011 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
20014 icode
= (enum insn_code
)sri
->icode
;
20021 /* Make sure 0.0 is not reloaded or forced into memory. */
20022 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
20029 /* If this is a scalar floating point value and we want to load it into the
20030 traditional Altivec registers, do it via a move via a traditional floating
20031 point register, unless we have D-form addressing. Also make sure that
20032 non-zero constants use a FPR. */
20033 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
20034 && !mode_supports_vmx_dform (mode
)
20035 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
20036 && (memory_p
|| (GET_CODE (x
) == CONST_DOUBLE
)))
20043 /* Handle reload of load/stores if we have reload helper functions. */
20044 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
20046 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
20049 if (extra_cost
>= 0)
20053 if (extra_cost
> 0)
20055 sri
->extra_cost
= extra_cost
;
20056 sri
->icode
= icode
;
20061 /* Handle unaligned loads and stores of integer registers. */
20062 if (!done_p
&& TARGET_POWERPC64
20063 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
20065 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
20067 rtx addr
= XEXP (x
, 0);
20068 rtx off
= address_offset (addr
);
20070 if (off
!= NULL_RTX
)
20072 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
20073 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
20075 /* We need a secondary reload when our legitimate_address_p
20076 says the address is good (as otherwise the entire address
20077 will be reloaded), and the offset is not a multiple of
20078 four or we have an address wrap. Address wrap will only
20079 occur for LO_SUMs since legitimate_offset_address_p
20080 rejects addresses for 16-byte mems that will wrap. */
20081 if (GET_CODE (addr
) == LO_SUM
20082 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20083 && ((offset
& 3) != 0
20084 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
20085 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
20086 && (offset
& 3) != 0))
20088 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20090 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
20091 : CODE_FOR_reload_di_load
);
20093 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
20094 : CODE_FOR_reload_di_store
);
20095 sri
->extra_cost
= 2;
20106 if (!done_p
&& !TARGET_POWERPC64
20107 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
20109 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
20111 rtx addr
= XEXP (x
, 0);
20112 rtx off
= address_offset (addr
);
20114 if (off
!= NULL_RTX
)
20116 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
20117 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
20119 /* We need a secondary reload when our legitimate_address_p
20120 says the address is good (as otherwise the entire address
20121 will be reloaded), and we have a wrap.
20123 legitimate_lo_sum_address_p allows LO_SUM addresses to
20124 have any offset so test for wrap in the low 16 bits.
20126 legitimate_offset_address_p checks for the range
20127 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20128 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20129 [0x7ff4,0x7fff] respectively, so test for the
20130 intersection of these ranges, [0x7ffc,0x7fff] and
20131 [0x7ff4,0x7ff7] respectively.
20133 Note that the address we see here may have been
20134 manipulated by legitimize_reload_address. */
20135 if (GET_CODE (addr
) == LO_SUM
20136 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
20137 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
20140 sri
->icode
= CODE_FOR_reload_si_load
;
20142 sri
->icode
= CODE_FOR_reload_si_store
;
20143 sri
->extra_cost
= 2;
20158 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
20160 gcc_assert (ret
!= ALL_REGS
);
20162 if (TARGET_DEBUG_ADDR
)
20165 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20167 reg_class_names
[ret
],
20168 in_p
? "true" : "false",
20169 reg_class_names
[rclass
],
20170 GET_MODE_NAME (mode
));
20172 if (reload_completed
)
20173 fputs (", after reload", stderr
);
20176 fputs (", done_p not set", stderr
);
20179 fputs (", default secondary reload", stderr
);
20181 if (sri
->icode
!= CODE_FOR_nothing
)
20182 fprintf (stderr
, ", reload func = %s, extra cost = %d",
20183 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
20185 else if (sri
->extra_cost
> 0)
20186 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
20188 fputs ("\n", stderr
);
20195 /* Better tracing for rs6000_secondary_reload_inner. */
20198 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
20203 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
20205 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
20206 store_p
? "store" : "load");
20209 set
= gen_rtx_SET (mem
, reg
);
20211 set
= gen_rtx_SET (reg
, mem
);
20213 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
20214 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
20217 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
20218 ATTRIBUTE_NORETURN
;
20221 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
20224 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
20225 gcc_unreachable ();
20228 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20229 reload helper functions. These were identified in
20230 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20231 reload, it calls the insns:
20232 reload_<RELOAD:mode>_<P:mptrsize>_store
20233 reload_<RELOAD:mode>_<P:mptrsize>_load
20235 which in turn calls this function, to do whatever is necessary to create
20236 valid addresses. */
20239 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
20241 int regno
= true_regnum (reg
);
20242 machine_mode mode
= GET_MODE (reg
);
20243 addr_mask_type addr_mask
;
20246 rtx op_reg
, op0
, op1
;
20251 if (regno
< 0 || regno
>= FIRST_PSEUDO_REGISTER
|| !MEM_P (mem
)
20252 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
20253 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20255 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
20256 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
20258 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
20259 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
20261 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
20262 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
20265 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20267 /* Make sure the mode is valid in this register class. */
20268 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
20269 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20271 if (TARGET_DEBUG_ADDR
)
20272 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
20274 new_addr
= addr
= XEXP (mem
, 0);
20275 switch (GET_CODE (addr
))
20277 /* Does the register class support auto update forms for this mode? If
20278 not, do the update now. We don't need a scratch register, since the
20279 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20282 op_reg
= XEXP (addr
, 0);
20283 if (!base_reg_operand (op_reg
, Pmode
))
20284 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20286 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
20288 emit_insn (gen_add2_insn (op_reg
, GEN_INT (GET_MODE_SIZE (mode
))));
20294 op0
= XEXP (addr
, 0);
20295 op1
= XEXP (addr
, 1);
20296 if (!base_reg_operand (op0
, Pmode
)
20297 || GET_CODE (op1
) != PLUS
20298 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
20299 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20301 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
20303 emit_insn (gen_rtx_SET (op0
, op1
));
20308 /* Do we need to simulate AND -16 to clear the bottom address bits used
20309 in VMX load/stores? */
20311 op0
= XEXP (addr
, 0);
20312 op1
= XEXP (addr
, 1);
20313 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
20315 if (REG_P (op0
) || GET_CODE (op0
) == SUBREG
)
20318 else if (GET_CODE (op1
) == PLUS
)
20320 emit_insn (gen_rtx_SET (scratch
, op1
));
20325 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20327 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
20328 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
20329 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
20330 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
20331 new_addr
= scratch
;
20335 /* If this is an indirect address, make sure it is a base register. */
20338 if (!base_reg_operand (addr
, GET_MODE (addr
)))
20340 emit_insn (gen_rtx_SET (scratch
, addr
));
20341 new_addr
= scratch
;
20345 /* If this is an indexed address, make sure the register class can handle
20346 indexed addresses for this mode. */
20348 op0
= XEXP (addr
, 0);
20349 op1
= XEXP (addr
, 1);
20350 if (!base_reg_operand (op0
, Pmode
))
20351 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20353 else if (int_reg_operand (op1
, Pmode
))
20355 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
20357 emit_insn (gen_rtx_SET (scratch
, addr
));
20358 new_addr
= scratch
;
20362 else if (mode_supports_vsx_dform_quad (mode
) && CONST_INT_P (op1
))
20364 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
20365 || !quad_address_p (addr
, mode
, false))
20367 emit_insn (gen_rtx_SET (scratch
, addr
));
20368 new_addr
= scratch
;
20372 /* Make sure the register class can handle offset addresses. */
20373 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
20375 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
20377 emit_insn (gen_rtx_SET (scratch
, addr
));
20378 new_addr
= scratch
;
20383 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20388 op0
= XEXP (addr
, 0);
20389 op1
= XEXP (addr
, 1);
20390 if (!base_reg_operand (op0
, Pmode
))
20391 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20393 else if (int_reg_operand (op1
, Pmode
))
20395 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
20397 emit_insn (gen_rtx_SET (scratch
, addr
));
20398 new_addr
= scratch
;
20402 /* Quad offsets are restricted and can't handle normal addresses. */
20403 else if (mode_supports_vsx_dform_quad (mode
))
20405 emit_insn (gen_rtx_SET (scratch
, addr
));
20406 new_addr
= scratch
;
20409 /* Make sure the register class can handle offset addresses. */
20410 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
20412 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
20414 emit_insn (gen_rtx_SET (scratch
, addr
));
20415 new_addr
= scratch
;
20420 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20427 rs6000_emit_move (scratch
, addr
, Pmode
);
20428 new_addr
= scratch
;
20432 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
20435 /* Adjust the address if it changed. */
20436 if (addr
!= new_addr
)
20438 mem
= replace_equiv_address_nv (mem
, new_addr
);
20439 if (TARGET_DEBUG_ADDR
)
20440 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20443 /* Now create the move. */
20445 emit_insn (gen_rtx_SET (mem
, reg
));
20447 emit_insn (gen_rtx_SET (reg
, mem
));
20452 /* Convert reloads involving 64-bit gprs and misaligned offset
20453 addressing, or multiple 32-bit gprs and offsets that are too large,
20454 to use indirect addressing. */
20457 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
20459 int regno
= true_regnum (reg
);
20460 enum reg_class rclass
;
20462 rtx scratch_or_premodify
= scratch
;
20464 if (TARGET_DEBUG_ADDR
)
20466 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
20467 store_p
? "store" : "load");
20468 fprintf (stderr
, "reg:\n");
20470 fprintf (stderr
, "mem:\n");
20472 fprintf (stderr
, "scratch:\n");
20473 debug_rtx (scratch
);
20476 gcc_assert (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
);
20477 gcc_assert (GET_CODE (mem
) == MEM
);
20478 rclass
= REGNO_REG_CLASS (regno
);
20479 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
20480 addr
= XEXP (mem
, 0);
20482 if (GET_CODE (addr
) == PRE_MODIFY
)
20484 gcc_assert (REG_P (XEXP (addr
, 0))
20485 && GET_CODE (XEXP (addr
, 1)) == PLUS
20486 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
20487 scratch_or_premodify
= XEXP (addr
, 0);
20488 if (!HARD_REGISTER_P (scratch_or_premodify
))
20489 /* If we have a pseudo here then reload will have arranged
20490 to have it replaced, but only in the original insn.
20491 Use the replacement here too. */
20492 scratch_or_premodify
= find_replacement (&XEXP (addr
, 0));
20494 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20495 expressions from the original insn, without unsharing them.
20496 Any RTL that points into the original insn will of course
20497 have register replacements applied. That is why we don't
20498 need to look for replacements under the PLUS. */
20499 addr
= XEXP (addr
, 1);
20501 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
20503 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
20505 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
20507 /* Now create the move. */
20509 emit_insn (gen_rtx_SET (mem
, reg
));
20511 emit_insn (gen_rtx_SET (reg
, mem
));
20516 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
20517 this function has any SDmode references. If we are on a power7 or later, we
20518 don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
20519 can load/store the value. */
20522 rs6000_alloc_sdmode_stack_slot (void)
20526 gimple_stmt_iterator gsi
;
20528 gcc_assert (cfun
->machine
->sdmode_stack_slot
== NULL_RTX
);
20529 /* We use a different approach for dealing with the secondary
20534 if (TARGET_NO_SDMODE_STACK
)
20537 FOR_EACH_BB_FN (bb
, cfun
)
20538 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
20540 tree ret
= walk_gimple_op (gsi_stmt (gsi
), rs6000_check_sdmode
, NULL
);
20543 rtx stack
= assign_stack_local (DDmode
, GET_MODE_SIZE (DDmode
), 0);
20544 cfun
->machine
->sdmode_stack_slot
= adjust_address_nv (stack
,
20550 /* Check for any SDmode parameters of the function. */
20551 for (t
= DECL_ARGUMENTS (cfun
->decl
); t
; t
= DECL_CHAIN (t
))
20553 if (TREE_TYPE (t
) == error_mark_node
)
20556 if (TYPE_MODE (TREE_TYPE (t
)) == SDmode
20557 || TYPE_MODE (DECL_ARG_TYPE (t
)) == SDmode
)
20559 rtx stack
= assign_stack_local (DDmode
, GET_MODE_SIZE (DDmode
), 0);
20560 cfun
->machine
->sdmode_stack_slot
= adjust_address_nv (stack
,
20568 rs6000_instantiate_decls (void)
20570 if (cfun
->machine
->sdmode_stack_slot
!= NULL_RTX
)
20571 instantiate_decl_rtl (cfun
->machine
->sdmode_stack_slot
);
20574 /* Given an rtx X being reloaded into a reg required to be
20575 in class CLASS, return the class of reg to actually use.
20576 In general this is just CLASS; but on some machines
20577 in some cases it is preferable to use a more restrictive class.
20579 On the RS/6000, we have to return NO_REGS when we want to reload a
20580 floating-point CONST_DOUBLE to force it to be copied to memory.
20582 We also don't want to reload integer values into floating-point
20583 registers if we can at all help it. In fact, this can
20584 cause reload to die, if it tries to generate a reload of CTR
20585 into a FP register and discovers it doesn't have the memory location
20588 ??? Would it be a good idea to have reload do the converse, that is
20589 try to reload floating modes into FP registers if possible?
20592 static enum reg_class
20593 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
20595 machine_mode mode
= GET_MODE (x
);
20596 bool is_constant
= CONSTANT_P (x
);
20598 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20599 reload class for it. */
20600 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
20601 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
20604 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
20605 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
20608 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20609 the reloading of address expressions using PLUS into floating point
20611 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
20615 /* Zero is always allowed in all VSX registers. */
20616 if (x
== CONST0_RTX (mode
))
20619 /* If this is a vector constant that can be formed with a few Altivec
20620 instructions, we want altivec registers. */
20621 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
20622 return ALTIVEC_REGS
;
20624 /* If this is an integer constant that can easily be loaded into
20625 vector registers, allow it. */
20626 if (CONST_INT_P (x
))
20628 HOST_WIDE_INT value
= INTVAL (x
);
20630 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
20631 2.06 can generate it in the Altivec registers with
20635 if (TARGET_P8_VECTOR
)
20637 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
20638 return ALTIVEC_REGS
;
20643 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
20644 a sign extend in the Altivec registers. */
20645 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
20646 && TARGET_VSX_SMALL_INTEGER
20647 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
20648 return ALTIVEC_REGS
;
20651 /* Force constant to memory. */
20655 /* D-form addressing can easily reload the value. */
20656 if (mode_supports_vmx_dform (mode
)
20657 || mode_supports_vsx_dform_quad (mode
))
20660 /* If this is a scalar floating point value and we don't have D-form
20661 addressing, prefer the traditional floating point registers so that we
20662 can use D-form (register+offset) addressing. */
20663 if (rclass
== VSX_REGS
20664 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
20667 /* Prefer the Altivec registers if Altivec is handling the vector
20668 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20670 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
20671 || mode
== V1TImode
)
20672 return ALTIVEC_REGS
;
20677 if (is_constant
|| GET_CODE (x
) == PLUS
)
20679 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
20680 return GENERAL_REGS
;
20681 if (reg_class_subset_p (BASE_REGS
, rclass
))
20686 if (GET_MODE_CLASS (mode
) == MODE_INT
&& rclass
== NON_SPECIAL_REGS
)
20687 return GENERAL_REGS
;
20692 /* Debug version of rs6000_preferred_reload_class. */
20693 static enum reg_class
20694 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
20696 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
20699 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20701 reg_class_names
[ret
], reg_class_names
[rclass
],
20702 GET_MODE_NAME (GET_MODE (x
)));
20708 /* If we are copying between FP or AltiVec registers and anything else, we need
20709 a memory location. The exception is when we are targeting ppc64 and the
20710 move to/from fpr to gpr instructions are available. Also, under VSX, you
20711 can copy vector registers from the FP register set to the Altivec register
20712 set and vice versa. */
20715 rs6000_secondary_memory_needed (enum reg_class from_class
,
20716 enum reg_class to_class
,
20719 enum rs6000_reg_type from_type
, to_type
;
20720 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
20721 || (to_class
== ALTIVEC_REGS
));
20723 /* If a simple/direct move is available, we don't need secondary memory */
20724 from_type
= reg_class_to_reg_type
[(int)from_class
];
20725 to_type
= reg_class_to_reg_type
[(int)to_class
];
20727 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
20728 (secondary_reload_info
*)0, altivec_p
))
20731 /* If we have a floating point or vector register class, we need to use
20732 memory to transfer the data. */
20733 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
20739 /* Debug version of rs6000_secondary_memory_needed. */
20741 rs6000_debug_secondary_memory_needed (enum reg_class from_class
,
20742 enum reg_class to_class
,
20745 bool ret
= rs6000_secondary_memory_needed (from_class
, to_class
, mode
);
20748 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20749 "to_class = %s, mode = %s\n",
20750 ret
? "true" : "false",
20751 reg_class_names
[from_class
],
20752 reg_class_names
[to_class
],
20753 GET_MODE_NAME (mode
));
20758 /* Return the register class of a scratch register needed to copy IN into
20759 or out of a register in RCLASS in MODE. If it can be done directly,
20760 NO_REGS is returned. */
20762 static enum reg_class
20763 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
20768 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
20770 && MACHOPIC_INDIRECT
20774 /* We cannot copy a symbolic operand directly into anything
20775 other than BASE_REGS for TARGET_ELF. So indicate that a
20776 register from BASE_REGS is needed as an intermediate
20779 On Darwin, pic addresses require a load from memory, which
20780 needs a base register. */
20781 if (rclass
!= BASE_REGS
20782 && (GET_CODE (in
) == SYMBOL_REF
20783 || GET_CODE (in
) == HIGH
20784 || GET_CODE (in
) == LABEL_REF
20785 || GET_CODE (in
) == CONST
))
20789 if (GET_CODE (in
) == REG
)
20791 regno
= REGNO (in
);
20792 if (regno
>= FIRST_PSEUDO_REGISTER
)
20794 regno
= true_regnum (in
);
20795 if (regno
>= FIRST_PSEUDO_REGISTER
)
20799 else if (GET_CODE (in
) == SUBREG
)
20801 regno
= true_regnum (in
);
20802 if (regno
>= FIRST_PSEUDO_REGISTER
)
20808 /* If we have VSX register moves, prefer moving scalar values between
20809 Altivec registers and GPR by going via an FPR (and then via memory)
20810 instead of reloading the secondary memory address for Altivec moves. */
20812 && GET_MODE_SIZE (mode
) < 16
20813 && !mode_supports_vmx_dform (mode
)
20814 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
20815 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
20816 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
20817 && (regno
>= 0 && INT_REGNO_P (regno
)))))
20820 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20822 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
20823 || (regno
>= 0 && INT_REGNO_P (regno
)))
20826 /* Constants, memory, and VSX registers can go into VSX registers (both the
20827 traditional floating point and the altivec registers). */
20828 if (rclass
== VSX_REGS
20829 && (regno
== -1 || VSX_REGNO_P (regno
)))
20832 /* Constants, memory, and FP registers can go into FP registers. */
20833 if ((regno
== -1 || FP_REGNO_P (regno
))
20834 && (rclass
== FLOAT_REGS
|| rclass
== NON_SPECIAL_REGS
))
20835 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
20837 /* Memory, and AltiVec registers can go into AltiVec registers. */
20838 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
20839 && rclass
== ALTIVEC_REGS
)
20842 /* We can copy among the CR registers. */
20843 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
20844 && regno
>= 0 && CR_REGNO_P (regno
))
20847 /* Otherwise, we need GENERAL_REGS. */
20848 return GENERAL_REGS
;
20851 /* Debug version of rs6000_secondary_reload_class. */
20852 static enum reg_class
20853 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
20854 machine_mode mode
, rtx in
)
20856 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
20858 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20859 "mode = %s, input rtx:\n",
20860 reg_class_names
[ret
], reg_class_names
[rclass
],
20861 GET_MODE_NAME (mode
));
20867 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
20870 rs6000_cannot_change_mode_class (machine_mode from
,
20872 enum reg_class rclass
)
20874 unsigned from_size
= GET_MODE_SIZE (from
);
20875 unsigned to_size
= GET_MODE_SIZE (to
);
20877 if (from_size
!= to_size
)
20879 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
20881 if (reg_classes_intersect_p (xclass
, rclass
))
20883 unsigned to_nregs
= hard_regno_nregs
[FIRST_FPR_REGNO
][to
];
20884 unsigned from_nregs
= hard_regno_nregs
[FIRST_FPR_REGNO
][from
];
20885 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
20886 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
20888 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20889 single register under VSX because the scalar part of the register
20890 is in the upper 64-bits, and not the lower 64-bits. Types like
20891 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20892 IEEE floating point can't overlap, and neither can small
20895 if (to_float128_vector_p
&& from_float128_vector_p
)
20898 else if (to_float128_vector_p
|| from_float128_vector_p
)
20901 /* TDmode in floating-mode registers must always go into a register
20902 pair with the most significant word in the even-numbered register
20903 to match ISA requirements. In little-endian mode, this does not
20904 match subreg numbering, so we cannot allow subregs. */
20905 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
20908 if (from_size
< 8 || to_size
< 8)
20911 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
20914 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
20923 /* Since the VSX register set includes traditional floating point registers
20924 and altivec registers, just check for the size being different instead of
20925 trying to check whether the modes are vector modes. Otherwise it won't
20926 allow say DF and DI to change classes. For types like TFmode and TDmode
20927 that take 2 64-bit registers, rather than a single 128-bit register, don't
20928 allow subregs of those types to other 128 bit types. */
20929 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
20931 unsigned num_regs
= (from_size
+ 15) / 16;
20932 if (hard_regno_nregs
[FIRST_FPR_REGNO
][to
] > num_regs
20933 || hard_regno_nregs
[FIRST_FPR_REGNO
][from
] > num_regs
)
20936 return (from_size
!= 8 && from_size
!= 16);
20939 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
20940 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
20946 /* Debug version of rs6000_cannot_change_mode_class. */
20948 rs6000_debug_cannot_change_mode_class (machine_mode from
,
20950 enum reg_class rclass
)
20952 bool ret
= rs6000_cannot_change_mode_class (from
, to
, rclass
);
20955 "rs6000_cannot_change_mode_class, return %s, from = %s, "
20956 "to = %s, rclass = %s\n",
20957 ret
? "true" : "false",
20958 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
20959 reg_class_names
[rclass
]);
20964 /* Return a string to do a move operation of 128 bits of data. */
20967 rs6000_output_move_128bit (rtx operands
[])
20969 rtx dest
= operands
[0];
20970 rtx src
= operands
[1];
20971 machine_mode mode
= GET_MODE (dest
);
20974 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
20975 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
20979 dest_regno
= REGNO (dest
);
20980 dest_gpr_p
= INT_REGNO_P (dest_regno
);
20981 dest_fp_p
= FP_REGNO_P (dest_regno
);
20982 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
20983 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
20988 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
20993 src_regno
= REGNO (src
);
20994 src_gpr_p
= INT_REGNO_P (src_regno
);
20995 src_fp_p
= FP_REGNO_P (src_regno
);
20996 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
20997 src_vsx_p
= src_fp_p
| src_vmx_p
;
21002 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
21005 /* Register moves. */
21006 if (dest_regno
>= 0 && src_regno
>= 0)
21013 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
21014 return (WORDS_BIG_ENDIAN
21015 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21016 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21018 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
21022 else if (TARGET_VSX
&& dest_vsx_p
)
21025 return "xxlor %x0,%x1,%x1";
21027 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
21028 return (WORDS_BIG_ENDIAN
21029 ? "mtvsrdd %x0,%1,%L1"
21030 : "mtvsrdd %x0,%L1,%1");
21032 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
21036 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
21037 return "vor %0,%1,%1";
21039 else if (dest_fp_p
&& src_fp_p
)
21044 else if (dest_regno
>= 0 && MEM_P (src
))
21048 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
21054 else if (TARGET_ALTIVEC
&& dest_vmx_p
21055 && altivec_indexed_or_indirect_operand (src
, mode
))
21056 return "lvx %0,%y1";
21058 else if (TARGET_VSX
&& dest_vsx_p
)
21060 if (mode_supports_vsx_dform_quad (mode
)
21061 && quad_address_p (XEXP (src
, 0), mode
, true))
21062 return "lxv %x0,%1";
21064 else if (TARGET_P9_VECTOR
)
21065 return "lxvx %x0,%y1";
21067 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
21068 return "lxvw4x %x0,%y1";
21071 return "lxvd2x %x0,%y1";
21074 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
21075 return "lvx %0,%y1";
21077 else if (dest_fp_p
)
21082 else if (src_regno
>= 0 && MEM_P (dest
))
21086 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
21087 return "stq %1,%0";
21092 else if (TARGET_ALTIVEC
&& src_vmx_p
21093 && altivec_indexed_or_indirect_operand (src
, mode
))
21094 return "stvx %1,%y0";
21096 else if (TARGET_VSX
&& src_vsx_p
)
21098 if (mode_supports_vsx_dform_quad (mode
)
21099 && quad_address_p (XEXP (dest
, 0), mode
, true))
21100 return "stxv %x1,%0";
21102 else if (TARGET_P9_VECTOR
)
21103 return "stxvx %x1,%y0";
21105 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
21106 return "stxvw4x %x1,%y0";
21109 return "stxvd2x %x1,%y0";
21112 else if (TARGET_ALTIVEC
&& src_vmx_p
)
21113 return "stvx %1,%y0";
21120 else if (dest_regno
>= 0
21121 && (GET_CODE (src
) == CONST_INT
21122 || GET_CODE (src
) == CONST_WIDE_INT
21123 || GET_CODE (src
) == CONST_DOUBLE
21124 || GET_CODE (src
) == CONST_VECTOR
))
21129 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
21130 || (dest_vsx_p
&& TARGET_VSX
))
21131 return output_vec_const_move (operands
);
21134 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
21137 /* Validate a 128-bit move. */
21139 rs6000_move_128bit_ok_p (rtx operands
[])
21141 machine_mode mode
= GET_MODE (operands
[0]);
21142 return (gpc_reg_operand (operands
[0], mode
)
21143 || gpc_reg_operand (operands
[1], mode
));
21146 /* Return true if a 128-bit move needs to be split. */
21148 rs6000_split_128bit_ok_p (rtx operands
[])
21150 if (!reload_completed
)
21153 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
21156 if (quad_load_store_p (operands
[0], operands
[1]))
21163 /* Given a comparison operation, return the bit number in CCR to test. We
21164 know this is a valid comparison.
21166 SCC_P is 1 if this is for an scc. That means that %D will have been
21167 used instead of %C, so the bits will be in different places.
21169 Return -1 if OP isn't a valid comparison for some reason. */
21172 ccr_bit (rtx op
, int scc_p
)
21174 enum rtx_code code
= GET_CODE (op
);
21175 machine_mode cc_mode
;
21180 if (!COMPARISON_P (op
))
21183 reg
= XEXP (op
, 0);
21185 gcc_assert (GET_CODE (reg
) == REG
&& CR_REGNO_P (REGNO (reg
)));
21187 cc_mode
= GET_MODE (reg
);
21188 cc_regnum
= REGNO (reg
);
21189 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
21191 validate_condition_mode (code
, cc_mode
);
21193 /* When generating a sCOND operation, only positive conditions are
21196 || code
== EQ
|| code
== GT
|| code
== LT
|| code
== UNORDERED
21197 || code
== GTU
|| code
== LTU
);
21202 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
21204 return base_bit
+ 2;
21205 case GT
: case GTU
: case UNLE
:
21206 return base_bit
+ 1;
21207 case LT
: case LTU
: case UNGE
:
21209 case ORDERED
: case UNORDERED
:
21210 return base_bit
+ 3;
21213 /* If scc, we will have done a cror to put the bit in the
21214 unordered position. So test that bit. For integer, this is ! LT
21215 unless this is an scc insn. */
21216 return scc_p
? base_bit
+ 3 : base_bit
;
21219 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
21222 gcc_unreachable ();
21226 /* Return the GOT register. */
21229 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
21231 /* The second flow pass currently (June 1999) can't update
21232 regs_ever_live without disturbing other parts of the compiler, so
21233 update it here to make the prolog/epilogue code happy. */
21234 if (!can_create_pseudo_p ()
21235 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
21236 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
21238 crtl
->uses_pic_offset_table
= 1;
21240 return pic_offset_table_rtx
;
21243 static rs6000_stack_t stack_info
;
21245 /* Function to init struct machine_function.
21246 This will be called, via a pointer variable,
21247 from push_function_context. */
21249 static struct machine_function
*
21250 rs6000_init_machine_status (void)
21252 stack_info
.reload_completed
= 0;
21253 return ggc_cleared_alloc
<machine_function
> ();
21256 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21258 /* Write out a function code label. */
21261 rs6000_output_function_entry (FILE *file
, const char *fname
)
21263 if (fname
[0] != '.')
21265 switch (DEFAULT_ABI
)
21268 gcc_unreachable ();
21274 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
21284 RS6000_OUTPUT_BASENAME (file
, fname
);
21287 /* Print an operand. Recognize special options, documented below. */
21290 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21291 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21293 #define SMALL_DATA_RELOC "sda21"
21294 #define SMALL_DATA_REG 0
21298 print_operand (FILE *file
, rtx x
, int code
)
21301 unsigned HOST_WIDE_INT uval
;
21305 /* %a is output_address. */
21307 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21311 /* Like 'J' but get to the GT bit only. */
21312 gcc_assert (REG_P (x
));
21314 /* Bit 1 is GT bit. */
21315 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
21317 /* Add one for shift count in rlinm for scc. */
21318 fprintf (file
, "%d", i
+ 1);
21322 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21325 output_operand_lossage ("invalid %%e value");
21330 if ((uval
& 0xffff) == 0 && uval
!= 0)
21335 /* X is a CR register. Print the number of the EQ bit of the CR */
21336 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
21337 output_operand_lossage ("invalid %%E value");
21339 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
21343 /* X is a CR register. Print the shift count needed to move it
21344 to the high-order four bits. */
21345 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
21346 output_operand_lossage ("invalid %%f value");
21348 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
21352 /* Similar, but print the count for the rotate in the opposite
21354 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
21355 output_operand_lossage ("invalid %%F value");
21357 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
21361 /* X is a constant integer. If it is negative, print "m",
21362 otherwise print "z". This is to make an aze or ame insn. */
21363 if (GET_CODE (x
) != CONST_INT
)
21364 output_operand_lossage ("invalid %%G value");
21365 else if (INTVAL (x
) >= 0)
21372 /* If constant, output low-order five bits. Otherwise, write
21375 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
21377 print_operand (file
, x
, 0);
21381 /* If constant, output low-order six bits. Otherwise, write
21384 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
21386 print_operand (file
, x
, 0);
21390 /* Print `i' if this is a constant, else nothing. */
21396 /* Write the bit number in CCR for jump. */
21397 i
= ccr_bit (x
, 0);
21399 output_operand_lossage ("invalid %%j code");
21401 fprintf (file
, "%d", i
);
21405 /* Similar, but add one for shift count in rlinm for scc and pass
21406 scc flag to `ccr_bit'. */
21407 i
= ccr_bit (x
, 1);
21409 output_operand_lossage ("invalid %%J code");
21411 /* If we want bit 31, write a shift count of zero, not 32. */
21412 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
21416 /* X must be a constant. Write the 1's complement of the
21419 output_operand_lossage ("invalid %%k value");
21421 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
21425 /* X must be a symbolic constant on ELF. Write an
21426 expression suitable for an 'addi' that adds in the low 16
21427 bits of the MEM. */
21428 if (GET_CODE (x
) == CONST
)
21430 if (GET_CODE (XEXP (x
, 0)) != PLUS
21431 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) != SYMBOL_REF
21432 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
21433 || GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
21434 output_operand_lossage ("invalid %%K value");
21436 print_operand_address (file
, x
);
21437 fputs ("@l", file
);
21440 /* %l is output_asm_label. */
21443 /* Write second word of DImode or DFmode reference. Works on register
21444 or non-indexed memory only. */
21446 fputs (reg_names
[REGNO (x
) + 1], file
);
21447 else if (MEM_P (x
))
21449 machine_mode mode
= GET_MODE (x
);
21450 /* Handle possible auto-increment. Since it is pre-increment and
21451 we have already done it, we can just use an offset of word. */
21452 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
21453 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
21454 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
21456 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
21457 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
21460 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
21464 if (small_data_operand (x
, GET_MODE (x
)))
21465 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
21466 reg_names
[SMALL_DATA_REG
]);
21471 /* Write the number of elements in the vector times 4. */
21472 if (GET_CODE (x
) != PARALLEL
)
21473 output_operand_lossage ("invalid %%N value");
21475 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
21479 /* Similar, but subtract 1 first. */
21480 if (GET_CODE (x
) != PARALLEL
)
21481 output_operand_lossage ("invalid %%O value");
21483 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
21487 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21490 || (i
= exact_log2 (INTVAL (x
))) < 0)
21491 output_operand_lossage ("invalid %%p value");
21493 fprintf (file
, "%d", i
);
21497 /* The operand must be an indirect memory reference. The result
21498 is the register name. */
21499 if (GET_CODE (x
) != MEM
|| GET_CODE (XEXP (x
, 0)) != REG
21500 || REGNO (XEXP (x
, 0)) >= 32)
21501 output_operand_lossage ("invalid %%P value");
21503 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
21507 /* This outputs the logical code corresponding to a boolean
21508 expression. The expression may have one or both operands
21509 negated (if one, only the first one). For condition register
21510 logical operations, it will also treat the negated
21511 CR codes as NOTs, but not handle NOTs of them. */
21513 const char *const *t
= 0;
21515 enum rtx_code code
= GET_CODE (x
);
21516 static const char * const tbl
[3][3] = {
21517 { "and", "andc", "nor" },
21518 { "or", "orc", "nand" },
21519 { "xor", "eqv", "xor" } };
21523 else if (code
== IOR
)
21525 else if (code
== XOR
)
21528 output_operand_lossage ("invalid %%q value");
21530 if (GET_CODE (XEXP (x
, 0)) != NOT
)
21534 if (GET_CODE (XEXP (x
, 1)) == NOT
)
21545 if (! TARGET_MFCRF
)
21551 /* X is a CR register. Print the mask for `mtcrf'. */
21552 if (GET_CODE (x
) != REG
|| ! CR_REGNO_P (REGNO (x
)))
21553 output_operand_lossage ("invalid %%R value");
21555 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
21559 /* Low 5 bits of 32 - value */
21561 output_operand_lossage ("invalid %%s value");
21563 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
21567 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21568 gcc_assert (REG_P (x
) && GET_MODE (x
) == CCmode
);
21570 /* Bit 3 is OV bit. */
21571 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
21573 /* If we want bit 31, write a shift count of zero, not 32. */
21574 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
21578 /* Print the symbolic name of a branch target register. */
21579 if (GET_CODE (x
) != REG
|| (REGNO (x
) != LR_REGNO
21580 && REGNO (x
) != CTR_REGNO
))
21581 output_operand_lossage ("invalid %%T value");
21582 else if (REGNO (x
) == LR_REGNO
)
21583 fputs ("lr", file
);
21585 fputs ("ctr", file
);
21589 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21590 for use in unsigned operand. */
21593 output_operand_lossage ("invalid %%u value");
21598 if ((uval
& 0xffff) == 0)
21601 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
21605 /* High-order 16 bits of constant for use in signed operand. */
21607 output_operand_lossage ("invalid %%v value");
21609 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
21610 (INTVAL (x
) >> 16) & 0xffff);
21614 /* Print `u' if this has an auto-increment or auto-decrement. */
21616 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
21617 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
21618 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
21623 /* Print the trap code for this operand. */
21624 switch (GET_CODE (x
))
21627 fputs ("eq", file
); /* 4 */
21630 fputs ("ne", file
); /* 24 */
21633 fputs ("lt", file
); /* 16 */
21636 fputs ("le", file
); /* 20 */
21639 fputs ("gt", file
); /* 8 */
21642 fputs ("ge", file
); /* 12 */
21645 fputs ("llt", file
); /* 2 */
21648 fputs ("lle", file
); /* 6 */
21651 fputs ("lgt", file
); /* 1 */
21654 fputs ("lge", file
); /* 5 */
21657 gcc_unreachable ();
21662 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21665 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
21666 ((INTVAL (x
) & 0xffff) ^ 0x8000) - 0x8000);
21668 print_operand (file
, x
, 0);
21672 /* X is a FPR or Altivec register used in a VSX context. */
21673 if (GET_CODE (x
) != REG
|| !VSX_REGNO_P (REGNO (x
)))
21674 output_operand_lossage ("invalid %%x value");
21677 int reg
= REGNO (x
);
21678 int vsx_reg
= (FP_REGNO_P (reg
)
21680 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
21682 #ifdef TARGET_REGNAMES
21683 if (TARGET_REGNAMES
)
21684 fprintf (file
, "%%vs%d", vsx_reg
);
21687 fprintf (file
, "%d", vsx_reg
);
21693 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
21694 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
21695 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
21700 /* Like 'L', for third word of TImode/PTImode */
21702 fputs (reg_names
[REGNO (x
) + 2], file
);
21703 else if (MEM_P (x
))
21705 machine_mode mode
= GET_MODE (x
);
21706 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
21707 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
21708 output_address (mode
, plus_constant (Pmode
,
21709 XEXP (XEXP (x
, 0), 0), 8));
21710 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
21711 output_address (mode
, plus_constant (Pmode
,
21712 XEXP (XEXP (x
, 0), 0), 8));
21714 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
21715 if (small_data_operand (x
, GET_MODE (x
)))
21716 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
21717 reg_names
[SMALL_DATA_REG
]);
21722 /* X is a SYMBOL_REF. Write out the name preceded by a
21723 period and without any trailing data in brackets. Used for function
21724 names. If we are configured for System V (or the embedded ABI) on
21725 the PowerPC, do not emit the period, since those systems do not use
21726 TOCs and the like. */
21727 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
21729 /* For macho, check to see if we need a stub. */
21732 const char *name
= XSTR (x
, 0);
21734 if (darwin_emit_branch_islands
21735 && MACHOPIC_INDIRECT
21736 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
21737 name
= machopic_indirection_name (x
, /*stub_p=*/true);
21739 assemble_name (file
, name
);
21741 else if (!DOT_SYMBOLS
)
21742 assemble_name (file
, XSTR (x
, 0));
21744 rs6000_output_function_entry (file
, XSTR (x
, 0));
21748 /* Like 'L', for last word of TImode/PTImode. */
21750 fputs (reg_names
[REGNO (x
) + 3], file
);
21751 else if (MEM_P (x
))
21753 machine_mode mode
= GET_MODE (x
);
21754 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
21755 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
21756 output_address (mode
, plus_constant (Pmode
,
21757 XEXP (XEXP (x
, 0), 0), 12));
21758 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
21759 output_address (mode
, plus_constant (Pmode
,
21760 XEXP (XEXP (x
, 0), 0), 12));
21762 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
21763 if (small_data_operand (x
, GET_MODE (x
)))
21764 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
21765 reg_names
[SMALL_DATA_REG
]);
21769 /* Print AltiVec memory operand. */
21774 gcc_assert (MEM_P (x
));
21778 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x
))
21779 && GET_CODE (tmp
) == AND
21780 && GET_CODE (XEXP (tmp
, 1)) == CONST_INT
21781 && INTVAL (XEXP (tmp
, 1)) == -16)
21782 tmp
= XEXP (tmp
, 0);
21783 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
21784 && GET_CODE (tmp
) == PRE_MODIFY
)
21785 tmp
= XEXP (tmp
, 1);
21787 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
21790 if (GET_CODE (tmp
) != PLUS
21791 || !REG_P (XEXP (tmp
, 0))
21792 || !REG_P (XEXP (tmp
, 1)))
21794 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21798 if (REGNO (XEXP (tmp
, 0)) == 0)
21799 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
21800 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
21802 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
21803 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
21810 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
21811 else if (MEM_P (x
))
21813 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21814 know the width from the mode. */
21815 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
21816 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
21817 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
21818 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
21819 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
21820 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
21821 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
21822 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
21824 output_address (GET_MODE (x
), XEXP (x
, 0));
21828 if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
21829 /* This hack along with a corresponding hack in
21830 rs6000_output_addr_const_extra arranges to output addends
21831 where the assembler expects to find them. eg.
21832 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21833 without this hack would be output as "x@toc+4". We
21835 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
21837 output_addr_const (file
, x
);
21842 if (const char *name
= get_some_local_dynamic_name ())
21843 assemble_name (file
, name
);
21845 output_operand_lossage ("'%%&' used without any "
21846 "local dynamic TLS references");
21850 output_operand_lossage ("invalid %%xn code");
21854 /* Print the address of an operand. */
21857 print_operand_address (FILE *file
, rtx x
)
21860 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
21861 else if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
21862 || GET_CODE (x
) == LABEL_REF
)
21864 output_addr_const (file
, x
);
21865 if (small_data_operand (x
, GET_MODE (x
)))
21866 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
21867 reg_names
[SMALL_DATA_REG
]);
21869 gcc_assert (!TARGET_TOC
);
21871 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
21872 && REG_P (XEXP (x
, 1)))
21874 if (REGNO (XEXP (x
, 0)) == 0)
21875 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
21876 reg_names
[ REGNO (XEXP (x
, 0)) ]);
21878 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
21879 reg_names
[ REGNO (XEXP (x
, 1)) ]);
21881 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
21882 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
21883 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
21884 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
21886 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
21887 && CONSTANT_P (XEXP (x
, 1)))
21889 fprintf (file
, "lo16(");
21890 output_addr_const (file
, XEXP (x
, 1));
21891 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
21895 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
21896 && CONSTANT_P (XEXP (x
, 1)))
21898 output_addr_const (file
, XEXP (x
, 1));
21899 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
21902 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
21904 /* This hack along with a corresponding hack in
21905 rs6000_output_addr_const_extra arranges to output addends
21906 where the assembler expects to find them. eg.
21908 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21909 without this hack would be output as "x@toc+8@l(9)". We
21910 want "x+8@toc@l(9)". */
21911 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
21912 if (GET_CODE (x
) == LO_SUM
)
21913 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
21915 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
21918 gcc_unreachable ();
21921 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21924 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
21926 if (GET_CODE (x
) == UNSPEC
)
21927 switch (XINT (x
, 1))
21929 case UNSPEC_TOCREL
:
21930 gcc_checking_assert (GET_CODE (XVECEXP (x
, 0, 0)) == SYMBOL_REF
21931 && REG_P (XVECEXP (x
, 0, 1))
21932 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
21933 output_addr_const (file
, XVECEXP (x
, 0, 0));
21934 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
21936 if (INTVAL (tocrel_offset_oac
) >= 0)
21937 fprintf (file
, "+");
21938 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
21940 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
21943 assemble_name (file
, toc_label_name
);
21946 else if (TARGET_ELF
)
21947 fputs ("@toc", file
);
21951 case UNSPEC_MACHOPIC_OFFSET
:
21952 output_addr_const (file
, XVECEXP (x
, 0, 0));
21954 machopic_output_function_base_name (file
);
21961 /* Target hook for assembling integer objects. The PowerPC version has
21962 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21963 is defined. It also needs to handle DI-mode objects on 64-bit
21967 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21969 #ifdef RELOCATABLE_NEEDS_FIXUP
21970 /* Special handling for SI values. */
21971 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
21973 static int recurse
= 0;
21975 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21976 the .fixup section. Since the TOC section is already relocated, we
21977 don't need to mark it here. We used to skip the text section, but it
21978 should never be valid for relocated addresses to be placed in the text
21980 if (DEFAULT_ABI
== ABI_V4
21981 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
21982 && in_section
!= toc_section
21984 && !CONST_SCALAR_INT_P (x
)
21990 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
21992 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
21993 fprintf (asm_out_file
, "\t.long\t(");
21994 output_addr_const (asm_out_file
, x
);
21995 fprintf (asm_out_file
, ")@fixup\n");
21996 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
21997 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
21998 fprintf (asm_out_file
, "\t.long\t");
21999 assemble_name (asm_out_file
, buf
);
22000 fprintf (asm_out_file
, "\n\t.previous\n");
22004 /* Remove initial .'s to turn a -mcall-aixdesc function
22005 address into the address of the descriptor, not the function
22007 else if (GET_CODE (x
) == SYMBOL_REF
22008 && XSTR (x
, 0)[0] == '.'
22009 && DEFAULT_ABI
== ABI_AIX
)
22011 const char *name
= XSTR (x
, 0);
22012 while (*name
== '.')
22015 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
22019 #endif /* RELOCATABLE_NEEDS_FIXUP */
22020 return default_assemble_integer (x
, size
, aligned_p
);
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  /* NOTE(review): lines between the signature and the comment below were
     lost in extraction; upstream has an XCOFF early return here — confirm.  */
  if (TARGET_XCOFF)
    return;

  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      /* Entry point symbol, then the '.'-prefixed descriptor symbol.  */
      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
22058 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
22060 /* Reversal of FP compares takes care -- an ordered compare
22061 becomes an unordered compare and vice versa. */
22062 if (mode
== CCFPmode
22063 && (!flag_finite_math_only
22064 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
22065 || code
== UNEQ
|| code
== LTGT
))
22066 return reverse_condition_maybe_unordered (code
);
22068 return reverse_condition (code
);
22071 /* Generate a compare for CODE. Return a brand-new rtx that
22072 represents the result of the compare. */
/* Head of rs6000_generate_compare: pick the condition-code mode for the
   compare of CMP's two operands, then (for IEEE-128 without hardware
   support) select the soft-float library function.  (Extraction note:
   lines are split mid-token and several original lines -- braces,
   'else', switch/case labels -- are elided; comments only.)  */
22075 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
22077 machine_mode comp_mode
;
22078 rtx compare_result
;
22079 enum rtx_code code
= GET_CODE (cmp
);
22080 rtx op0
= XEXP (cmp
, 0);
22081 rtx op1
= XEXP (cmp
, 1);
/* CC-mode selection: IEEE-128 without HW gets plain CCmode (a libcall
   result is compared), other FP gets CCFPmode, unsigned orderings get
   CCUNSmode.  */
22083 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
22084 comp_mode
= CCmode
;
22085 else if (FLOAT_MODE_P (mode
))
22086 comp_mode
= CCFPmode
;
22087 else if (code
== GTU
|| code
== LTU
22088 || code
== GEU
|| code
== LEU
)
22089 comp_mode
= CCUNSmode
;
22090 else if ((code
== EQ
|| code
== NE
)
22091 && unsigned_reg_p (op0
)
22092 && (unsigned_reg_p (op1
)
22093 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
22094 /* These are unsigned values, perhaps there will be a later
22095 ordering compare that can be shared with this one. */
22096 comp_mode
= CCUNSmode
;
/* Presumably the final 'else' arm (the 'else' line is elided).  */
22098 comp_mode
= CCmode
;
22100 /* If we have an unsigned compare, make sure we don't have a signed value as
22102 if (comp_mode
== CCUNSmode
&& GET_CODE (op1
) == CONST_INT
22103 && INTVAL (op1
) < 0)
/* A negative constant cannot appear in an unsigned compare: force it
   into a register and rebuild CMP.  */
22105 op0
= copy_rtx_if_shared (op0
);
22106 op1
= force_reg (GET_MODE (op0
), op1
);
22107 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
22110 /* First, the compare. */
22111 compare_result
= gen_reg_rtx (comp_mode
);
22113 /* IEEE 128-bit support in VSX registers when we do not have hardware
22115 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
22117 rtx libfunc
= NULL_RTX
;
22118 bool check_nan
= false;
/* The following assignments are the bodies of a switch over CODE whose
   case labels are elided in this extraction; each arm picks the optab
   library function (and, where needed, rewrites CODE for the compare
   of the libcall's SImode result against zero).  */
22125 libfunc
= optab_libfunc (eq_optab
, mode
);
22130 libfunc
= optab_libfunc (ge_optab
, mode
);
22135 libfunc
= optab_libfunc (le_optab
, mode
);
22140 libfunc
= optab_libfunc (unord_optab
, mode
);
22141 code
= (code
== UNORDERED
) ? NE
: EQ
;
22147 libfunc
= optab_libfunc (ge_optab
, mode
);
22148 code
= (code
== UNGE
) ? GE
: GT
;
22154 libfunc
= optab_libfunc (le_optab
, mode
);
22155 code
= (code
== UNLE
) ? LE
: LT
;
22161 libfunc
= optab_libfunc (eq_optab
, mode
);
22162 code
= (code
= UNEQ
) ? EQ
: NE
;
/* Tail of rs6000_generate_compare: emit the soft-float library call (with
   an explicit NaN pre-check for the signalling codes), or the hardware
   compare -- including the XL-compatible IBM-long-double PARALLEL and the
   stack-protector test -- then OR two CR bits together for FP codes that
   need it, and return the final condition rtx.  (Extraction note: lines
   split mid-token, several original lines elided; comments only.)  */
22166 gcc_unreachable ();
22169 gcc_assert (libfunc
);
/* Simple case: one libcall, result compared against zero below.  */
22172 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
22173 SImode
, 2, op0
, mode
, op1
, mode
);
22175 /* The library signals an exception for signalling NaNs, so we need to
22176 handle isgreater, etc. by first checking isordered. */
22179 rtx ne_rtx
, normal_dest
, unord_dest
;
22180 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
22181 rtx join_label
= gen_label_rtx ();
22182 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
22183 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
22186 /* Test for either value being a NaN. */
22187 gcc_assert (unord_func
);
22188 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
22189 SImode
, 2, op0
, mode
, op1
,
22192 /* Set value (0) if either value is a NaN, and jump to the join
22194 dest
= gen_reg_rtx (SImode
);
22195 emit_move_insn (dest
, const1_rtx
);
22196 emit_insn (gen_rtx_SET (unord_cmp
,
22197 gen_rtx_COMPARE (comp_mode
, unord_dest
,
/* Branch to join_label when the unordered libcall reported a NaN.  */
22200 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
22201 emit_jump_insn (gen_rtx_SET (pc_rtx
,
22202 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
22206 /* Do the normal comparison, knowing that the values are not
22208 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
22209 SImode
, 2, op0
, mode
, op1
,
22212 emit_insn (gen_cstoresi4 (dest
,
22213 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
22215 normal_dest
, const0_rtx
));
22217 /* Join NaN and non-Nan paths. Compare dest against 0. */
22218 emit_label (join_label
);
22222 emit_insn (gen_rtx_SET (compare_result
,
22223 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
22228 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22229 CLOBBERs to match cmptf_internal2 pattern. */
22230 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
22231 && FLOAT128_IBM_P (GET_MODE (op0
))
22232 && TARGET_HARD_FLOAT
)
/* Nine scratch clobbers (8 x DFmode + 1 x Pmode) mirror the pattern's
   temporaries.  */
22233 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
22235 gen_rtx_SET (compare_result
,
22236 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
22237 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22238 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22239 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22240 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22241 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22242 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22243 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22244 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
22245 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
/* Stack-protector canary test: OP1 wraps the guard in an UNSPEC.  */
22246 else if (GET_CODE (op1
) == UNSPEC
22247 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
22249 rtx op1b
= XVECEXP (op1
, 0, 0);
22250 comp_mode
= CCEQmode
;
22251 compare_result
= gen_reg_rtx (CCEQmode
);
/* DImode vs SImode variants -- the selecting 'if' line is elided.  */
22253 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
22255 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
22258 emit_insn (gen_rtx_SET (compare_result
,
22259 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
22262 /* Some kinds of FP comparisons need an OR operation;
22263 under flag_finite_math_only we don't bother. */
22264 if (FLOAT_MODE_P (mode
)
22265 && (!FLOAT128_IEEE_P (mode
) || TARGET_FLOAT128_HW
)
22266 && !flag_finite_math_only
22267 && (code
== LE
|| code
== GE
22268 || code
== UNEQ
|| code
== LTGT
22269 || code
== UNGT
|| code
== UNLT
))
22271 enum rtx_code or1
, or2
;
22272 rtx or1_rtx
, or2_rtx
, compare2_rtx
;
22273 rtx or_result
= gen_reg_rtx (CCEQmode
);
/* Decompose CODE into two single-bit CR tests OR1 || OR2 (the switch
   header line is elided).  */
22277 case LE
: or1
= LT
; or2
= EQ
; break;
22278 case GE
: or1
= GT
; or2
= EQ
; break;
22279 case UNEQ
: or1
= UNORDERED
; or2
= EQ
; break;
22280 case LTGT
: or1
= LT
; or2
= GT
; break;
22281 case UNGT
: or1
= UNORDERED
; or2
= GT
; break;
22282 case UNLT
: or1
= UNORDERED
; or2
= LT
; break;
22283 default: gcc_unreachable ();
22285 validate_condition_mode (or1
, comp_mode
);
22286 validate_condition_mode (or2
, comp_mode
);
22287 or1_rtx
= gen_rtx_fmt_ee (or1
, SImode
, compare_result
, const0_rtx
);
22288 or2_rtx
= gen_rtx_fmt_ee (or2
, SImode
, compare_result
, const0_rtx
);
22289 compare2_rtx
= gen_rtx_COMPARE (CCEQmode
,
22290 gen_rtx_IOR (SImode
, or1_rtx
, or2_rtx
),
22292 emit_insn (gen_rtx_SET (or_result
, compare2_rtx
));
/* From here on the caller tests OR_RESULT instead.  */
22294 compare_result
= or_result
;
22298 validate_condition_mode (code
, GET_MODE (compare_result
));
22300 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
22304 /* Return the diagnostic message string if the binary operation OP is
22305 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* Rejects mixing __float128 (KFmode), __ibm128 (IFmode) and long double
   (TFmode) operands unless -mfloat128-convert.  (Extraction note: lines
   split mid-token; the tail of each N_() literal and the NULL returns
   are elided; comments only.)  */
22308 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
22312 machine_mode mode1
= TYPE_MODE (type1
);
22313 machine_mode mode2
= TYPE_MODE (type2
);
22315 /* For complex modes, use the inner type. */
22316 if (COMPLEX_MODE_P (mode1
))
22317 mode1
= GET_MODE_INNER (mode1
);
22319 if (COMPLEX_MODE_P (mode2
))
22320 mode2
= GET_MODE_INNER (mode2
);
22322 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22323 double to intermix unless -mfloat128-convert. */
/* Identical modes are always fine (the early-return body is elided).  */
22324 if (mode1
== mode2
)
22327 if (!TARGET_FLOAT128_CVT
)
22329 if ((mode1
== KFmode
&& mode2
== IFmode
)
22330 || (mode1
== IFmode
&& mode2
== KFmode
))
22331 return N_("__float128 and __ibm128 cannot be used in the same "
/* When long double is IEEE quad, __ibm128 may not mix with it ...  */
22334 if (TARGET_IEEEQUAD
22335 && ((mode1
== IFmode
&& mode2
== TFmode
)
22336 || (mode1
== TFmode
&& mode2
== IFmode
)))
22337 return N_("__ibm128 and long double cannot be used in the same "
/* ... and when it is IBM double-double, __float128 may not.  */
22340 if (!TARGET_IEEEQUAD
22341 && ((mode1
== KFmode
&& mode2
== TFmode
)
22342 || (mode1
== TFmode
&& mode2
== KFmode
)))
22343 return N_("__float128 and long double cannot be used in the same "
22351 /* Expand floating point conversion to/from __float128 and __ibm128. */
/* Converts SRC to DEST across KFmode/TFmode/IFmode and the scalar and
   integer modes, preferring (1) a direct hardware insn from the
   hw_conversions table, then (2) an optab libcall, else aborting.
   (Extraction note: lines split mid-token, many original lines --
   braces, if/else headers, some struct fields -- are elided; comments
   only.)  */
22354 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
22356 machine_mode dest_mode
= GET_MODE (dest
);
22357 machine_mode src_mode
= GET_MODE (src
);
22358 convert_optab cvt
= unknown_optab
;
22359 bool do_move
= false;
22360 rtx libfunc
= NULL_RTX
;
22362 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
22363 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
/* Table of hardware conversion generators, indexed [0]=KFmode,
   [1]=TFmode (see kf_or_tf below).  */
22367 rtx_2func_t from_df
;
22368 rtx_2func_t from_sf
;
22369 rtx_2func_t from_si_sign
;
22370 rtx_2func_t from_si_uns
;
22371 rtx_2func_t from_di_sign
;
22372 rtx_2func_t from_di_uns
;
22375 rtx_2func_t to_si_sign
;
22376 rtx_2func_t to_si_uns
;
22377 rtx_2func_t to_di_sign
;
22378 rtx_2func_t to_di_uns
;
22379 } hw_conversions
[2] = {
22380 /* convertions to/from KFmode */
22382 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
22383 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
22384 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
22385 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
22386 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
22387 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
22388 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
22389 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
22390 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
22391 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
22392 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
22393 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
22396 /* convertions to/from TFmode */
22398 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
22399 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
22400 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
22401 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
22402 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
22403 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
22404 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
22405 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
22406 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
22407 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
22408 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
22409 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
/* Identity conversions should never reach here.  */
22413 if (dest_mode
== src_mode
)
22414 gcc_unreachable ();
22416 /* Eliminate memory operations. */
22418 src
= force_reg (src_mode
, src
);
/* If DEST is in memory, convert into a temporary register first and
   store it afterwards.  */
22422 rtx tmp
= gen_reg_rtx (dest_mode
);
22423 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
22424 rs6000_emit_move (dest
, tmp
, dest_mode
);
22428 /* Convert to IEEE 128-bit floating point. */
22429 if (FLOAT128_IEEE_P (dest_mode
))
/* kf_or_tf index assignments (= 0 / = 1) are elided here.  */
22431 if (dest_mode
== KFmode
)
22433 else if (dest_mode
== TFmode
)
22436 gcc_unreachable ();
/* Per-source-mode dispatch (the switch/case lines are elided); each
   arm picks an hw_conversions entry and/or a convert optab.  */
22442 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
22447 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
22453 if (FLOAT128_IBM_P (src_mode
))
22462 cvt
= ufloat_optab
;
22463 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
22467 cvt
= sfloat_optab
;
22468 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
22475 cvt
= ufloat_optab
;
22476 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
22480 cvt
= sfloat_optab
;
22481 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
22486 gcc_unreachable ();
22490 /* Convert from IEEE 128-bit floating point. */
22491 else if (FLOAT128_IEEE_P (src_mode
))
22493 if (src_mode
== KFmode
)
22495 else if (src_mode
== TFmode
)
22498 gcc_unreachable ();
22504 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
22509 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
22515 if (FLOAT128_IBM_P (dest_mode
))
22525 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
22530 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
22538 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
22543 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
22548 gcc_unreachable ();
22552 /* Both IBM format. */
22553 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
22557 gcc_unreachable ();
22559 /* Handle conversion between TFmode/KFmode. */
/* do_move case: same representation, just change the mode wrapper.  */
22561 emit_move_insn (dest
, gen_lowpart (dest_mode
, src
));
22563 /* Handle conversion if we have hardware support. */
22564 else if (TARGET_FLOAT128_HW
&& hw_convert
)
22565 emit_insn ((hw_convert
) (dest
, src
));
22567 /* Call an external function to do the conversion. */
22568 else if (cvt
!= unknown_optab
)
22570 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
22571 gcc_assert (libfunc
!= NULL_RTX
);
22573 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
, 1, src
,
22576 gcc_assert (dest2
!= NULL_RTX
);
22577 if (!rtx_equal_p (dest
, dest2
))
22578 emit_move_insn (dest
, dest2
);
22582 gcc_unreachable ();
22588 /* Emit the RTL for an sISEL pattern. */
/* Store 1 in operands[0] if the compare operands[1] holds, else 0,
   via the integer conditional-move helper.  (Extraction note: braces
   and return type elided; comments only.)  */
22591 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED
, rtx operands
[])
22593 rs6000_emit_int_cmove (operands
[0], operands
[1], const1_rtx
, const0_rtx
);
22596 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22597 can be used as that dest register. Return the dest register. */
/* Uses XOR when OP2 fits a logical immediate/register operand, else
   OP1 + (-OP2).  (Extraction note: the return statements and braces
   are elided; comments only.)  */
22600 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
/* OP2 == 0: OP1 itself is already zero iff they are equal (the branch
   body is elided here).  */
22602 if (op2
== const0_rtx
)
/* Materialize a fresh pseudo when SCRATCH is still a placeholder.  */
22605 if (GET_CODE (scratch
) == SCRATCH
)
22606 scratch
= gen_reg_rtx (mode
)
;
22608 if (logical_operand (op2
, mode
))
22609 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
22611 emit_insn (gen_rtx_SET (scratch
,
22612 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
/* Emit RTL that stores the result of comparison operands[1] into
   operands[0] as 0/1.  Conditions that are not directly available on
   the CR bits (NE, GE, LE, ...) are computed by reversing the compare
   and inverting via a CCEQ compare against zero.  (Extraction note:
   lines split mid-token, braces elided; comments only.)  */
22618 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
22621 machine_mode op_mode
;
22622 enum rtx_code cond_code
;
22623 rtx result
= operands
[0];
22625 condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
22626 cond_code
= GET_CODE (condition_rtx
);
/* These codes need an extra inversion step.  */
22628 if (cond_code
== NE
22629 || cond_code
== GE
|| cond_code
== LE
22630 || cond_code
== GEU
|| cond_code
== LEU
22631 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
22633 rtx not_result
= gen_reg_rtx (CCEQmode
);
22634 rtx not_op
, rev_cond_rtx
;
22635 machine_mode cc_mode
;
22637 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
22639 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
22640 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
22641 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
22642 emit_insn (gen_rtx_SET (not_result
, not_op
));
22643 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
22646 op_mode
= GET_MODE (XEXP (operands
[1], 0));
22647 if (op_mode
== VOIDmode
)
22648 op_mode
= GET_MODE (XEXP (operands
[1], 1));
/* 64-bit compares produce the result in DImode first, then convert.  */
22650 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
22652 PUT_MODE (condition_rtx
, DImode
);
22653 convert_move (result
, condition_rtx
, 0);
22657 PUT_MODE (condition_rtx
, SImode
);
22658 emit_insn (gen_rtx_SET (result
, condition_rtx
));
22662 /* Emit a branch of kind CODE to location LOC. */
/* Compare operands[0] in MODE, then emit a conditional jump to the
   label in operands[3].  (Extraction note: lines split mid-token,
   braces elided; comments only.)  */
22665 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
22667 rtx condition_rtx
, loc_ref
;
22669 condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
22670 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
22671 emit_jump_insn (gen_rtx_SET (pc_rtx
,
22672 gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
,
22673 loc_ref
, pc_rtx
)));
22676 /* Return the string to output a conditional branch to LABEL, which is
22677 the operand template of the label, or NULL if the branch is really a
22678 conditional return.
22680 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22681 condition code register and its mode specifies what kind of
22682 comparison we made.
22684 REVERSED is nonzero if we should reverse the sense of the comparison.
22686 INSN is the insn. */
/* Builds the "b<cond>[lr]<pred> CR,label" text in a static buffer,
   adding a static branch-prediction hint and a long-branch sequence
   when the target is out of range.  (Extraction note: lines split
   mid-token, several lines elided; comments only.)  */
22689 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
22691 static char string
[64];
22692 enum rtx_code code
= GET_CODE (op
);
22693 rtx cc_reg
= XEXP (op
, 0);
22694 machine_mode mode
= GET_MODE (cc_reg
);
22695 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
/* A length-8 insn means the short branch cannot reach: branch around
   an unconditional 'b'.  */
22696 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
22697 int really_reversed
= reversed
^ need_longbranch
;
22703 validate_condition_mode (code
, mode
);
22705 /* Work out which way this really branches. We could use
22706 reverse_condition_maybe_unordered here always but this
22707 makes the resulting assembler clearer. */
22708 if (really_reversed
)
22710 /* Reversal of FP compares takes care -- an ordered compare
22711 becomes an unordered compare and vice versa. */
22712 if (mode
== CCFPmode
)
22713 code
= reverse_condition_maybe_unordered (code
);
22715 code
= reverse_condition (code
);
22720 /* Not all of these are actually distinct opcodes, but
22721 we distinguish them for clarity of the resulting assembler. */
22722 case NE
: case LTGT
:
22723 ccode
= "ne"; break;
22724 case EQ
: case UNEQ
:
22725 ccode
= "eq"; break;
22727 ccode
= "ge"; break;
22728 case GT
: case GTU
: case UNGT
:
22729 ccode
= "gt"; break;
22731 ccode
= "le"; break;
22732 case LT
: case LTU
: case UNLT
:
22733 ccode
= "lt"; break;
22734 case UNORDERED
: ccode
= "un"; break;
22735 case ORDERED
: ccode
= "nu"; break;
22736 case UNGE
: ccode
= "nl"; break;
22737 case UNLE
: ccode
= "ng"; break;
22739 gcc_unreachable ();
22742 /* Maybe we have a guess as to how likely the branch is. */
22744 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
22745 if (note
!= NULL_RTX
)
22747 /* PROB is the difference from 50%. */
22748 int prob
= XINT (note
, 0) - REG_BR_PROB_BASE
/ 2;
22750 /* Only hint for highly probable/improbable branches on newer cpus when
22751 we have real profile data, as static prediction overrides processor
22752 dynamic prediction. For older cpus we may as well always hint, but
22753 assume not taken for branches that are very close to 50% as a
22754 mispredicted taken branch is more expensive than a
22755 mispredicted not-taken branch. */
22756 if (rs6000_always_hint
22757 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
22758 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
22759 && br_prob_note_reliable_p (note
)))
22761 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
22762 && ((prob
> 0) ^ need_longbranch
))
/* Conditional return ("bclr") when LABEL is NULL, else plain "bc".  */
22770 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
22772 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
22774 /* We need to escape any '%' characters in the reg_names string.
22775 Assume they'd only be the first character.... */
22776 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
22778 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
22782 /* If the branch distance was too far, we may have to use an
22783 unconditional branch to go the distance. */
22784 if (need_longbranch
)
22785 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
22787 s
+= sprintf (s
, ",%s", label
);
22793 /* Return insn for VSX or Altivec comparisons. */
/* Emits MASK = (OP0 code OP1) as a single vector compare insn when the
   target supports CODE directly; the per-code availability checks are
   elided from this extraction.  (Comments only; code bytes untouched.)  */
22796 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
22799 machine_mode mode
= GET_MODE (op0
);
22807 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22818 mask
= gen_reg_rtx (mode
);
22819 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
22826 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22827 DMODE is expected destination mode. This is a recursive function. */
/* Strategy: try the compare directly; otherwise try swapping operands,
   inverting the condition and complementing, or composing GT/LT with EQ
   via an IOR.  (Extraction note: lines split mid-token, the dispatching
   switch and early returns are elided; comments only.)  */
22830 rs6000_emit_vector_compare (enum rtx_code rcode
,
22832 machine_mode dmode
)
22835 bool swap_operands
= false;
22836 bool try_again
= false;
22838 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
22839 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
22841 /* See if the comparison works as is. */
22842 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
22850 swap_operands
= true;
22855 swap_operands
= true;
22863 /* Invert condition and try again.
22864 e.g., A != B becomes ~(A==B). */
22866 enum rtx_code rev_code
;
22867 enum insn_code nor_code
;
22870 rev_code
= reverse_condition_maybe_unordered (rcode
);
22871 if (rev_code
== UNKNOWN
)
22874 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
22875 if (nor_code
== CODE_FOR_nothing
)
22878 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
22882 mask
= gen_reg_rtx (dmode
);
22883 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
22891 /* Try GT/GTU/LT/LTU OR EQ */
22894 enum insn_code ior_code
;
22895 enum rtx_code new_code
;
22916 gcc_unreachable ();
22919 ior_code
= optab_handler (ior_optab
, dmode
);
22920 if (ior_code
== CODE_FOR_nothing
)
22923 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
22927 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
22931 mask
= gen_reg_rtx (dmode
);
22932 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
/* try_again path: retry the inner compare after swapping operands.  */
22943 std::swap (op0
, op1
);
22945 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
22950 /* You only get two chances. */
22954 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22955 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22956 operands for the relation operation COND. */
/* Builds a mask with rs6000_emit_vector_compare, then selects between
   OP_TRUE/OP_FALSE -- recognising the all-ones/all-zeros special cases
   so the mask itself (or its complement) can be the result.
   (Extraction note: lines split mid-token, several lines elided;
   comments only.)  */
22959 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
22960 rtx cond
, rtx cc_op0
, rtx cc_op1
)
22962 machine_mode dest_mode
= GET_MODE (dest
);
22963 machine_mode mask_mode
= GET_MODE (cc_op0
);
22964 enum rtx_code rcode
= GET_CODE (cond
);
22965 machine_mode cc_mode
= CCmode
;
22968 bool invert_move
= false;
22970 if (VECTOR_UNIT_NONE_P (dest_mode
))
22973 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
22974 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
22978 /* Swap operands if we can, and fall back to doing the operation as
22979 specified, and doing a NOR to invert the test. */
22985 /* Invert condition and try again.
22986 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22987 invert_move
= true;
22988 rcode
= reverse_condition_maybe_unordered (rcode
);
22989 if (rcode
== UNKNOWN
)
22995 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
22997 /* Invert condition to avoid compound test. */
22998 invert_move
= true;
22999 rcode
= reverse_condition (rcode
);
23007 /* Mark unsigned tests with CCUNSmode. */
23008 cc_mode
= CCUNSmode
;
23010 /* Invert condition to avoid compound test if necessary. */
23011 if (rcode
== GEU
|| rcode
== LEU
)
23013 invert_move
= true;
23014 rcode
= reverse_condition (rcode
);
23022 /* Get the vector mask for the given relational operations. */
23023 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
/* An inverted condition selects the other arm.  */
23029 std::swap (op_true
, op_false
);
23031 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
23032 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
23033 && (GET_CODE (op_true
) == CONST_VECTOR
23034 || GET_CODE (op_false
) == CONST_VECTOR
))
23036 rtx constant_0
= CONST0_RTX (dest_mode
);
23037 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
23039 if (op_true
== constant_m1
&& op_false
== constant_0
)
23041 emit_move_insn (dest
, mask
);
23045 else if (op_true
== constant_0
&& op_false
== constant_m1
)
23047 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
23051 /* If we can't use the vector comparison directly, perhaps we can use
23052 the mask for the true or false fields, instead of loading up a
23054 if (op_true
== constant_m1
)
23057 if (op_false
== constant_0
)
23061 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
23062 op_true
= force_reg (dest_mode
, op_true
);
23064 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
23065 op_false
= force_reg (dest_mode
, op_false
);
/* General case: DEST = mask != 0 ? op_true : op_false via IF_THEN_ELSE.  */
23067 cond2
= gen_rtx_fmt_ee (NE
, cc_mode
, gen_lowpart (dest_mode
, mask
),
23068 CONST0_RTX (dest_mode
));
23069 emit_insn (gen_rtx_SET (dest
,
23070 gen_rtx_IF_THEN_ELSE (dest_mode
,
23077 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
23078 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
23079 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
23080 hardware has no such operation. */
/* Recognises (a >= b ? a : b)-shaped selects and lowers them to SMAX/SMIN.
   (Extraction note: lines split mid-token, the 'return 0/1' lines are
   elided; comments only.)  */
23083 rs6000_emit_p9_fp_minmax (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
23085 enum rtx_code code
= GET_CODE (op
);
23086 rtx op0
= XEXP (op
, 0);
23087 rtx op1
= XEXP (op
, 1);
23088 machine_mode compare_mode
= GET_MODE (op0
);
23089 machine_mode result_mode
= GET_MODE (dest
);
23090 bool max_p
= false;
/* Compare and result must agree in mode (failure body elided).  */
23092 if (result_mode
!= compare_mode
)
23095 if (code
== GE
|| code
== GT
)
23097 else if (code
== LE
|| code
== LT
)
/* Operands must match (a,b)/(a,b) or be swapped, flipping max<->min.  */
23102 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
23105 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
))
23111 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
23115 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
23116 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
23117 operands of the last comparison is nonzero/true, FALSE_COND if it is
23118 zero/false. Return 0 if the hardware has no such operation. */
/* Emits DEST = (op0 code op1) ? true_cond : false_cond as a single
   parallel with a V2DImode scratch clobber.  (Extraction note: lines
   split mid-token, the code-normalisation switch is elided; comments
   only.)  */
23121 rs6000_emit_p9_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
23123 enum rtx_code code
= GET_CODE (op
);
23124 rtx op0
= XEXP (op
, 0);
23125 rtx op1
= XEXP (op
, 1);
23126 machine_mode result_mode
= GET_MODE (dest
);
/* Needs fresh pseudos; bail out after reload.  */
23131 if (!can_create_pseudo_p ())
/* Canonicalise by swapping the compare when required (selecting 'case'
   lines are elided).  */
23144 code
= swap_condition (code
);
23145 std::swap (op0
, op1
);
23152 /* Generate: [(parallel [(set (dest)
23153 (if_then_else (op (cmp1) (cmp2))
23156 (clobber (scratch))])]. */
23158 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
23159 cmove_rtx
= gen_rtx_SET (dest
,
23160 gen_rtx_IF_THEN_ELSE (result_mode
,
23165 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
23166 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
23167 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
23172 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
23173 operands of the last comparison is nonzero/true, FALSE_COND if it
23174 is zero/false. Return 0 if the hardware has no such operation. */
/* Tries, in order: power9 min/max and cmove forms, integer isel, then
   the FP "fsel" lowering -- reducing the condition to a GE-against-zero
   via subtraction, negation and ABS.  (Extraction note: lines split
   mid-token, many lines -- returns, case labels, braces -- are elided;
   comments only.)  */
23177 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
23179 enum rtx_code code
= GET_CODE (op
);
23180 rtx op0
= XEXP (op
, 0);
23181 rtx op1
= XEXP (op
, 1);
23182 machine_mode compare_mode
= GET_MODE (op0
);
23183 machine_mode result_mode
= GET_MODE (dest
);
23185 bool is_against_zero
;
23187 /* These modes should always match. */
23188 if (GET_MODE (op1
) != compare_mode
23189 /* In the isel case however, we can use a compare immediate, so
23190 op1 may be a small constant. */
23191 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
23193 if (GET_MODE (true_cond
) != result_mode
)
23195 if (GET_MODE (false_cond
) != result_mode
)
23198 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23199 if (TARGET_P9_MINMAX
23200 && (compare_mode
== SFmode
|| compare_mode
== DFmode
)
23201 && (result_mode
== SFmode
|| result_mode
== DFmode
))
23203 if (rs6000_emit_p9_fp_minmax (dest
, op
, true_cond
, false_cond
))
23206 if (rs6000_emit_p9_fp_cmove (dest
, op
, true_cond
, false_cond
))
23210 /* Don't allow using floating point comparisons for integer results for
23212 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
23215 /* First, work out if the hardware can do this at all, or
23216 if it's too slow.... */
23217 if (!FLOAT_MODE_P (compare_mode
))
/* Integer path: delegate to isel-based cmove.  */
23220 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
23224 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
23226 /* A floating-point subtract might overflow, underflow, or produce
23227 an inexact result, thus changing the floating-point flags, so it
23228 can't be generated if we care about that. It's safe if one side
23229 of the construct is zero, since then no subtract will be
23231 if (SCALAR_FLOAT_MODE_P (compare_mode
)
23232 && flag_trapping_math
&& ! is_against_zero
)
23235 /* Eliminate half of the comparisons by switching operands, this
23236 makes the remaining code simpler. */
23237 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
23238 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
23240 code
= reverse_condition_maybe_unordered (code
);
/* Reversing the condition swaps which arm is selected.  */
23242 true_cond
= false_cond
;
23246 /* UNEQ and LTGT take four instructions for a comparison with zero,
23247 it'll probably be faster to use a branch here too. */
23248 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
23251 /* We're going to try to implement comparisons by performing
23252 a subtract, then comparing against zero. Unfortunately,
23253 Inf - Inf is NaN which is not zero, and so if we don't
23254 know that the operand is finite and the comparison
23255 would treat EQ different to UNORDERED, we can't do it. */
23256 if (HONOR_INFINITIES (compare_mode
)
23257 && code
!= GT
&& code
!= UNGE
23258 && (GET_CODE (op1
) != CONST_DOUBLE
23259 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
23260 /* Constructs of the form (a OP b ? a : b) are safe. */
23261 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
23262 || (! rtx_equal_p (op0
, true_cond
)
23263 && ! rtx_equal_p (op1
, true_cond
))))
23266 /* At this point we know we can use fsel. */
23268 /* Reduce the comparison to a comparison against zero. */
23269 if (! is_against_zero
)
23271 temp
= gen_reg_rtx (compare_mode
)
;
23272 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
23274 op1
= CONST0_RTX (compare_mode
);
23277 /* If we don't care about NaNs we can reduce some of the comparisons
23278 down to faster ones. */
23279 if (! HONOR_NANS (compare_mode
))
23285 true_cond
= false_cond
;
23298 /* Now, reduce everything down to a GE. */
/* The elided 'switch (code)' arms below rewrite op0 so a single
   GE-against-zero fsel suffices (negate for LE, ABS for EQ, etc.).  */
23305 temp
= gen_reg_rtx (compare_mode
);
23306 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
23311 temp
= gen_reg_rtx (compare_mode
);
23312 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
23317 temp
= gen_reg_rtx (compare_mode
);
23318 emit_insn (gen_rtx_SET (temp
,
23319 gen_rtx_NEG (compare_mode
,
23320 gen_rtx_ABS (compare_mode
, op0
))));
23325 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23326 temp
= gen_reg_rtx (result_mode
);
23327 emit_insn (gen_rtx_SET (temp
,
23328 gen_rtx_IF_THEN_ELSE (result_mode
,
23329 gen_rtx_GE (VOIDmode
,
23331 true_cond
, false_cond
)));
23332 false_cond
= true_cond
;
23335 temp
= gen_reg_rtx (compare_mode
);
23336 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
23341 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23342 temp
= gen_reg_rtx (result_mode
);
23343 emit_insn (gen_rtx_SET (temp
,
23344 gen_rtx_IF_THEN_ELSE (result_mode
,
23345 gen_rtx_GE (VOIDmode
,
23347 true_cond
, false_cond
)));
23348 true_cond
= false_cond
;
23351 temp
= gen_reg_rtx (compare_mode
);
23352 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
23357 gcc_unreachable ();
/* Final fsel-style select on the reduced GE condition.  */
23360 emit_insn (gen_rtx_SET (dest
,
23361 gen_rtx_IF_THEN_ELSE (result_mode
,
23362 gen_rtx_GE (VOIDmode
,
23364 true_cond
, false_cond
)));
23368 /* Same as above, but for ints (isel). */
/* Integer conditional move using the isel instruction: do the compare,
   normalise the condition to one isel handles directly (LT/GT/LTU/GTU/EQ,
   reversing and swapping arms otherwise), then emit the right
   signed/unsigned SI/DI isel pattern.  (Extraction note: lines split
   mid-token, some lines elided; comments only.)  */
23371 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
23373 rtx condition_rtx
, cr
;
23374 machine_mode mode
= GET_MODE (dest
);
23375 enum rtx_code cond_code
;
23376 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
/* isel only exists for SImode, and DImode on 64-bit.  */
23379 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
23382 /* We still have to do the compare, because isel doesn't do a
23383 compare, it just looks at the CRx bits set by a previous compare
23385 condition_rtx
= rs6000_generate_compare (op
, mode
);
23386 cond_code
= GET_CODE (condition_rtx
);
23387 cr
= XEXP (condition_rtx
, 0);
23388 signedp
= GET_MODE (cr
) == CCmode
;
23390 isel_func
= (mode
== SImode
23391 ? (signedp
? gen_isel_signed_si
: gen_isel_unsigned_si
)
23392 : (signedp
? gen_isel_signed_di
: gen_isel_unsigned_di
));
23396 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
23397 /* isel handles these directly. */
23401 /* We need to swap the sense of the comparison. */
23403 std::swap (false_cond
, true_cond
);
23404 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
/* isel's operands must be registers (or const0 for the true arm).  */
23409 false_cond
= force_reg (mode
, false_cond
);
23410 if (true_cond
!= const0_rtx
)
23411 true_cond
= force_reg (mode
, true_cond
);
23413 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
/* Return the assembler template for an isel whose condition is
   operands[1]: conditions isel cannot test directly (GE/GEU/LE/LEU/NE)
   are reversed in place and the source operands swapped in the
   template.  (Extraction note: lines split mid-token, braces and return
   type elided; comments only.)  */
23419 output_isel (rtx
*operands
)
23421 enum rtx_code code
;
23423 code
= GET_CODE (operands
[1]);
23425 if (code
== GE
|| code
== GEU
|| code
== LE
|| code
== LEU
|| code
== NE
)
23427 gcc_assert (GET_CODE (operands
[2]) == REG
23428 && GET_CODE (operands
[3]) == REG
);
/* Mutates operands[1] so %j1 prints the reversed condition.  */
23429 PUT_CODE (operands
[1], reverse_condition (code
));
23430 return "isel %0,%3,%2,%j1";
23433 return "isel %0,%2,%3,%j1";
/* Emit DEST = min/max (OP0, OP1) per CODE (SMIN/SMAX/UMIN/UMAX): use the
   direct vector (or VSX scalar SFmode) insn when available, otherwise
   fall back to a conditional move.  (Extraction note: lines split
   mid-token, the comparison-code setup lines are elided; comments
   only.)  */
23437 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
23439 machine_mode mode
= GET_MODE (op0
);
23443 /* VSX/altivec have direct min/max insns. */
23444 if ((code
== SMAX
|| code
== SMIN
)
23445 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
23446 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))))
23448 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* Fallback: choose the comparison (setup lines elided) ...  */
23452 if (code
== SMAX
|| code
== SMIN
)
/* ... and select the larger/smaller operand with a cmove.  */
23457 if (code
== SMAX
|| code
== UMAX
)
23458 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
23459 op0
, op1
, mode
, 0);
23461 target
= emit_conditional_move (dest
, c
, op0
, op1
, mode
,
23462 op1
, op0
, mode
, 0);
23463 gcc_assert (target
);
23464 if (target
!= dest
)
23465 emit_move_insn (dest
, target
);
23468 /* Split a signbit operation on 64-bit machines with direct move. Also allow
23469 for the value to come from memory or if it is already loaded into a GPR. */
23472 rs6000_split_signbit (rtx dest
, rtx src
)
23474 machine_mode d_mode
= GET_MODE (dest
);
23475 machine_mode s_mode
= GET_MODE (src
);
23476 rtx dest_di
= (d_mode
== DImode
) ? dest
: gen_lowpart (DImode
, dest
);
23477 rtx shift_reg
= dest_di
;
23479 gcc_assert (FLOAT128_IEEE_P (s_mode
) && TARGET_POWERPC64
);
23483 rtx mem
= (WORDS_BIG_ENDIAN
23484 ? adjust_address (src
, DImode
, 0)
23485 : adjust_address (src
, DImode
, 8));
23486 emit_insn (gen_rtx_SET (dest_di
, mem
));
23491 unsigned int r
= reg_or_subregno (src
);
23493 if (INT_REGNO_P (r
))
23494 shift_reg
= gen_rtx_REG (DImode
, r
+ (BYTES_BIG_ENDIAN
== 0));
23498 /* Generate the special mfvsrd instruction to get it in a GPR. */
23499 gcc_assert (VSX_REGNO_P (r
));
23500 if (s_mode
== KFmode
)
23501 emit_insn (gen_signbitkf2_dm2 (dest_di
, src
));
23503 emit_insn (gen_signbittf2_dm2 (dest_di
, src
));
23507 emit_insn (gen_lshrdi3 (dest_di
, shift_reg
, GEN_INT (63)));
23511 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23512 COND is true. Mark the jump as unlikely to be taken. */
23515 emit_unlikely_jump (rtx cond
, rtx label
)
23517 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
23518 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
23519 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
23520 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
23523 /* A subroutine of the atomic operation splitters. Emit a load-locked
23524 instruction in MODE. For QI/HImode, possibly use a pattern than includes
23525 the zero_extend operation. */
23528 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
23530 rtx (*fn
) (rtx
, rtx
) = NULL
;
23535 fn
= gen_load_lockedqi
;
23538 fn
= gen_load_lockedhi
;
23541 if (GET_MODE (mem
) == QImode
)
23542 fn
= gen_load_lockedqi_si
;
23543 else if (GET_MODE (mem
) == HImode
)
23544 fn
= gen_load_lockedhi_si
;
23546 fn
= gen_load_lockedsi
;
23549 fn
= gen_load_lockeddi
;
23552 fn
= gen_load_lockedti
;
23555 gcc_unreachable ();
23557 emit_insn (fn (reg
, mem
));
23560 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23561 instruction in MODE. */
23564 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
23566 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
23571 fn
= gen_store_conditionalqi
;
23574 fn
= gen_store_conditionalhi
;
23577 fn
= gen_store_conditionalsi
;
23580 fn
= gen_store_conditionaldi
;
23583 fn
= gen_store_conditionalti
;
23586 gcc_unreachable ();
23589 /* Emit sync before stwcx. to address PPC405 Erratum. */
23590 if (PPC405_ERRATUM77
)
23591 emit_insn (gen_hwsync ());
23593 emit_insn (fn (res
, mem
, val
));
23596 /* Expand barriers before and after a load_locked/store_cond sequence. */
23599 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
23601 rtx addr
= XEXP (mem
, 0);
23602 int strict_p
= (reload_in_progress
|| reload_completed
);
23604 if (!legitimate_indirect_address_p (addr
, strict_p
)
23605 && !legitimate_indexed_address_p (addr
, strict_p
))
23607 addr
= force_reg (Pmode
, addr
);
23608 mem
= replace_equiv_address_nv (mem
, addr
);
23613 case MEMMODEL_RELAXED
:
23614 case MEMMODEL_CONSUME
:
23615 case MEMMODEL_ACQUIRE
:
23617 case MEMMODEL_RELEASE
:
23618 case MEMMODEL_ACQ_REL
:
23619 emit_insn (gen_lwsync ());
23621 case MEMMODEL_SEQ_CST
:
23622 emit_insn (gen_hwsync ());
23625 gcc_unreachable ();
23631 rs6000_post_atomic_barrier (enum memmodel model
)
23635 case MEMMODEL_RELAXED
:
23636 case MEMMODEL_CONSUME
:
23637 case MEMMODEL_RELEASE
:
23639 case MEMMODEL_ACQUIRE
:
23640 case MEMMODEL_ACQ_REL
:
23641 case MEMMODEL_SEQ_CST
:
23642 emit_insn (gen_isync ());
23645 gcc_unreachable ();
23649 /* A subroutine of the various atomic expanders. For sub-word operations,
23650 we must adjust things to operate on SImode. Given the original MEM,
23651 return a new aligned memory. Also build and return the quantities by
23652 which to shift and mask. */
23655 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
23657 rtx addr
, align
, shift
, mask
, mem
;
23658 HOST_WIDE_INT shift_mask
;
23659 machine_mode mode
= GET_MODE (orig_mem
);
23661 /* For smaller modes, we have to implement this via SImode. */
23662 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
23664 addr
= XEXP (orig_mem
, 0);
23665 addr
= force_reg (GET_MODE (addr
), addr
);
23667 /* Aligned memory containing subword. Generate a new memory. We
23668 do not want any of the existing MEM_ATTR data, as we're now
23669 accessing memory outside the original object. */
23670 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
23671 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23672 mem
= gen_rtx_MEM (SImode
, align
);
23673 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
23674 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
23675 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
23677 /* Shift amount for subword relative to aligned word. */
23678 shift
= gen_reg_rtx (SImode
);
23679 addr
= gen_lowpart (SImode
, addr
);
23680 rtx tmp
= gen_reg_rtx (SImode
);
23681 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
23682 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
23683 if (BYTES_BIG_ENDIAN
)
23684 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
23685 shift
, 1, OPTAB_LIB_WIDEN
);
23688 /* Mask for insertion. */
23689 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
23690 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23696 /* A subroutine of the various atomic expanders. For sub-word operands,
23697 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
23700 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
23704 x
= gen_reg_rtx (SImode
);
23705 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
23706 gen_rtx_NOT (SImode
, mask
),
23709 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
23714 /* A subroutine of the various atomic expanders. For sub-word operands,
23715 extract WIDE to NARROW via SHIFT. */
23718 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
23720 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
23721 wide
, 1, OPTAB_LIB_WIDEN
);
23722 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
23725 /* Expand an atomic compare and swap operation. */
23728 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
23730 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
23731 rtx label1
, label2
, x
, mask
, shift
;
23732 machine_mode mode
, orig_mode
;
23733 enum memmodel mod_s
, mod_f
;
23736 boolval
= operands
[0];
23737 retval
= operands
[1];
23739 oldval
= operands
[3];
23740 newval
= operands
[4];
23741 is_weak
= (INTVAL (operands
[5]) != 0);
23742 mod_s
= memmodel_base (INTVAL (operands
[6]));
23743 mod_f
= memmodel_base (INTVAL (operands
[7]));
23744 orig_mode
= mode
= GET_MODE (mem
);
23746 mask
= shift
= NULL_RTX
;
23747 if (mode
== QImode
|| mode
== HImode
)
23749 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23750 lwarx and shift/mask operations. With power8, we need to do the
23751 comparison in SImode, but the store is still done in QI/HImode. */
23752 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
23754 if (!TARGET_SYNC_HI_QI
)
23756 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
23758 /* Shift and mask OLDVAL into position with the word. */
23759 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
23760 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23762 /* Shift and mask NEWVAL into position within the word. */
23763 newval
= convert_modes (SImode
, mode
, newval
, 1);
23764 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
23765 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23768 /* Prepare to adjust the return value. */
23769 retval
= gen_reg_rtx (SImode
);
23772 else if (reg_overlap_mentioned_p (retval
, oldval
))
23773 oldval
= copy_to_reg (oldval
);
23775 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
23776 oldval
= copy_to_mode_reg (mode
, oldval
);
23778 if (reg_overlap_mentioned_p (retval
, newval
))
23779 newval
= copy_to_reg (newval
);
23781 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
23786 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
23787 emit_label (XEXP (label1
, 0));
23789 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
23791 emit_load_locked (mode
, retval
, mem
);
23795 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
23796 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23798 cond
= gen_reg_rtx (CCmode
);
23799 /* If we have TImode, synthesize a comparison. */
23800 if (mode
!= TImode
)
23801 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
23804 rtx xor1_result
= gen_reg_rtx (DImode
);
23805 rtx xor2_result
= gen_reg_rtx (DImode
);
23806 rtx or_result
= gen_reg_rtx (DImode
);
23807 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
23808 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
23809 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
23810 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
23812 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
23813 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
23814 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
23815 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
23818 emit_insn (gen_rtx_SET (cond
, x
));
23820 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
23821 emit_unlikely_jump (x
, label2
);
23825 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
23827 emit_store_conditional (orig_mode
, cond
, mem
, x
);
23831 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
23832 emit_unlikely_jump (x
, label1
);
23835 if (!is_mm_relaxed (mod_f
))
23836 emit_label (XEXP (label2
, 0));
23838 rs6000_post_atomic_barrier (mod_s
);
23840 if (is_mm_relaxed (mod_f
))
23841 emit_label (XEXP (label2
, 0));
23844 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
23845 else if (mode
!= GET_MODE (operands
[1]))
23846 convert_move (operands
[1], retval
, 1);
23848 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23849 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
23850 emit_insn (gen_rtx_SET (boolval
, x
));
23853 /* Expand an atomic exchange operation. */
23856 rs6000_expand_atomic_exchange (rtx operands
[])
23858 rtx retval
, mem
, val
, cond
;
23860 enum memmodel model
;
23861 rtx label
, x
, mask
, shift
;
23863 retval
= operands
[0];
23866 model
= memmodel_base (INTVAL (operands
[3]));
23867 mode
= GET_MODE (mem
);
23869 mask
= shift
= NULL_RTX
;
23870 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
23872 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
23874 /* Shift and mask VAL into position with the word. */
23875 val
= convert_modes (SImode
, mode
, val
, 1);
23876 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
23877 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23879 /* Prepare to adjust the return value. */
23880 retval
= gen_reg_rtx (SImode
);
23884 mem
= rs6000_pre_atomic_barrier (mem
, model
);
23886 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
23887 emit_label (XEXP (label
, 0));
23889 emit_load_locked (mode
, retval
, mem
);
23893 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
23895 cond
= gen_reg_rtx (CCmode
);
23896 emit_store_conditional (mode
, cond
, mem
, x
);
23898 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
23899 emit_unlikely_jump (x
, label
);
23901 rs6000_post_atomic_barrier (model
);
23904 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
23907 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23908 to perform. MEM is the memory on which to operate. VAL is the second
23909 operand of the binary operator. BEFORE and AFTER are optional locations to
23910 return the value of MEM either before of after the operation. MODEL_RTX
23911 is a CONST_INT containing the memory model to use. */
23914 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
23915 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
23917 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
23918 machine_mode mode
= GET_MODE (mem
);
23919 machine_mode store_mode
= mode
;
23920 rtx label
, x
, cond
, mask
, shift
;
23921 rtx before
= orig_before
, after
= orig_after
;
23923 mask
= shift
= NULL_RTX
;
23924 /* On power8, we want to use SImode for the operation. On previous systems,
23925 use the operation in a subword and shift/mask to get the proper byte or
23927 if (mode
== QImode
|| mode
== HImode
)
23929 if (TARGET_SYNC_HI_QI
)
23931 val
= convert_modes (SImode
, mode
, val
, 1);
23933 /* Prepare to adjust the return value. */
23934 before
= gen_reg_rtx (SImode
);
23936 after
= gen_reg_rtx (SImode
);
23941 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
23943 /* Shift and mask VAL into position with the word. */
23944 val
= convert_modes (SImode
, mode
, val
, 1);
23945 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
23946 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
23952 /* We've already zero-extended VAL. That is sufficient to
23953 make certain that it does not affect other bits. */
23958 /* If we make certain that all of the other bits in VAL are
23959 set, that will be sufficient to not affect other bits. */
23960 x
= gen_rtx_NOT (SImode
, mask
);
23961 x
= gen_rtx_IOR (SImode
, x
, val
);
23962 emit_insn (gen_rtx_SET (val
, x
));
23969 /* These will all affect bits outside the field and need
23970 adjustment via MASK within the loop. */
23974 gcc_unreachable ();
23977 /* Prepare to adjust the return value. */
23978 before
= gen_reg_rtx (SImode
);
23980 after
= gen_reg_rtx (SImode
);
23981 store_mode
= mode
= SImode
;
23985 mem
= rs6000_pre_atomic_barrier (mem
, model
);
23987 label
= gen_label_rtx ();
23988 emit_label (label
);
23989 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
23991 if (before
== NULL_RTX
)
23992 before
= gen_reg_rtx (mode
);
23994 emit_load_locked (mode
, before
, mem
);
23998 x
= expand_simple_binop (mode
, AND
, before
, val
,
23999 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
24000 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
24004 after
= expand_simple_binop (mode
, code
, before
, val
,
24005 after
, 1, OPTAB_LIB_WIDEN
);
24011 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
24012 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
24013 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
24015 else if (store_mode
!= mode
)
24016 x
= convert_modes (store_mode
, mode
, x
, 1);
24018 cond
= gen_reg_rtx (CCmode
);
24019 emit_store_conditional (store_mode
, cond
, mem
, x
);
24021 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
24022 emit_unlikely_jump (x
, label
);
24024 rs6000_post_atomic_barrier (model
);
24028 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
24029 then do the calcuations in a SImode register. */
24031 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
24033 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
24035 else if (store_mode
!= mode
)
24037 /* QImode/HImode on machines with lbarx/lharx where we do the native
24038 operation and then do the calcuations in a SImode register. */
24040 convert_move (orig_before
, before
, 1);
24042 convert_move (orig_after
, after
, 1);
24044 else if (orig_after
&& after
!= orig_after
)
24045 emit_move_insn (orig_after
, after
);
24048 /* Emit instructions to move SRC to DST. Called by splitters for
24049 multi-register moves. It will emit at most one instruction for
24050 each register that is accessed; that is, it won't emit li/lis pairs
24051 (or equivalent for 64-bit code). One of SRC or DST must be a hard
24055 rs6000_split_multireg_move (rtx dst
, rtx src
)
24057 /* The register number of the first register being moved. */
24059 /* The mode that is to be moved. */
24061 /* The mode that the move is being done in, and its size. */
24062 machine_mode reg_mode
;
24064 /* The number of registers that will be moved. */
24067 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
24068 mode
= GET_MODE (dst
);
24069 nregs
= hard_regno_nregs
[reg
][mode
];
24070 if (FP_REGNO_P (reg
))
24071 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
24072 ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
) ? DFmode
: SFmode
);
24073 else if (ALTIVEC_REGNO_P (reg
))
24074 reg_mode
= V16QImode
;
24076 reg_mode
= word_mode
;
24077 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
24079 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
24081 /* TDmode residing in FP registers is special, since the ISA requires that
24082 the lower-numbered word of a register pair is always the most significant
24083 word, even in little-endian mode. This does not match the usual subreg
24084 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
24085 the appropriate constituent registers "by hand" in little-endian mode.
24087 Note we do not need to check for destructive overlap here since TDmode
24088 can only reside in even/odd register pairs. */
24089 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
24094 for (i
= 0; i
< nregs
; i
++)
24096 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
24097 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
24099 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
24100 i
* reg_mode_size
);
24102 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
24103 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
24105 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
24106 i
* reg_mode_size
);
24108 emit_insn (gen_rtx_SET (p_dst
, p_src
));
24114 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
24116 /* Move register range backwards, if we might have destructive
24119 for (i
= nregs
- 1; i
>= 0; i
--)
24120 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
24121 i
* reg_mode_size
),
24122 simplify_gen_subreg (reg_mode
, src
, mode
,
24123 i
* reg_mode_size
)));
24129 bool used_update
= false;
24130 rtx restore_basereg
= NULL_RTX
;
24132 if (MEM_P (src
) && INT_REGNO_P (reg
))
24136 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
24137 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
24140 breg
= XEXP (XEXP (src
, 0), 0);
24141 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
24142 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
24143 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
24144 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
24145 src
= replace_equiv_address (src
, breg
);
24147 else if (! rs6000_offsettable_memref_p (src
, reg_mode
))
24149 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
24151 rtx basereg
= XEXP (XEXP (src
, 0), 0);
24154 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
24155 emit_insn (gen_rtx_SET (ndst
,
24156 gen_rtx_MEM (reg_mode
,
24158 used_update
= true;
24161 emit_insn (gen_rtx_SET (basereg
,
24162 XEXP (XEXP (src
, 0), 1)));
24163 src
= replace_equiv_address (src
, basereg
);
24167 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
24168 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
24169 src
= replace_equiv_address (src
, basereg
);
24173 breg
= XEXP (src
, 0);
24174 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
24175 breg
= XEXP (breg
, 0);
24177 /* If the base register we are using to address memory is
24178 also a destination reg, then change that register last. */
24180 && REGNO (breg
) >= REGNO (dst
)
24181 && REGNO (breg
) < REGNO (dst
) + nregs
)
24182 j
= REGNO (breg
) - REGNO (dst
);
24184 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
24188 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
24189 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
24192 breg
= XEXP (XEXP (dst
, 0), 0);
24193 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
24194 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
24195 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
24197 /* We have to update the breg before doing the store.
24198 Use store with update, if available. */
24202 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
24203 emit_insn (TARGET_32BIT
24204 ? (TARGET_POWERPC64
24205 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
24206 : gen_movsi_update (breg
, breg
, delta_rtx
, nsrc
))
24207 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
24208 used_update
= true;
24211 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
24212 dst
= replace_equiv_address (dst
, breg
);
24214 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
)
24215 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
24217 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
24219 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
24222 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
24223 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
24226 used_update
= true;
24229 emit_insn (gen_rtx_SET (basereg
,
24230 XEXP (XEXP (dst
, 0), 1)));
24231 dst
= replace_equiv_address (dst
, basereg
);
24235 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
24236 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
24237 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
24239 && REG_P (offsetreg
)
24240 && REGNO (basereg
) != REGNO (offsetreg
));
24241 if (REGNO (basereg
) == 0)
24243 rtx tmp
= offsetreg
;
24244 offsetreg
= basereg
;
24247 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
24248 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
24249 dst
= replace_equiv_address (dst
, basereg
);
24252 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
24253 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
));
24256 for (i
= 0; i
< nregs
; i
++)
24258 /* Calculate index to next subword. */
24263 /* If compiler already emitted move of first word by
24264 store with update, no need to do anything. */
24265 if (j
== 0 && used_update
)
24268 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
24269 j
* reg_mode_size
),
24270 simplify_gen_subreg (reg_mode
, src
, mode
,
24271 j
* reg_mode_size
)));
24273 if (restore_basereg
!= NULL_RTX
)
24274 emit_insn (restore_basereg
);
24279 /* This page contains routines that are used to determine what the
24280 function prologue and epilogue code will do and write them out. */
24285 return !call_used_regs
[r
] && df_regs_ever_live_p (r
);
24288 /* Determine whether the gp REG is really used. */
24291 rs6000_reg_live_or_pic_offset_p (int reg
)
24293 /* We need to mark the PIC offset register live for the same conditions
24294 as it is set up, or otherwise it won't be saved before we clobber it. */
24296 if (reg
== RS6000_PIC_OFFSET_TABLE_REGNUM
&& !TARGET_SINGLE_PIC_BASE
)
24298 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
24299 && (crtl
->calls_eh_return
24300 || df_regs_ever_live_p (reg
)
24301 || !constant_pool_empty_p ()))
24304 if ((DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
24309 /* If the function calls eh_return, claim used all the registers that would
24310 be checked for liveness otherwise. */
24312 return ((crtl
->calls_eh_return
|| df_regs_ever_live_p (reg
))
24313 && !call_used_regs
[reg
]);
24316 /* Return the first fixed-point register that is required to be
24317 saved. 32 if none. */
24320 first_reg_to_save (void)
24324 /* Find lowest numbered live register. */
24325 for (first_reg
= 13; first_reg
<= 31; first_reg
++)
24326 if (save_reg_p (first_reg
))
24329 if (first_reg
> RS6000_PIC_OFFSET_TABLE_REGNUM
24330 && ((DEFAULT_ABI
== ABI_V4
&& flag_pic
!= 0)
24331 || (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
24332 || (TARGET_TOC
&& TARGET_MINIMAL_TOC
))
24333 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
24334 first_reg
= RS6000_PIC_OFFSET_TABLE_REGNUM
;
24338 && crtl
->uses_pic_offset_table
24339 && first_reg
> RS6000_PIC_OFFSET_TABLE_REGNUM
)
24340 return RS6000_PIC_OFFSET_TABLE_REGNUM
;
24346 /* Similar, for FP regs. */
24349 first_fp_reg_to_save (void)
24353 /* Find lowest numbered live register. */
24354 for (first_reg
= 14 + 32; first_reg
<= 63; first_reg
++)
24355 if (save_reg_p (first_reg
))
24361 /* Similar, for AltiVec regs. */
24364 first_altivec_reg_to_save (void)
24368 /* Stack frame remains as is unless we are in AltiVec ABI. */
24369 if (! TARGET_ALTIVEC_ABI
)
24370 return LAST_ALTIVEC_REGNO
+ 1;
24372 /* On Darwin, the unwind routines are compiled without
24373 TARGET_ALTIVEC, and use save_world to save/restore the
24374 altivec registers when necessary. */
24375 if (DEFAULT_ABI
== ABI_DARWIN
&& crtl
->calls_eh_return
24376 && ! TARGET_ALTIVEC
)
24377 return FIRST_ALTIVEC_REGNO
+ 20;
24379 /* Find lowest numbered live register. */
24380 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
<= LAST_ALTIVEC_REGNO
; ++i
)
24381 if (save_reg_p (i
))
24387 /* Return a 32-bit mask of the AltiVec registers we need to set in
24388 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
24389 the 32-bit word is 0. */
24391 static unsigned int
24392 compute_vrsave_mask (void)
24394 unsigned int i
, mask
= 0;
24396 /* On Darwin, the unwind routines are compiled without
24397 TARGET_ALTIVEC, and use save_world to save/restore the
24398 call-saved altivec registers when necessary. */
24399 if (DEFAULT_ABI
== ABI_DARWIN
&& crtl
->calls_eh_return
24400 && ! TARGET_ALTIVEC
)
24403 /* First, find out if we use _any_ altivec registers. */
24404 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
24405 if (df_regs_ever_live_p (i
))
24406 mask
|= ALTIVEC_REG_BIT (i
);
24411 /* Next, remove the argument registers from the set. These must
24412 be in the VRSAVE mask set by the caller, so we don't need to add
24413 them in again. More importantly, the mask we compute here is
24414 used to generate CLOBBERs in the set_vrsave insn, and we do not
24415 wish the argument registers to die. */
24416 for (i
= ALTIVEC_ARG_MIN_REG
; i
< (unsigned) crtl
->args
.info
.vregno
; i
++)
24417 mask
&= ~ALTIVEC_REG_BIT (i
);
24419 /* Similarly, remove the return value from the set. */
24422 diddle_return_value (is_altivec_return_reg
, &yes
);
24424 mask
&= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN
);
24430 /* For a very restricted set of circumstances, we can cut down the
24431 size of prologues/epilogues by calling our own save/restore-the-world
24435 compute_save_world_info (rs6000_stack_t
*info
)
24437 info
->world_save_p
= 1;
24439 = (WORLD_SAVE_P (info
)
24440 && DEFAULT_ABI
== ABI_DARWIN
24441 && !cfun
->has_nonlocal_label
24442 && info
->first_fp_reg_save
== FIRST_SAVED_FP_REGNO
24443 && info
->first_gp_reg_save
== FIRST_SAVED_GP_REGNO
24444 && info
->first_altivec_reg_save
== FIRST_SAVED_ALTIVEC_REGNO
24445 && info
->cr_save_p
);
24447 /* This will not work in conjunction with sibcalls. Make sure there
24448 are none. (This check is expensive, but seldom executed.) */
24449 if (WORLD_SAVE_P (info
))
24452 for (insn
= get_last_insn_anywhere (); insn
; insn
= PREV_INSN (insn
))
24453 if (CALL_P (insn
) && SIBLING_CALL_P (insn
))
24455 info
->world_save_p
= 0;
24460 if (WORLD_SAVE_P (info
))
24462 /* Even if we're not touching VRsave, make sure there's room on the
24463 stack for it, if it looks like we're calling SAVE_WORLD, which
24464 will attempt to save it. */
24465 info
->vrsave_size
= 4;
24467 /* If we are going to save the world, we need to save the link register too. */
24468 info
->lr_save_p
= 1;
24470 /* "Save" the VRsave register too if we're saving the world. */
24471 if (info
->vrsave_mask
== 0)
24472 info
->vrsave_mask
= compute_vrsave_mask ();
24474 /* Because the Darwin register save/restore routines only handle
24475 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24477 gcc_assert (info
->first_fp_reg_save
>= FIRST_SAVED_FP_REGNO
24478 && (info
->first_altivec_reg_save
24479 >= FIRST_SAVED_ALTIVEC_REGNO
));
24487 is_altivec_return_reg (rtx reg
, void *xyes
)
24489 bool *yes
= (bool *) xyes
;
24490 if (REGNO (reg
) == ALTIVEC_ARG_RETURN
)
24495 /* Return whether REG is a global user reg or has been specifed by
24496 -ffixed-REG. We should not restore these, and so cannot use
24497 lmw or out-of-line restore functions if there are any. We also
24498 can't save them (well, emit frame notes for them), because frame
24499 unwinding during exception handling will restore saved registers. */
24502 fixed_reg_p (int reg
)
24504 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24505 backend sets it, overriding anything the user might have given. */
24506 if (reg
== RS6000_PIC_OFFSET_TABLE_REGNUM
24507 && ((DEFAULT_ABI
== ABI_V4
&& flag_pic
)
24508 || (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
24509 || (TARGET_TOC
&& TARGET_MINIMAL_TOC
)))
24512 return fixed_regs
[reg
];
24515 /* Determine the strategy for savings/restoring registers. */
24518 SAVE_MULTIPLE
= 0x1,
24519 SAVE_INLINE_GPRS
= 0x2,
24520 SAVE_INLINE_FPRS
= 0x4,
24521 SAVE_NOINLINE_GPRS_SAVES_LR
= 0x8,
24522 SAVE_NOINLINE_FPRS_SAVES_LR
= 0x10,
24523 SAVE_INLINE_VRS
= 0x20,
24524 REST_MULTIPLE
= 0x100,
24525 REST_INLINE_GPRS
= 0x200,
24526 REST_INLINE_FPRS
= 0x400,
24527 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
= 0x800,
24528 REST_INLINE_VRS
= 0x1000
24532 rs6000_savres_strategy (rs6000_stack_t
*info
,
24533 bool using_static_chain_p
)
24537 /* Select between in-line and out-of-line save and restore of regs.
24538 First, all the obvious cases where we don't use out-of-line. */
24539 if (crtl
->calls_eh_return
24540 || cfun
->machine
->ra_need_lr
)
24541 strategy
|= (SAVE_INLINE_FPRS
| REST_INLINE_FPRS
24542 | SAVE_INLINE_GPRS
| REST_INLINE_GPRS
24543 | SAVE_INLINE_VRS
| REST_INLINE_VRS
);
24545 if (info
->first_gp_reg_save
== 32)
24546 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
24548 if (info
->first_fp_reg_save
== 64
24549 /* The out-of-line FP routines use double-precision stores;
24550 we can't use those routines if we don't have such stores. */
24551 || (TARGET_HARD_FLOAT
&& !TARGET_DOUBLE_FLOAT
))
24552 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
24554 if (info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1)
24555 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
24557 /* Define cutoff for using out-of-line functions to save registers. */
24558 if (DEFAULT_ABI
== ABI_V4
|| TARGET_ELF
)
24560 if (!optimize_size
)
24562 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
24563 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
24564 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
24568 /* Prefer out-of-line restore if it will exit. */
24569 if (info
->first_fp_reg_save
> 61)
24570 strategy
|= SAVE_INLINE_FPRS
;
24571 if (info
->first_gp_reg_save
> 29)
24573 if (info
->first_fp_reg_save
== 64)
24574 strategy
|= SAVE_INLINE_GPRS
;
24576 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
24578 if (info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
)
24579 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
24582 else if (DEFAULT_ABI
== ABI_DARWIN
)
24584 if (info
->first_fp_reg_save
> 60)
24585 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
24586 if (info
->first_gp_reg_save
> 29)
24587 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
24588 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
24592 gcc_checking_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
24593 if ((flag_shrink_wrap_separate
&& optimize_function_for_speed_p (cfun
))
24594 || info
->first_fp_reg_save
> 61)
24595 strategy
|= SAVE_INLINE_FPRS
| REST_INLINE_FPRS
;
24596 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
24597 strategy
|= SAVE_INLINE_VRS
| REST_INLINE_VRS
;
24600 /* Don't bother to try to save things out-of-line if r11 is occupied
24601 by the static chain. It would require too much fiddling and the
24602 static chain is rarely used anyway. FPRs are saved w.r.t the stack
24603 pointer on Darwin, and AIX uses r1 or r12. */
24604 if (using_static_chain_p
24605 && (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
))
24606 strategy
|= ((DEFAULT_ABI
== ABI_DARWIN
? 0 : SAVE_INLINE_FPRS
)
24608 | SAVE_INLINE_VRS
);
24610 /* We can only use the out-of-line routines to restore fprs if we've
24611 saved all the registers from first_fp_reg_save in the prologue.
24612 Otherwise, we risk loading garbage. Of course, if we have saved
24613 out-of-line then we know we haven't skipped any fprs. */
24614 if ((strategy
& SAVE_INLINE_FPRS
)
24615 && !(strategy
& REST_INLINE_FPRS
))
24619 for (i
= info
->first_fp_reg_save
; i
< 64; i
++)
24620 if (fixed_regs
[i
] || !save_reg_p (i
))
24622 strategy
|= REST_INLINE_FPRS
;
24627 /* Similarly, for altivec regs. */
24628 if ((strategy
& SAVE_INLINE_VRS
)
24629 && !(strategy
& REST_INLINE_VRS
))
24633 for (i
= info
->first_altivec_reg_save
; i
< LAST_ALTIVEC_REGNO
+ 1; i
++)
24634 if (fixed_regs
[i
] || !save_reg_p (i
))
24636 strategy
|= REST_INLINE_VRS
;
24641 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24642 saved is an out-of-line save or restore. Set up the value for
24643 the next test (excluding out-of-line gprs). */
24644 bool lr_save_p
= (info
->lr_save_p
24645 || !(strategy
& SAVE_INLINE_FPRS
)
24646 || !(strategy
& SAVE_INLINE_VRS
)
24647 || !(strategy
& REST_INLINE_FPRS
)
24648 || !(strategy
& REST_INLINE_VRS
));
24650 if (TARGET_MULTIPLE
24651 && !TARGET_POWERPC64
24652 && info
->first_gp_reg_save
< 31
24653 && !(flag_shrink_wrap
24654 && flag_shrink_wrap_separate
24655 && optimize_function_for_speed_p (cfun
)))
24657 /* Prefer store multiple for saves over out-of-line routines,
24658 since the store-multiple instruction will always be smaller. */
24659 strategy
|= SAVE_INLINE_GPRS
| SAVE_MULTIPLE
;
24661 /* The situation is more complicated with load multiple. We'd
24662 prefer to use the out-of-line routines for restores, since the
24663 "exit" out-of-line routines can handle the restore of LR and the
24664 frame teardown.  However it doesn't make sense to use the
24665 out-of-line routine if that is the only reason we'd need to save
24666 LR, and we can't use the "exit" out-of-line gpr restore if we
24667 have saved some fprs; In those cases it is advantageous to use
24668 load multiple when available. */
24669 if (info
->first_fp_reg_save
!= 64 || !lr_save_p
)
24670 strategy
|= REST_INLINE_GPRS
| REST_MULTIPLE
;
24673 /* Using the "exit" out-of-line routine does not improve code size
24674 if using it would require lr to be saved and if only saving one
24676 else if (!lr_save_p
&& info
->first_gp_reg_save
> 29)
24677 strategy
|= SAVE_INLINE_GPRS
| REST_INLINE_GPRS
;
24679 /* We can only use load multiple or the out-of-line routines to
24680 restore gprs if we've saved all the registers from
24681 first_gp_reg_save. Otherwise, we risk loading garbage.
24682 Of course, if we have saved out-of-line or used stmw then we know
24683 we haven't skipped any gprs. */
24684 if ((strategy
& (SAVE_INLINE_GPRS
| SAVE_MULTIPLE
)) == SAVE_INLINE_GPRS
24685 && (strategy
& (REST_INLINE_GPRS
| REST_MULTIPLE
)) != REST_INLINE_GPRS
)
24689 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
24690 if (fixed_reg_p (i
) || !save_reg_p (i
))
24692 strategy
|= REST_INLINE_GPRS
;
24693 strategy
&= ~REST_MULTIPLE
;
24698 if (TARGET_ELF
&& TARGET_64BIT
)
24700 if (!(strategy
& SAVE_INLINE_FPRS
))
24701 strategy
|= SAVE_NOINLINE_FPRS_SAVES_LR
;
24702 else if (!(strategy
& SAVE_INLINE_GPRS
)
24703 && info
->first_fp_reg_save
== 64)
24704 strategy
|= SAVE_NOINLINE_GPRS_SAVES_LR
;
24706 else if (TARGET_AIX
&& !(strategy
& REST_INLINE_FPRS
))
24707 strategy
|= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
;
24709 if (TARGET_MACHO
&& !(strategy
& SAVE_INLINE_FPRS
))
24710 strategy
|= SAVE_NOINLINE_FPRS_SAVES_LR
;
24715 /* Calculate the stack information for the current function. This is
24716 complicated by having two separate calling sequences, the AIX calling
24717 sequence and the V.4 calling sequence.
24719 AIX (and Darwin/Mac OS X) stack frames look like:
24721 SP----> +---------------------------------------+
24722 | back chain to caller | 0 0
24723 +---------------------------------------+
24724 | saved CR | 4 8 (8-11)
24725 +---------------------------------------+
24727 +---------------------------------------+
24728 | reserved for compilers | 12 24
24729 +---------------------------------------+
24730 | reserved for binders | 16 32
24731 +---------------------------------------+
24732 | saved TOC pointer | 20 40
24733 +---------------------------------------+
24734 | Parameter save area (+padding*) (P) | 24 48
24735 +---------------------------------------+
24736 | Alloca space (A) | 24+P etc.
24737 +---------------------------------------+
24738 | Local variable space (L) | 24+P+A
24739 +---------------------------------------+
24740 | Float/int conversion temporary (X) | 24+P+A+L
24741 +---------------------------------------+
24742 | Save area for AltiVec registers (W) | 24+P+A+L+X
24743 +---------------------------------------+
24744 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24745 +---------------------------------------+
24746 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24747 +---------------------------------------+
24748 | Save area for GP registers (G) | 24+P+A+X+L+X+W+Y+Z
24749 +---------------------------------------+
24750 | Save area for FP registers (F) | 24+P+A+X+L+X+W+Y+Z+G
24751 +---------------------------------------+
24752 old SP->| back chain to caller's caller |
24753 +---------------------------------------+
24755 * If the alloca area is present, the parameter save area is
24756 padded so that the former starts 16-byte aligned.
24758 The required alignment for AIX configurations is two words (i.e., 8
24761 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24763 SP----> +---------------------------------------+
24764 | Back chain to caller | 0
24765 +---------------------------------------+
24766 | Save area for CR | 8
24767 +---------------------------------------+
24769 +---------------------------------------+
24770 | Saved TOC pointer | 24
24771 +---------------------------------------+
24772 | Parameter save area (+padding*) (P) | 32
24773 +---------------------------------------+
24774 | Alloca space (A) | 32+P
24775 +---------------------------------------+
24776 | Local variable space (L) | 32+P+A
24777 +---------------------------------------+
24778 | Save area for AltiVec registers (W) | 32+P+A+L
24779 +---------------------------------------+
24780 | AltiVec alignment padding (Y) | 32+P+A+L+W
24781 +---------------------------------------+
24782 | Save area for GP registers (G) | 32+P+A+L+W+Y
24783 +---------------------------------------+
24784 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24785 +---------------------------------------+
24786 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24787 +---------------------------------------+
24789 * If the alloca area is present, the parameter save area is
24790 padded so that the former starts 16-byte aligned.
24792 V.4 stack frames look like:
24794 SP----> +---------------------------------------+
24795 | back chain to caller | 0
24796 +---------------------------------------+
24797 | caller's saved LR | 4
24798 +---------------------------------------+
24799 | Parameter save area (+padding*) (P) | 8
24800 +---------------------------------------+
24801 | Alloca space (A) | 8+P
24802 +---------------------------------------+
24803 | Varargs save area (V) | 8+P+A
24804 +---------------------------------------+
24805 | Local variable space (L) | 8+P+A+V
24806 +---------------------------------------+
24807 | Float/int conversion temporary (X) | 8+P+A+V+L
24808 +---------------------------------------+
24809 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24810 +---------------------------------------+
24811 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24812 +---------------------------------------+
24813 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24814 +---------------------------------------+
24815 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24816 +---------------------------------------+
24817 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24818 +---------------------------------------+
24819 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24820 +---------------------------------------+
24821 old SP->| back chain to caller's caller |
24822 +---------------------------------------+
24824 * If the alloca area is present and the required alignment is
24825 16 bytes, the parameter save area is padded so that the
24826 alloca area starts 16-byte aligned.
24828 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24829 given. (But note below and in sysv4.h that we require only 8 and
24830 may round up the size of our stack frame anyways. The historical
24831 reason is early versions of powerpc-linux which didn't properly
24832 align the stack at program startup. A happy side-effect is that
24833 -mno-eabi libraries can be used with -meabi programs.)
24835 The EABI configuration defaults to the V.4 layout. However,
24836 the stack alignment requirements may differ. If -mno-eabi is not
24837 given, the required stack alignment is 8 bytes; if -mno-eabi is
24838 given, the required alignment is 16 bytes. (But see V.4 comment
24841 #ifndef ABI_STACK_BOUNDARY
24842 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24845 static rs6000_stack_t
*
24846 rs6000_stack_info (void)
24848 /* We should never be called for thunks, we are not set up for that. */
24849 gcc_assert (!cfun
->is_thunk
);
24851 rs6000_stack_t
*info
= &stack_info
;
24852 int reg_size
= TARGET_32BIT
? 4 : 8;
24857 HOST_WIDE_INT non_fixed_size
;
24858 bool using_static_chain_p
;
24860 if (reload_completed
&& info
->reload_completed
)
24863 memset (info
, 0, sizeof (*info
));
24864 info
->reload_completed
= reload_completed
;
24866 /* Select which calling sequence. */
24867 info
->abi
= DEFAULT_ABI
;
24869 /* Calculate which registers need to be saved & save area size. */
24870 info
->first_gp_reg_save
= first_reg_to_save ();
24871 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24872 even if it currently looks like we won't. Reload may need it to
24873 get at a constant; if so, it will have already created a constant
24874 pool entry for it. */
24875 if (((TARGET_TOC
&& TARGET_MINIMAL_TOC
)
24876 || (flag_pic
== 1 && DEFAULT_ABI
== ABI_V4
)
24877 || (flag_pic
&& DEFAULT_ABI
== ABI_DARWIN
))
24878 && crtl
->uses_const_pool
24879 && info
->first_gp_reg_save
> RS6000_PIC_OFFSET_TABLE_REGNUM
)
24880 first_gp
= RS6000_PIC_OFFSET_TABLE_REGNUM
;
24882 first_gp
= info
->first_gp_reg_save
;
24884 info
->gp_size
= reg_size
* (32 - first_gp
);
24886 info
->first_fp_reg_save
= first_fp_reg_to_save ();
24887 info
->fp_size
= 8 * (64 - info
->first_fp_reg_save
);
24889 info
->first_altivec_reg_save
= first_altivec_reg_to_save ();
24890 info
->altivec_size
= 16 * (LAST_ALTIVEC_REGNO
+ 1
24891 - info
->first_altivec_reg_save
);
24893 /* Does this function call anything? */
24894 info
->calls_p
= (!crtl
->is_leaf
|| cfun
->machine
->ra_needs_full_frame
);
24896 /* Determine if we need to save the condition code registers. */
24897 if (save_reg_p (CR2_REGNO
)
24898 || save_reg_p (CR3_REGNO
)
24899 || save_reg_p (CR4_REGNO
))
24901 info
->cr_save_p
= 1;
24902 if (DEFAULT_ABI
== ABI_V4
)
24903 info
->cr_size
= reg_size
;
24906 /* If the current function calls __builtin_eh_return, then we need
24907 to allocate stack space for registers that will hold data for
24908 the exception handler. */
24909 if (crtl
->calls_eh_return
)
24912 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
24915 ehrd_size
= i
* UNITS_PER_WORD
;
24920 /* In the ELFv2 ABI, we also need to allocate space for separate
24921 CR field save areas if the function calls __builtin_eh_return. */
24922 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
24924 /* This hard-codes that we have three call-saved CR fields. */
24925 ehcr_size
= 3 * reg_size
;
24926 /* We do *not* use the regular CR save mechanism. */
24927 info
->cr_save_p
= 0;
24932 /* Determine various sizes. */
24933 info
->reg_size
= reg_size
;
24934 info
->fixed_size
= RS6000_SAVE_AREA
;
24935 info
->vars_size
= RS6000_ALIGN (get_frame_size (), 8);
24936 if (cfun
->calls_alloca
)
24938 RS6000_ALIGN (crtl
->outgoing_args_size
+ info
->fixed_size
,
24939 STACK_BOUNDARY
/ BITS_PER_UNIT
) - info
->fixed_size
;
24941 info
->parm_size
= RS6000_ALIGN (crtl
->outgoing_args_size
,
24942 TARGET_ALTIVEC
? 16 : 8);
24943 if (FRAME_GROWS_DOWNWARD
)
24945 += RS6000_ALIGN (info
->fixed_size
+ info
->vars_size
+ info
->parm_size
,
24946 ABI_STACK_BOUNDARY
/ BITS_PER_UNIT
)
24947 - (info
->fixed_size
+ info
->vars_size
+ info
->parm_size
);
24949 if (TARGET_ALTIVEC_ABI
)
24950 info
->vrsave_mask
= compute_vrsave_mask ();
24952 if (TARGET_ALTIVEC_VRSAVE
&& info
->vrsave_mask
)
24953 info
->vrsave_size
= 4;
24955 compute_save_world_info (info
);
24957 /* Calculate the offsets. */
24958 switch (DEFAULT_ABI
)
24962 gcc_unreachable ();
24967 info
->fp_save_offset
= -info
->fp_size
;
24968 info
->gp_save_offset
= info
->fp_save_offset
- info
->gp_size
;
24970 if (TARGET_ALTIVEC_ABI
)
24972 info
->vrsave_save_offset
= info
->gp_save_offset
- info
->vrsave_size
;
24974 /* Align stack so vector save area is on a quadword boundary.
24975 The padding goes above the vectors. */
24976 if (info
->altivec_size
!= 0)
24977 info
->altivec_padding_size
= info
->vrsave_save_offset
& 0xF;
24979 info
->altivec_save_offset
= info
->vrsave_save_offset
24980 - info
->altivec_padding_size
24981 - info
->altivec_size
;
24982 gcc_assert (info
->altivec_size
== 0
24983 || info
->altivec_save_offset
% 16 == 0);
24985 /* Adjust for AltiVec case. */
24986 info
->ehrd_offset
= info
->altivec_save_offset
- ehrd_size
;
24989 info
->ehrd_offset
= info
->gp_save_offset
- ehrd_size
;
24991 info
->ehcr_offset
= info
->ehrd_offset
- ehcr_size
;
24992 info
->cr_save_offset
= reg_size
; /* first word when 64-bit. */
24993 info
->lr_save_offset
= 2*reg_size
;
24997 info
->fp_save_offset
= -info
->fp_size
;
24998 info
->gp_save_offset
= info
->fp_save_offset
- info
->gp_size
;
24999 info
->cr_save_offset
= info
->gp_save_offset
- info
->cr_size
;
25001 if (TARGET_ALTIVEC_ABI
)
25003 info
->vrsave_save_offset
= info
->cr_save_offset
- info
->vrsave_size
;
25005 /* Align stack so vector save area is on a quadword boundary. */
25006 if (info
->altivec_size
!= 0)
25007 info
->altivec_padding_size
= 16 - (-info
->vrsave_save_offset
% 16);
25009 info
->altivec_save_offset
= info
->vrsave_save_offset
25010 - info
->altivec_padding_size
25011 - info
->altivec_size
;
25013 /* Adjust for AltiVec case. */
25014 info
->ehrd_offset
= info
->altivec_save_offset
;
25017 info
->ehrd_offset
= info
->cr_save_offset
;
25019 info
->ehrd_offset
-= ehrd_size
;
25020 info
->lr_save_offset
= reg_size
;
25023 save_align
= (TARGET_ALTIVEC_ABI
|| DEFAULT_ABI
== ABI_DARWIN
) ? 16 : 8;
25024 info
->save_size
= RS6000_ALIGN (info
->fp_size
25026 + info
->altivec_size
25027 + info
->altivec_padding_size
25031 + info
->vrsave_size
,
25034 non_fixed_size
= info
->vars_size
+ info
->parm_size
+ info
->save_size
;
25036 info
->total_size
= RS6000_ALIGN (non_fixed_size
+ info
->fixed_size
,
25037 ABI_STACK_BOUNDARY
/ BITS_PER_UNIT
);
25039 /* Determine if we need to save the link register. */
25041 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
25043 && !TARGET_PROFILE_KERNEL
)
25044 || (DEFAULT_ABI
== ABI_V4
&& cfun
->calls_alloca
)
25045 #ifdef TARGET_RELOCATABLE
25046 || (DEFAULT_ABI
== ABI_V4
25047 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
25048 && !constant_pool_empty_p ())
25050 || rs6000_ra_ever_killed ())
25051 info
->lr_save_p
= 1;
25053 using_static_chain_p
= (cfun
->static_chain_decl
!= NULL_TREE
25054 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
25055 && call_used_regs
[STATIC_CHAIN_REGNUM
]);
25056 info
->savres_strategy
= rs6000_savres_strategy (info
, using_static_chain_p
);
25058 if (!(info
->savres_strategy
& SAVE_INLINE_GPRS
)
25059 || !(info
->savres_strategy
& SAVE_INLINE_FPRS
)
25060 || !(info
->savres_strategy
& SAVE_INLINE_VRS
)
25061 || !(info
->savres_strategy
& REST_INLINE_GPRS
)
25062 || !(info
->savres_strategy
& REST_INLINE_FPRS
)
25063 || !(info
->savres_strategy
& REST_INLINE_VRS
))
25064 info
->lr_save_p
= 1;
25066 if (info
->lr_save_p
)
25067 df_set_regs_ever_live (LR_REGNO
, true);
25069 /* Determine if we need to allocate any stack frame:
25071 For AIX we need to push the stack if a frame pointer is needed
25072 (because the stack might be dynamically adjusted), if we are
25073 debugging, if we make calls, or if the sum of fp_save, gp_save,
25074 and local variables are more than the space needed to save all
25075 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
25076 + 18*8 = 288 (GPR13 reserved).
25078 For V.4 we don't have the stack cushion that AIX uses, but assume
25079 that the debugger can handle stackless frames. */
25084 else if (DEFAULT_ABI
== ABI_V4
)
25085 info
->push_p
= non_fixed_size
!= 0;
25087 else if (frame_pointer_needed
)
25090 else if (TARGET_XCOFF
&& write_symbols
!= NO_DEBUG
)
25094 info
->push_p
= non_fixed_size
> (TARGET_32BIT
? 220 : 288);
25100 debug_stack_info (rs6000_stack_t
*info
)
25102 const char *abi_string
;
25105 info
= rs6000_stack_info ();
25107 fprintf (stderr
, "\nStack information for function %s:\n",
25108 ((current_function_decl
&& DECL_NAME (current_function_decl
))
25109 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl
))
25114 default: abi_string
= "Unknown"; break;
25115 case ABI_NONE
: abi_string
= "NONE"; break;
25116 case ABI_AIX
: abi_string
= "AIX"; break;
25117 case ABI_ELFv2
: abi_string
= "ELFv2"; break;
25118 case ABI_DARWIN
: abi_string
= "Darwin"; break;
25119 case ABI_V4
: abi_string
= "V.4"; break;
25122 fprintf (stderr
, "\tABI = %5s\n", abi_string
);
25124 if (TARGET_ALTIVEC_ABI
)
25125 fprintf (stderr
, "\tALTIVEC ABI extensions enabled.\n");
25127 if (info
->first_gp_reg_save
!= 32)
25128 fprintf (stderr
, "\tfirst_gp_reg_save = %5d\n", info
->first_gp_reg_save
);
25130 if (info
->first_fp_reg_save
!= 64)
25131 fprintf (stderr
, "\tfirst_fp_reg_save = %5d\n", info
->first_fp_reg_save
);
25133 if (info
->first_altivec_reg_save
<= LAST_ALTIVEC_REGNO
)
25134 fprintf (stderr
, "\tfirst_altivec_reg_save = %5d\n",
25135 info
->first_altivec_reg_save
);
25137 if (info
->lr_save_p
)
25138 fprintf (stderr
, "\tlr_save_p = %5d\n", info
->lr_save_p
);
25140 if (info
->cr_save_p
)
25141 fprintf (stderr
, "\tcr_save_p = %5d\n", info
->cr_save_p
);
25143 if (info
->vrsave_mask
)
25144 fprintf (stderr
, "\tvrsave_mask = 0x%x\n", info
->vrsave_mask
);
25147 fprintf (stderr
, "\tpush_p = %5d\n", info
->push_p
);
25150 fprintf (stderr
, "\tcalls_p = %5d\n", info
->calls_p
);
25153 fprintf (stderr
, "\tgp_save_offset = %5d\n", info
->gp_save_offset
);
25156 fprintf (stderr
, "\tfp_save_offset = %5d\n", info
->fp_save_offset
);
25158 if (info
->altivec_size
)
25159 fprintf (stderr
, "\taltivec_save_offset = %5d\n",
25160 info
->altivec_save_offset
);
25162 if (info
->vrsave_size
)
25163 fprintf (stderr
, "\tvrsave_save_offset = %5d\n",
25164 info
->vrsave_save_offset
);
25166 if (info
->lr_save_p
)
25167 fprintf (stderr
, "\tlr_save_offset = %5d\n", info
->lr_save_offset
);
25169 if (info
->cr_save_p
)
25170 fprintf (stderr
, "\tcr_save_offset = %5d\n", info
->cr_save_offset
);
25172 if (info
->varargs_save_offset
)
25173 fprintf (stderr
, "\tvarargs_save_offset = %5d\n", info
->varargs_save_offset
);
25175 if (info
->total_size
)
25176 fprintf (stderr
, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC
"\n",
25179 if (info
->vars_size
)
25180 fprintf (stderr
, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC
"\n",
25183 if (info
->parm_size
)
25184 fprintf (stderr
, "\tparm_size = %5d\n", info
->parm_size
);
25186 if (info
->fixed_size
)
25187 fprintf (stderr
, "\tfixed_size = %5d\n", info
->fixed_size
);
25190 fprintf (stderr
, "\tgp_size = %5d\n", info
->gp_size
);
25193 fprintf (stderr
, "\tfp_size = %5d\n", info
->fp_size
);
25195 if (info
->altivec_size
)
25196 fprintf (stderr
, "\taltivec_size = %5d\n", info
->altivec_size
);
25198 if (info
->vrsave_size
)
25199 fprintf (stderr
, "\tvrsave_size = %5d\n", info
->vrsave_size
);
25201 if (info
->altivec_padding_size
)
25202 fprintf (stderr
, "\taltivec_padding_size= %5d\n",
25203 info
->altivec_padding_size
);
25206 fprintf (stderr
, "\tcr_size = %5d\n", info
->cr_size
);
25208 if (info
->save_size
)
25209 fprintf (stderr
, "\tsave_size = %5d\n", info
->save_size
);
25211 if (info
->reg_size
!= 4)
25212 fprintf (stderr
, "\treg_size = %5d\n", info
->reg_size
);
25214 fprintf (stderr
, "\tsave-strategy = %04x\n", info
->savres_strategy
);
25216 fprintf (stderr
, "\n");
25220 rs6000_return_addr (int count
, rtx frame
)
25222 /* Currently we don't optimize very well between prolog and body
25223 code and for PIC code the code can be actually quite bad, so
25224 don't try to be too clever here. */
25226 || ((DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
) && flag_pic
))
25228 cfun
->machine
->ra_needs_full_frame
= 1;
25235 plus_constant (Pmode
,
25237 (gen_rtx_MEM (Pmode
,
25238 memory_address (Pmode
, frame
))),
25239 RETURN_ADDRESS_OFFSET
)));
25242 cfun
->machine
->ra_need_lr
= 1;
25243 return get_hard_reg_initial_val (Pmode
, LR_REGNO
);
25246 /* Say whether a function is a candidate for sibcall handling or not. */
25249 rs6000_function_ok_for_sibcall (tree decl
, tree exp
)
25254 fntype
= TREE_TYPE (decl
);
25256 fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
25258 /* We can't do it if the called function has more vector parameters
25259 than the current function; there's nowhere to put the VRsave code. */
25260 if (TARGET_ALTIVEC_ABI
25261 && TARGET_ALTIVEC_VRSAVE
25262 && !(decl
&& decl
== current_function_decl
))
25264 function_args_iterator args_iter
;
25268 /* Functions with vector parameters are required to have a
25269 prototype, so the argument type info must be available
25271 FOREACH_FUNCTION_ARGS(fntype
, type
, args_iter
)
25272 if (TREE_CODE (type
) == VECTOR_TYPE
25273 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type
)))
25276 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl
), type
, args_iter
)
25277 if (TREE_CODE (type
) == VECTOR_TYPE
25278 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type
)))
25285 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25286 functions, because the callee may have a different TOC pointer to
25287 the caller and there's no way to ensure we restore the TOC when
25288 we return. With the secure-plt SYSV ABI we can't make non-local
25289 calls when -fpic/PIC because the plt call stubs use r30. */
25290 if (DEFAULT_ABI
== ABI_DARWIN
25291 || ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
25293 && !DECL_EXTERNAL (decl
)
25294 && !DECL_WEAK (decl
)
25295 && (*targetm
.binds_local_p
) (decl
))
25296 || (DEFAULT_ABI
== ABI_V4
25297 && (!TARGET_SECURE_PLT
25300 && (*targetm
.binds_local_p
) (decl
)))))
25302 tree attr_list
= TYPE_ATTRIBUTES (fntype
);
25304 if (!lookup_attribute ("longcall", attr_list
)
25305 || lookup_attribute ("shortcall", attr_list
))
25313 rs6000_ra_ever_killed (void)
25319 if (cfun
->is_thunk
)
25322 if (cfun
->machine
->lr_save_state
)
25323 return cfun
->machine
->lr_save_state
- 1;
25325 /* regs_ever_live has LR marked as used if any sibcalls are present,
25326 but this should not force saving and restoring in the
25327 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25328 clobbers LR, so that is inappropriate. */
25330 /* Also, the prologue can generate a store into LR that
25331 doesn't really count, like this:
25334 bcl to set PIC register
25338 When we're called from the epilogue, we need to avoid counting
25339 this as a store. */
25341 push_topmost_sequence ();
25342 top
= get_insns ();
25343 pop_topmost_sequence ();
25344 reg
= gen_rtx_REG (Pmode
, LR_REGNO
);
25346 for (insn
= NEXT_INSN (top
); insn
!= NULL_RTX
; insn
= NEXT_INSN (insn
))
25352 if (!SIBLING_CALL_P (insn
))
25355 else if (find_regno_note (insn
, REG_INC
, LR_REGNO
))
25357 else if (set_of (reg
, insn
) != NULL_RTX
25358 && !prologue_epilogue_contains (insn
))
25365 /* Emit instructions needed to load the TOC register.
25366 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
25367 a constant pool; or for SVR4 -fpic. */
25370 rs6000_emit_load_toc_table (int fromprolog
)
25373 dest
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
25375 if (TARGET_ELF
&& TARGET_SECURE_PLT
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
)
25378 rtx lab
, tmp1
, tmp2
, got
;
25380 lab
= gen_label_rtx ();
25381 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (lab
));
25382 lab
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
25385 got
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
25389 got
= rs6000_got_sym ();
25390 tmp1
= tmp2
= dest
;
25393 tmp1
= gen_reg_rtx (Pmode
);
25394 tmp2
= gen_reg_rtx (Pmode
);
25396 emit_insn (gen_load_toc_v4_PIC_1 (lab
));
25397 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
25398 emit_insn (gen_load_toc_v4_PIC_3b (tmp2
, tmp1
, got
, lab
));
25399 emit_insn (gen_load_toc_v4_PIC_3c (dest
, tmp2
, got
, lab
));
25401 else if (TARGET_ELF
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
25403 emit_insn (gen_load_toc_v4_pic_si ());
25404 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
25406 else if (TARGET_ELF
&& DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
25409 rtx temp0
= (fromprolog
25410 ? gen_rtx_REG (Pmode
, 0)
25411 : gen_reg_rtx (Pmode
));
25417 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
25418 symF
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
25420 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCL", rs6000_pic_labelno
);
25421 symL
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (buf
));
25423 emit_insn (gen_load_toc_v4_PIC_1 (symF
));
25424 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
25425 emit_insn (gen_load_toc_v4_PIC_2 (temp0
, dest
, symL
, symF
));
25431 tocsym
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
25433 lab
= gen_label_rtx ();
25434 emit_insn (gen_load_toc_v4_PIC_1b (tocsym
, lab
));
25435 emit_move_insn (dest
, gen_rtx_REG (Pmode
, LR_REGNO
));
25436 if (TARGET_LINK_STACK
)
25437 emit_insn (gen_addsi3 (dest
, dest
, GEN_INT (4)));
25438 emit_move_insn (temp0
, gen_rtx_MEM (Pmode
, dest
));
25440 emit_insn (gen_addsi3 (dest
, temp0
, dest
));
25442 else if (TARGET_ELF
&& !TARGET_AIX
&& flag_pic
== 0 && TARGET_MINIMAL_TOC
)
25444 /* This is for AIX code running in non-PIC ELF32. */
25445 rtx realsym
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (toc_label_name
));
25448 emit_insn (gen_elf_high (dest
, realsym
));
25449 emit_insn (gen_elf_low (dest
, dest
, realsym
));
25453 gcc_assert (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
);
25456 emit_insn (gen_load_toc_aix_si (dest
));
25458 emit_insn (gen_load_toc_aix_di (dest
));
25462 /* Emit instructions to restore the link register after determining where
25463 its value has been stored. */
25466 rs6000_emit_eh_reg_restore (rtx source
, rtx scratch
)
25468 rs6000_stack_t
*info
= rs6000_stack_info ();
25471 operands
[0] = source
;
25472 operands
[1] = scratch
;
25474 if (info
->lr_save_p
)
25476 rtx frame_rtx
= stack_pointer_rtx
;
25477 HOST_WIDE_INT sp_offset
= 0;
25480 if (frame_pointer_needed
25481 || cfun
->calls_alloca
25482 || info
->total_size
> 32767)
25484 tmp
= gen_frame_mem (Pmode
, frame_rtx
);
25485 emit_move_insn (operands
[1], tmp
);
25486 frame_rtx
= operands
[1];
25488 else if (info
->push_p
)
25489 sp_offset
= info
->total_size
;
25491 tmp
= plus_constant (Pmode
, frame_rtx
,
25492 info
->lr_save_offset
+ sp_offset
);
25493 tmp
= gen_frame_mem (Pmode
, tmp
);
25494 emit_move_insn (tmp
, operands
[0]);
25497 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNO
), operands
[0]);
25499 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25500 state of lr_save_p so any change from here on would be a bug. In
25501 particular, stop rs6000_ra_ever_killed from considering the SET
25502 of lr we may have added just above. */
25503 cfun
->machine
->lr_save_state
= info
->lr_save_p
+ 1;
25506 static GTY(()) alias_set_type set
= -1;
25509 get_TOC_alias_set (void)
25512 set
= new_alias_set ();
25516 /* This returns nonzero if the current function uses the TOC. This is
25517 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25518 is generated by the ABI_V4 load_toc_* patterns. */
25525 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
25528 rtx pat
= PATTERN (insn
);
25531 if (GET_CODE (pat
) == PARALLEL
)
25532 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
25534 rtx sub
= XVECEXP (pat
, 0, i
);
25535 if (GET_CODE (sub
) == USE
)
25537 sub
= XEXP (sub
, 0);
25538 if (GET_CODE (sub
) == UNSPEC
25539 && XINT (sub
, 1) == UNSPEC_TOC
)
25549 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
25551 rtx tocrel
, tocreg
, hi
;
25553 if (TARGET_DEBUG_ADDR
)
25555 if (GET_CODE (symbol
) == SYMBOL_REF
)
25556 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25560 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
25561 GET_RTX_NAME (GET_CODE (symbol
)));
25562 debug_rtx (symbol
);
25566 if (!can_create_pseudo_p ())
25567 df_set_regs_ever_live (TOC_REGISTER
, true);
25569 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
25570 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
25571 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
25574 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
25575 if (largetoc_reg
!= NULL
)
25577 emit_move_insn (largetoc_reg
, hi
);
25580 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
25583 /* Issue assembly directives that create a reference to the given DWARF
25584 FRAME_TABLE_LABEL from the current function section. */
25586 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label
)
25588 fprintf (asm_out_file
, "\t.ref %s\n",
25589 (* targetm
.strip_name_encoding
) (frame_table_label
));
25592 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25593 and the change to the stack pointer. */
25596 rs6000_emit_stack_tie (rtx fp
, bool hard_frame_needed
)
25603 regs
[i
++] = gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25604 if (hard_frame_needed
)
25605 regs
[i
++] = gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
25606 if (!(REGNO (fp
) == STACK_POINTER_REGNUM
25607 || (hard_frame_needed
25608 && REGNO (fp
) == HARD_FRAME_POINTER_REGNUM
)))
25611 p
= rtvec_alloc (i
);
25614 rtx mem
= gen_frame_mem (BLKmode
, regs
[i
]);
25615 RTVEC_ELT (p
, i
) = gen_rtx_SET (mem
, const0_rtx
);
25618 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode
, p
)));
25621 /* Emit the correct code for allocating stack space, as insns.
25622 If COPY_REG, make sure a copy of the old frame is left there.
25623 The generated code may use hard register 0 as a temporary. */
25626 rs6000_emit_allocate_stack (HOST_WIDE_INT size
, rtx copy_reg
, int copy_off
)
25629 rtx stack_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25630 rtx tmp_reg
= gen_rtx_REG (Pmode
, 0);
25631 rtx todec
= gen_int_mode (-size
, Pmode
);
25634 if (INTVAL (todec
) != -size
)
25636 warning (0, "stack frame too large");
25637 emit_insn (gen_trap ());
25641 if (crtl
->limit_stack
)
25643 if (REG_P (stack_limit_rtx
)
25644 && REGNO (stack_limit_rtx
) > 1
25645 && REGNO (stack_limit_rtx
) <= 31)
25648 = gen_add3_insn (tmp_reg
, stack_limit_rtx
, GEN_INT (size
));
25651 emit_insn (gen_cond_trap (LTU
, stack_reg
, tmp_reg
, const0_rtx
));
25653 else if (GET_CODE (stack_limit_rtx
) == SYMBOL_REF
25655 && DEFAULT_ABI
== ABI_V4
25658 rtx toload
= gen_rtx_CONST (VOIDmode
,
25659 gen_rtx_PLUS (Pmode
,
25663 emit_insn (gen_elf_high (tmp_reg
, toload
));
25664 emit_insn (gen_elf_low (tmp_reg
, tmp_reg
, toload
));
25665 emit_insn (gen_cond_trap (LTU
, stack_reg
, tmp_reg
,
25669 warning (0, "stack limit expression is not supported");
25675 emit_insn (gen_add3_insn (copy_reg
, stack_reg
, GEN_INT (copy_off
)));
25677 emit_move_insn (copy_reg
, stack_reg
);
25682 /* Need a note here so that try_split doesn't get confused. */
25683 if (get_last_insn () == NULL_RTX
)
25684 emit_note (NOTE_INSN_DELETED
);
25685 insn
= emit_move_insn (tmp_reg
, todec
);
25686 try_split (PATTERN (insn
), insn
, 0);
25690 insn
= emit_insn (TARGET_32BIT
25691 ? gen_movsi_update_stack (stack_reg
, stack_reg
,
25693 : gen_movdi_di_update_stack (stack_reg
, stack_reg
,
25694 todec
, stack_reg
));
25695 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25696 it now and set the alias set/attributes. The above gen_*_update
25697 calls will generate a PARALLEL with the MEM set being the first
25699 par
= PATTERN (insn
);
25700 gcc_assert (GET_CODE (par
) == PARALLEL
);
25701 set
= XVECEXP (par
, 0, 0);
25702 gcc_assert (GET_CODE (set
) == SET
);
25703 mem
= SET_DEST (set
);
25704 gcc_assert (MEM_P (mem
));
25705 MEM_NOTRAP_P (mem
) = 1;
25706 set_mem_alias_set (mem
, get_frame_alias_set ());
25708 RTX_FRAME_RELATED_P (insn
) = 1;
25709 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
25710 gen_rtx_SET (stack_reg
, gen_rtx_PLUS (Pmode
, stack_reg
,
25711 GEN_INT (-size
))));
/* Interval between stack probes; must fit a 16-bit displacement because
   the probe loop uses immediate-offset addressing.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 32768
#error Cannot use indexed addressing mode for stack probing
#endif
25721 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25722 inclusive. These are offsets from the current stack pointer. */
25725 rs6000_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
25727 /* See if we have a constant small number of probes to generate. If so,
25728 that's the easy case. */
25729 if (first
+ size
<= 32768)
25733 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25734 it exceeds SIZE. If only one probe is needed, this will not
25735 generate any code. Then probe at FIRST + SIZE. */
25736 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
25737 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
25740 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
25744 /* Otherwise, do the same as above, but in a loop. Note that we must be
25745 extra careful with variables wrapping around because we might be at
25746 the very top (or the very bottom) of the address space and we have
25747 to be able to handle this case properly; in particular, we use an
25748 equality test for the loop condition. */
25751 HOST_WIDE_INT rounded_size
;
25752 rtx r12
= gen_rtx_REG (Pmode
, 12);
25753 rtx r0
= gen_rtx_REG (Pmode
, 0);
25755 /* Sanity check for the addressing mode we're going to use. */
25756 gcc_assert (first
<= 32768);
25758 /* Step 1: round SIZE to the previous multiple of the interval. */
25760 rounded_size
= ROUND_DOWN (size
, PROBE_INTERVAL
);
25763 /* Step 2: compute initial and final value of the loop counter. */
25765 /* TEST_ADDR = SP + FIRST. */
25766 emit_insn (gen_rtx_SET (r12
, plus_constant (Pmode
, stack_pointer_rtx
,
25769 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25770 if (rounded_size
> 32768)
25772 emit_move_insn (r0
, GEN_INT (-rounded_size
));
25773 emit_insn (gen_rtx_SET (r0
, gen_rtx_PLUS (Pmode
, r12
, r0
)));
25776 emit_insn (gen_rtx_SET (r0
, plus_constant (Pmode
, r12
,
25780 /* Step 3: the loop
25784 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25787 while (TEST_ADDR != LAST_ADDR)
25789 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25790 until it is equal to ROUNDED_SIZE. */
25793 emit_insn (gen_probe_stack_rangedi (r12
, r12
, r0
));
25795 emit_insn (gen_probe_stack_rangesi (r12
, r12
, r0
));
25798 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25799 that SIZE is equal to ROUNDED_SIZE. */
25801 if (size
!= rounded_size
)
25802 emit_stack_probe (plus_constant (Pmode
, r12
, rounded_size
- size
));
25806 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25807 absolute addresses. */
25810 output_probe_stack_range (rtx reg1
, rtx reg2
)
25812 static int labelno
= 0;
25816 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
25819 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
25821 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25823 xops
[1] = GEN_INT (-PROBE_INTERVAL
);
25824 output_asm_insn ("addi %0,%0,%1", xops
);
25826 /* Probe at TEST_ADDR. */
25827 xops
[1] = gen_rtx_REG (Pmode
, 0);
25828 output_asm_insn ("stw %1,0(%0)", xops
);
25830 /* Test if TEST_ADDR == LAST_ADDR. */
25833 output_asm_insn ("cmpd 0,%0,%1", xops
);
25835 output_asm_insn ("cmpw 0,%0,%1", xops
);
25838 fputs ("\tbne 0,", asm_out_file
);
25839 assemble_name_raw (asm_out_file
, loop_lab
);
25840 fputc ('\n', asm_out_file
);
25845 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25846 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25847 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25848 deduce these equivalences by itself so it wasn't necessary to hold
25849 its hand so much. Don't be tempted to always supply d2_f_d_e with
25850 the actual cfa register, ie. r31 when we are using a hard frame
25851 pointer. That fails when saving regs off r1, and sched moves the
25852 r31 setup past the reg saves. */
25855 rs6000_frame_related (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT val
,
25856 rtx reg2
, rtx repl2
)
25860 if (REGNO (reg
) == STACK_POINTER_REGNUM
)
25862 gcc_checking_assert (val
== 0);
25866 repl
= gen_rtx_PLUS (Pmode
, gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
),
25869 rtx pat
= PATTERN (insn
);
25870 if (!repl
&& !reg2
)
25872 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25873 if (GET_CODE (pat
) == PARALLEL
)
25874 for (int i
= 0; i
< XVECLEN (pat
, 0); i
++)
25875 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
25877 rtx set
= XVECEXP (pat
, 0, i
);
25879 /* If this PARALLEL has been emitted for out-of-line
25880 register save functions, or store multiple, then omit
25881 eh_frame info for any user-defined global regs. If
25882 eh_frame info is supplied, frame unwinding will
25883 restore a user reg. */
25884 if (!REG_P (SET_SRC (set
))
25885 || !fixed_reg_p (REGNO (SET_SRC (set
))))
25886 RTX_FRAME_RELATED_P (set
) = 1;
25888 RTX_FRAME_RELATED_P (insn
) = 1;
25892 /* We expect that 'pat' is either a SET or a PARALLEL containing
25893 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25894 are important so they all have to be marked RTX_FRAME_RELATED_P.
25895 Call simplify_replace_rtx on the SETs rather than the whole insn
25896 so as to leave the other stuff alone (for example USE of r12). */
25898 set_used_flags (pat
);
25899 if (GET_CODE (pat
) == SET
)
25902 pat
= simplify_replace_rtx (pat
, reg
, repl
);
25904 pat
= simplify_replace_rtx (pat
, reg2
, repl2
);
25906 else if (GET_CODE (pat
) == PARALLEL
)
25908 pat
= shallow_copy_rtx (pat
);
25909 XVEC (pat
, 0) = shallow_copy_rtvec (XVEC (pat
, 0));
25911 for (int i
= 0; i
< XVECLEN (pat
, 0); i
++)
25912 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
25914 rtx set
= XVECEXP (pat
, 0, i
);
25917 set
= simplify_replace_rtx (set
, reg
, repl
);
25919 set
= simplify_replace_rtx (set
, reg2
, repl2
);
25920 XVECEXP (pat
, 0, i
) = set
;
25922 /* Omit eh_frame info for any user-defined global regs. */
25923 if (!REG_P (SET_SRC (set
))
25924 || !fixed_reg_p (REGNO (SET_SRC (set
))))
25925 RTX_FRAME_RELATED_P (set
) = 1;
25929 gcc_unreachable ();
25931 RTX_FRAME_RELATED_P (insn
) = 1;
25932 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, copy_rtx_if_shared (pat
));
25937 /* Returns an insn that has a vrsave set operation with the
25938 appropriate CLOBBERs. */
25941 generate_set_vrsave (rtx reg
, rs6000_stack_t
*info
, int epiloguep
)
25944 rtx insn
, clobs
[TOTAL_ALTIVEC_REGS
+ 1];
25945 rtx vrsave
= gen_rtx_REG (SImode
, VRSAVE_REGNO
);
25948 = gen_rtx_SET (vrsave
,
25949 gen_rtx_UNSPEC_VOLATILE (SImode
,
25950 gen_rtvec (2, reg
, vrsave
),
25951 UNSPECV_SET_VRSAVE
));
25955 /* We need to clobber the registers in the mask so the scheduler
25956 does not move sets to VRSAVE before sets of AltiVec registers.
25958 However, if the function receives nonlocal gotos, reload will set
25959 all call saved registers live. We will end up with:
25961 (set (reg 999) (mem))
25962 (parallel [ (set (reg vrsave) (unspec blah))
25963 (clobber (reg 999))])
25965 The clobber will cause the store into reg 999 to be dead, and
25966 flow will attempt to delete an epilogue insn. In this case, we
25967 need an unspec use/set of the register. */
25969 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
25970 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
25972 if (!epiloguep
|| call_used_regs
[i
])
25973 clobs
[nclobs
++] = gen_rtx_CLOBBER (VOIDmode
,
25974 gen_rtx_REG (V4SImode
, i
));
25977 rtx reg
= gen_rtx_REG (V4SImode
, i
);
25980 = gen_rtx_SET (reg
,
25981 gen_rtx_UNSPEC (V4SImode
,
25982 gen_rtvec (1, reg
), 27));
25986 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nclobs
));
25988 for (i
= 0; i
< nclobs
; ++i
)
25989 XVECEXP (insn
, 0, i
) = clobs
[i
];
25995 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
25999 addr
= gen_rtx_PLUS (Pmode
, frame_reg
, GEN_INT (offset
));
26000 mem
= gen_frame_mem (GET_MODE (reg
), addr
);
26001 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
26005 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
26007 return gen_frame_set (reg
, frame_reg
, offset
, false);
26011 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
26013 return gen_frame_set (reg
, frame_reg
, offset
, true);
26016 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26017 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26020 emit_frame_save (rtx frame_reg
, machine_mode mode
,
26021 unsigned int regno
, int offset
, HOST_WIDE_INT frame_reg_to_sp
)
26025 /* Some cases that need register indexed addressing. */
26026 gcc_checking_assert (!(TARGET_ALTIVEC_ABI
&& ALTIVEC_VECTOR_MODE (mode
))
26027 || (TARGET_VSX
&& ALTIVEC_OR_VSX_VECTOR_MODE (mode
)));
26029 reg
= gen_rtx_REG (mode
, regno
);
26030 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, frame_reg
, offset
));
26031 return rs6000_frame_related (insn
, frame_reg
, frame_reg_to_sp
,
26032 NULL_RTX
, NULL_RTX
);
26035 /* Emit an offset memory reference suitable for a frame store, while
26036 converting to a valid addressing mode. */
26039 gen_frame_mem_offset (machine_mode mode
, rtx reg
, int offset
)
26041 return gen_frame_mem (mode
, gen_rtx_PLUS (Pmode
, reg
, GEN_INT (offset
)));
26044 #ifndef TARGET_FIX_AND_CONTINUE
26045 #define TARGET_FIX_AND_CONTINUE 0
26048 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
26049 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26050 #define LAST_SAVRES_REGISTER 31
26051 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26062 static GTY(()) rtx savres_routine_syms
[N_SAVRES_REGISTERS
][12];
26064 /* Temporary holding space for an out-of-line register save/restore
26066 static char savres_routine_name
[30];
26068 /* Return the name for an out-of-line register save/restore routine.
26069 We are saving/restoring GPRs if GPR is true. */
26072 rs6000_savres_routine_name (int regno
, int sel
)
26074 const char *prefix
= "";
26075 const char *suffix
= "";
26077 /* Different targets are supposed to define
26078 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26079 routine name could be defined with:
26081 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26083 This is a nice idea in practice, but in reality, things are
26084 complicated in several ways:
26086 - ELF targets have save/restore routines for GPRs.
26088 - PPC64 ELF targets have routines for save/restore of GPRs that
26089 differ in what they do with the link register, so having a set
26090 prefix doesn't work. (We only use one of the save routines at
26091 the moment, though.)
26093 - PPC32 elf targets have "exit" versions of the restore routines
26094 that restore the link register and can save some extra space.
26095 These require an extra suffix. (There are also "tail" versions
26096 of the restore routines and "GOT" versions of the save routines,
26097 but we don't generate those at present. Same problems apply,
26100 We deal with all this by synthesizing our own prefix/suffix and
26101 using that for the simple sprintf call shown above. */
26102 if (DEFAULT_ABI
== ABI_V4
)
26107 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
26108 prefix
= (sel
& SAVRES_SAVE
) ? "_savegpr_" : "_restgpr_";
26109 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
26110 prefix
= (sel
& SAVRES_SAVE
) ? "_savefpr_" : "_restfpr_";
26111 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
26112 prefix
= (sel
& SAVRES_SAVE
) ? "_savevr_" : "_restvr_";
26116 if ((sel
& SAVRES_LR
))
26119 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
26121 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26122 /* No out-of-line save/restore routines for GPRs on AIX. */
26123 gcc_assert (!TARGET_AIX
|| (sel
& SAVRES_REG
) != SAVRES_GPR
);
26127 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
26128 prefix
= ((sel
& SAVRES_SAVE
)
26129 ? ((sel
& SAVRES_LR
) ? "_savegpr0_" : "_savegpr1_")
26130 : ((sel
& SAVRES_LR
) ? "_restgpr0_" : "_restgpr1_"));
26131 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
26133 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26134 if ((sel
& SAVRES_LR
))
26135 prefix
= ((sel
& SAVRES_SAVE
) ? "_savefpr_" : "_restfpr_");
26139 prefix
= (sel
& SAVRES_SAVE
) ? SAVE_FP_PREFIX
: RESTORE_FP_PREFIX
;
26140 suffix
= (sel
& SAVRES_SAVE
) ? SAVE_FP_SUFFIX
: RESTORE_FP_SUFFIX
;
26143 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
26144 prefix
= (sel
& SAVRES_SAVE
) ? "_savevr_" : "_restvr_";
26149 if (DEFAULT_ABI
== ABI_DARWIN
)
26151 /* The Darwin approach is (slightly) different, in order to be
26152 compatible with code generated by the system toolchain. There is a
26153 single symbol for the start of save sequence, and the code here
26154 embeds an offset into that code on the basis of the first register
26156 prefix
= (sel
& SAVRES_SAVE
) ? "save" : "rest" ;
26157 if ((sel
& SAVRES_REG
) == SAVRES_GPR
)
26158 sprintf (savres_routine_name
, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix
,
26159 ((sel
& SAVRES_LR
) ? "x" : ""), (regno
== 13 ? "" : "+"),
26160 (regno
- 13) * 4, prefix
, regno
);
26161 else if ((sel
& SAVRES_REG
) == SAVRES_FPR
)
26162 sprintf (savres_routine_name
, "*%sFP%s%.0d ; %s f%d-f31", prefix
,
26163 (regno
== 14 ? "" : "+"), (regno
- 14) * 4, prefix
, regno
);
26164 else if ((sel
& SAVRES_REG
) == SAVRES_VR
)
26165 sprintf (savres_routine_name
, "*%sVEC%s%.0d ; %s v%d-v31", prefix
,
26166 (regno
== 20 ? "" : "+"), (regno
- 20) * 8, prefix
, regno
);
26171 sprintf (savres_routine_name
, "%s%d%s", prefix
, regno
, suffix
);
26173 return savres_routine_name
;
26176 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
26177 We are saving/restoring GPRs if GPR is true. */
26180 rs6000_savres_routine_sym (rs6000_stack_t
*info
, int sel
)
26182 int regno
= ((sel
& SAVRES_REG
) == SAVRES_GPR
26183 ? info
->first_gp_reg_save
26184 : (sel
& SAVRES_REG
) == SAVRES_FPR
26185 ? info
->first_fp_reg_save
- 32
26186 : (sel
& SAVRES_REG
) == SAVRES_VR
26187 ? info
->first_altivec_reg_save
- FIRST_ALTIVEC_REGNO
26192 /* Don't generate bogus routine names. */
26193 gcc_assert (FIRST_SAVRES_REGISTER
<= regno
26194 && regno
<= LAST_SAVRES_REGISTER
26195 && select
>= 0 && select
<= 12);
26197 sym
= savres_routine_syms
[regno
-FIRST_SAVRES_REGISTER
][select
];
26203 name
= rs6000_savres_routine_name (regno
, sel
);
26205 sym
= savres_routine_syms
[regno
-FIRST_SAVRES_REGISTER
][select
]
26206 = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
26207 SYMBOL_REF_FLAGS (sym
) |= SYMBOL_FLAG_FUNCTION
;
26213 /* Emit a sequence of insns, including a stack tie if needed, for
26214 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26215 reset the stack pointer, but move the base of the frame into
26216 reg UPDT_REGNO for use by out-of-line register restore routines. */
26219 rs6000_emit_stack_reset (rtx frame_reg_rtx
, HOST_WIDE_INT frame_off
,
26220 unsigned updt_regno
)
26222 /* If there is nothing to do, don't do anything. */
26223 if (frame_off
== 0 && REGNO (frame_reg_rtx
) == updt_regno
)
26226 rtx updt_reg_rtx
= gen_rtx_REG (Pmode
, updt_regno
);
26228 /* This blockage is needed so that sched doesn't decide to move
26229 the sp change before the register restores. */
26230 if (DEFAULT_ABI
== ABI_V4
)
26231 return emit_insn (gen_stack_restore_tie (updt_reg_rtx
, frame_reg_rtx
,
26232 GEN_INT (frame_off
)));
26234 /* If we are restoring registers out-of-line, we will be using the
26235 "exit" variants of the restore routines, which will reset the
26236 stack for us. But we do need to point updt_reg into the
26237 right place for those routines. */
26238 if (frame_off
!= 0)
26239 return emit_insn (gen_add3_insn (updt_reg_rtx
,
26240 frame_reg_rtx
, GEN_INT (frame_off
)));
26242 return emit_move_insn (updt_reg_rtx
, frame_reg_rtx
);
26247 /* Return the register number used as a pointer by out-of-line
26248 save/restore functions. */
26250 static inline unsigned
26251 ptr_regno_for_savres (int sel
)
26253 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
26254 return (sel
& SAVRES_REG
) == SAVRES_FPR
|| (sel
& SAVRES_LR
) ? 1 : 12;
26255 return DEFAULT_ABI
== ABI_DARWIN
&& (sel
& SAVRES_REG
) == SAVRES_FPR
? 1 : 11;
26258 /* Construct a parallel rtx describing the effect of a call to an
26259 out-of-line register save/restore routine, and emit the insn
26260 or jump_insn as appropriate. */
26263 rs6000_emit_savres_rtx (rs6000_stack_t
*info
,
26264 rtx frame_reg_rtx
, int save_area_offset
, int lr_offset
,
26265 machine_mode reg_mode
, int sel
)
26268 int offset
, start_reg
, end_reg
, n_regs
, use_reg
;
26269 int reg_size
= GET_MODE_SIZE (reg_mode
);
26276 start_reg
= ((sel
& SAVRES_REG
) == SAVRES_GPR
26277 ? info
->first_gp_reg_save
26278 : (sel
& SAVRES_REG
) == SAVRES_FPR
26279 ? info
->first_fp_reg_save
26280 : (sel
& SAVRES_REG
) == SAVRES_VR
26281 ? info
->first_altivec_reg_save
26283 end_reg
= ((sel
& SAVRES_REG
) == SAVRES_GPR
26285 : (sel
& SAVRES_REG
) == SAVRES_FPR
26287 : (sel
& SAVRES_REG
) == SAVRES_VR
26288 ? LAST_ALTIVEC_REGNO
+ 1
26290 n_regs
= end_reg
- start_reg
;
26291 p
= rtvec_alloc (3 + ((sel
& SAVRES_LR
) ? 1 : 0)
26292 + ((sel
& SAVRES_REG
) == SAVRES_VR
? 1 : 0)
26295 if (!(sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
26296 RTVEC_ELT (p
, offset
++) = ret_rtx
;
26298 RTVEC_ELT (p
, offset
++)
26299 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
26301 sym
= rs6000_savres_routine_sym (info
, sel
);
26302 RTVEC_ELT (p
, offset
++) = gen_rtx_USE (VOIDmode
, sym
);
26304 use_reg
= ptr_regno_for_savres (sel
);
26305 if ((sel
& SAVRES_REG
) == SAVRES_VR
)
26307 /* Vector regs are saved/restored using [reg+reg] addressing. */
26308 RTVEC_ELT (p
, offset
++)
26309 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, use_reg
));
26310 RTVEC_ELT (p
, offset
++)
26311 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, 0));
26314 RTVEC_ELT (p
, offset
++)
26315 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, use_reg
));
26317 for (i
= 0; i
< end_reg
- start_reg
; i
++)
26318 RTVEC_ELT (p
, i
+ offset
)
26319 = gen_frame_set (gen_rtx_REG (reg_mode
, start_reg
+ i
),
26320 frame_reg_rtx
, save_area_offset
+ reg_size
* i
,
26321 (sel
& SAVRES_SAVE
) != 0);
26323 if ((sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
26324 RTVEC_ELT (p
, i
+ offset
)
26325 = gen_frame_store (gen_rtx_REG (Pmode
, 0), frame_reg_rtx
, lr_offset
);
26327 par
= gen_rtx_PARALLEL (VOIDmode
, p
);
26329 if (!(sel
& SAVRES_SAVE
) && (sel
& SAVRES_LR
))
26331 insn
= emit_jump_insn (par
);
26332 JUMP_LABEL (insn
) = ret_rtx
;
26335 insn
= emit_insn (par
);
26339 /* Emit code to store CR fields that need to be saved into REG. */
26342 rs6000_emit_move_from_cr (rtx reg
)
26344 /* Only the ELFv2 ABI allows storing only selected fields. */
26345 if (DEFAULT_ABI
== ABI_ELFv2
&& TARGET_MFCRF
)
26347 int i
, cr_reg
[8], count
= 0;
26349 /* Collect CR fields that must be saved. */
26350 for (i
= 0; i
< 8; i
++)
26351 if (save_reg_p (CR0_REGNO
+ i
))
26352 cr_reg
[count
++] = i
;
26354 /* If it's just a single one, use mfcrf. */
26357 rtvec p
= rtvec_alloc (1);
26358 rtvec r
= rtvec_alloc (2);
26359 RTVEC_ELT (r
, 0) = gen_rtx_REG (CCmode
, CR0_REGNO
+ cr_reg
[0]);
26360 RTVEC_ELT (r
, 1) = GEN_INT (1 << (7 - cr_reg
[0]));
26362 = gen_rtx_SET (reg
,
26363 gen_rtx_UNSPEC (SImode
, r
, UNSPEC_MOVESI_FROM_CR
));
26365 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
26369 /* ??? It might be better to handle count == 2 / 3 cases here
26370 as well, using logical operations to combine the values. */
26373 emit_insn (gen_movesi_from_cr (reg
));
26376 /* Return whether the split-stack arg pointer (r12) is used. */
26379 split_stack_arg_pointer_used_p (void)
26381 /* If the pseudo holding the arg pointer is no longer a pseudo,
26382 then the arg pointer is used. */
26383 if (cfun
->machine
->split_stack_arg_pointer
!= NULL_RTX
26384 && (!REG_P (cfun
->machine
->split_stack_arg_pointer
)
26385 || (REGNO (cfun
->machine
->split_stack_arg_pointer
)
26386 < FIRST_PSEUDO_REGISTER
)))
26389 /* Unfortunately we also need to do some code scanning, since
26390 r12 may have been substituted for the pseudo. */
26392 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
26393 FOR_BB_INSNS (bb
, insn
)
26394 if (NONDEBUG_INSN_P (insn
))
26396 /* A call destroys r12. */
26401 FOR_EACH_INSN_USE (use
, insn
)
26403 rtx x
= DF_REF_REG (use
);
26404 if (REG_P (x
) && REGNO (x
) == 12)
26408 FOR_EACH_INSN_DEF (def
, insn
)
26410 rtx x
= DF_REF_REG (def
);
26411 if (REG_P (x
) && REGNO (x
) == 12)
26415 return bitmap_bit_p (DF_LR_OUT (bb
), 12);
26418 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26421 rs6000_global_entry_point_needed_p (void)
26423 /* Only needed for the ELFv2 ABI. */
26424 if (DEFAULT_ABI
!= ABI_ELFv2
)
26427 /* With -msingle-pic-base, we assume the whole program shares the same
26428 TOC, so no global entry point prologues are needed anywhere. */
26429 if (TARGET_SINGLE_PIC_BASE
)
26432 /* Ensure we have a global entry point for thunks. ??? We could
26433 avoid that if the target routine doesn't need a global entry point,
26434 but we do not know whether this is the case at this point. */
26435 if (cfun
->is_thunk
)
26438 /* For regular functions, rs6000_emit_prologue sets this flag if the
26439 routine ever uses the TOC pointer. */
26440 return cfun
->machine
->r2_setup_needed
;
26443 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
26445 rs6000_get_separate_components (void)
26447 rs6000_stack_t
*info
= rs6000_stack_info ();
26449 if (WORLD_SAVE_P (info
))
26452 gcc_assert (!(info
->savres_strategy
& SAVE_MULTIPLE
)
26453 && !(info
->savres_strategy
& REST_MULTIPLE
));
26455 /* Component 0 is the save/restore of LR (done via GPR0).
26456 Components 13..31 are the save/restore of GPR13..GPR31.
26457 Components 46..63 are the save/restore of FPR14..FPR31. */
26459 cfun
->machine
->n_components
= 64;
26461 sbitmap components
= sbitmap_alloc (cfun
->machine
->n_components
);
26462 bitmap_clear (components
);
26464 int reg_size
= TARGET_32BIT
? 4 : 8;
26465 int fp_reg_size
= 8;
26467 /* The GPRs we need saved to the frame. */
26468 if ((info
->savres_strategy
& SAVE_INLINE_GPRS
)
26469 && (info
->savres_strategy
& REST_INLINE_GPRS
))
26471 int offset
= info
->gp_save_offset
;
26473 offset
+= info
->total_size
;
26475 for (unsigned regno
= info
->first_gp_reg_save
; regno
< 32; regno
++)
26477 if (IN_RANGE (offset
, -0x8000, 0x7fff)
26478 && rs6000_reg_live_or_pic_offset_p (regno
))
26479 bitmap_set_bit (components
, regno
);
26481 offset
+= reg_size
;
26485 /* Don't mess with the hard frame pointer. */
26486 if (frame_pointer_needed
)
26487 bitmap_clear_bit (components
, HARD_FRAME_POINTER_REGNUM
);
26489 /* Don't mess with the fixed TOC register. */
26490 if ((TARGET_TOC
&& TARGET_MINIMAL_TOC
)
26491 || (flag_pic
== 1 && DEFAULT_ABI
== ABI_V4
)
26492 || (flag_pic
&& DEFAULT_ABI
== ABI_DARWIN
))
26493 bitmap_clear_bit (components
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
26495 /* The FPRs we need saved to the frame. */
26496 if ((info
->savres_strategy
& SAVE_INLINE_FPRS
)
26497 && (info
->savres_strategy
& REST_INLINE_FPRS
))
26499 int offset
= info
->fp_save_offset
;
26501 offset
+= info
->total_size
;
26503 for (unsigned regno
= info
->first_fp_reg_save
; regno
< 64; regno
++)
26505 if (IN_RANGE (offset
, -0x8000, 0x7fff) && save_reg_p (regno
))
26506 bitmap_set_bit (components
, regno
);
26508 offset
+= fp_reg_size
;
26512 /* Optimize LR save and restore if we can. This is component 0. Any
26513 out-of-line register save/restore routines need LR. */
26514 if (info
->lr_save_p
26515 && !(flag_pic
&& (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
))
26516 && (info
->savres_strategy
& SAVE_INLINE_GPRS
)
26517 && (info
->savres_strategy
& REST_INLINE_GPRS
)
26518 && (info
->savres_strategy
& SAVE_INLINE_FPRS
)
26519 && (info
->savres_strategy
& REST_INLINE_FPRS
)
26520 && (info
->savres_strategy
& SAVE_INLINE_VRS
)
26521 && (info
->savres_strategy
& REST_INLINE_VRS
))
26523 int offset
= info
->lr_save_offset
;
26525 offset
+= info
->total_size
;
26526 if (IN_RANGE (offset
, -0x8000, 0x7fff))
26527 bitmap_set_bit (components
, 0);
26533 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
26535 rs6000_components_for_bb (basic_block bb
)
26537 rs6000_stack_t
*info
= rs6000_stack_info ();
26539 bitmap in
= DF_LIVE_IN (bb
);
26540 bitmap gen
= &DF_LIVE_BB_INFO (bb
)->gen
;
26541 bitmap kill
= &DF_LIVE_BB_INFO (bb
)->kill
;
26543 sbitmap components
= sbitmap_alloc (cfun
->machine
->n_components
);
26544 bitmap_clear (components
);
26546 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
26549 for (unsigned regno
= info
->first_gp_reg_save
; regno
< 32; regno
++)
26550 if (bitmap_bit_p (in
, regno
)
26551 || bitmap_bit_p (gen
, regno
)
26552 || bitmap_bit_p (kill
, regno
))
26553 bitmap_set_bit (components
, regno
);
26556 for (unsigned regno
= info
->first_fp_reg_save
; regno
< 64; regno
++)
26557 if (bitmap_bit_p (in
, regno
)
26558 || bitmap_bit_p (gen
, regno
)
26559 || bitmap_bit_p (kill
, regno
))
26560 bitmap_set_bit (components
, regno
);
26562 /* The link register. */
26563 if (bitmap_bit_p (in
, LR_REGNO
)
26564 || bitmap_bit_p (gen
, LR_REGNO
)
26565 || bitmap_bit_p (kill
, LR_REGNO
))
26566 bitmap_set_bit (components
, 0);
26571 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26573 rs6000_disqualify_components (sbitmap components
, edge e
,
26574 sbitmap edge_components
, bool /*is_prologue*/)
26576 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26577 live where we want to place that code. */
26578 if (bitmap_bit_p (edge_components
, 0)
26579 && bitmap_bit_p (DF_LIVE_IN (e
->dest
), 0))
26582 fprintf (dump_file
, "Disqualifying LR because GPR0 is live "
26583 "on entry to bb %d\n", e
->dest
->index
);
26584 bitmap_clear_bit (components
, 0);
26588 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26590 rs6000_emit_prologue_components (sbitmap components
)
26592 rs6000_stack_t
*info
= rs6000_stack_info ();
26593 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
26594 ? HARD_FRAME_POINTER_REGNUM
26595 : STACK_POINTER_REGNUM
);
26597 machine_mode reg_mode
= Pmode
;
26598 int reg_size
= TARGET_32BIT
? 4 : 8;
26599 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
26601 int fp_reg_size
= 8;
26603 /* Prologue for LR. */
26604 if (bitmap_bit_p (components
, 0))
26606 rtx reg
= gen_rtx_REG (reg_mode
, 0);
26607 rtx_insn
*insn
= emit_move_insn (reg
, gen_rtx_REG (reg_mode
, LR_REGNO
));
26608 RTX_FRAME_RELATED_P (insn
) = 1;
26609 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
26611 int offset
= info
->lr_save_offset
;
26613 offset
+= info
->total_size
;
26615 insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
26616 RTX_FRAME_RELATED_P (insn
) = 1;
26617 rtx lr
= gen_rtx_REG (reg_mode
, LR_REGNO
);
26618 rtx mem
= copy_rtx (SET_DEST (single_set (insn
)));
26619 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, lr
));
26622 /* Prologue for the GPRs. */
26623 int offset
= info
->gp_save_offset
;
26625 offset
+= info
->total_size
;
26627 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
26629 if (bitmap_bit_p (components
, i
))
26631 rtx reg
= gen_rtx_REG (reg_mode
, i
);
26632 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
26633 RTX_FRAME_RELATED_P (insn
) = 1;
26634 rtx set
= copy_rtx (single_set (insn
));
26635 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
26638 offset
+= reg_size
;
26641 /* Prologue for the FPRs. */
26642 offset
= info
->fp_save_offset
;
26644 offset
+= info
->total_size
;
26646 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
26648 if (bitmap_bit_p (components
, i
))
26650 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
26651 rtx_insn
*insn
= emit_insn (gen_frame_store (reg
, ptr_reg
, offset
));
26652 RTX_FRAME_RELATED_P (insn
) = 1;
26653 rtx set
= copy_rtx (single_set (insn
));
26654 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
26657 offset
+= fp_reg_size
;
26661 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26663 rs6000_emit_epilogue_components (sbitmap components
)
26665 rs6000_stack_t
*info
= rs6000_stack_info ();
26666 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
26667 ? HARD_FRAME_POINTER_REGNUM
26668 : STACK_POINTER_REGNUM
);
26670 machine_mode reg_mode
= Pmode
;
26671 int reg_size
= TARGET_32BIT
? 4 : 8;
26673 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
26675 int fp_reg_size
= 8;
26677 /* Epilogue for the FPRs. */
26678 int offset
= info
->fp_save_offset
;
26680 offset
+= info
->total_size
;
26682 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
26684 if (bitmap_bit_p (components
, i
))
26686 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
26687 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
26688 RTX_FRAME_RELATED_P (insn
) = 1;
26689 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
26692 offset
+= fp_reg_size
;
26695 /* Epilogue for the GPRs. */
26696 offset
= info
->gp_save_offset
;
26698 offset
+= info
->total_size
;
26700 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
26702 if (bitmap_bit_p (components
, i
))
26704 rtx reg
= gen_rtx_REG (reg_mode
, i
);
26705 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
26706 RTX_FRAME_RELATED_P (insn
) = 1;
26707 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
26710 offset
+= reg_size
;
26713 /* Epilogue for LR. */
26714 if (bitmap_bit_p (components
, 0))
26716 int offset
= info
->lr_save_offset
;
26718 offset
+= info
->total_size
;
26720 rtx reg
= gen_rtx_REG (reg_mode
, 0);
26721 rtx_insn
*insn
= emit_insn (gen_frame_load (reg
, ptr_reg
, offset
));
26723 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
26724 insn
= emit_move_insn (lr
, reg
);
26725 RTX_FRAME_RELATED_P (insn
) = 1;
26726 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
26730 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26732 rs6000_set_handled_components (sbitmap components
)
26734 rs6000_stack_t
*info
= rs6000_stack_info ();
26736 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
26737 if (bitmap_bit_p (components
, i
))
26738 cfun
->machine
->gpr_is_wrapped_separately
[i
] = true;
26740 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
26741 if (bitmap_bit_p (components
, i
))
26742 cfun
->machine
->fpr_is_wrapped_separately
[i
- 32] = true;
26744 if (bitmap_bit_p (components
, 0))
26745 cfun
->machine
->lr_is_wrapped_separately
= true;
26748 /* VRSAVE is a bit vector representing which AltiVec registers
26749 are used. The OS uses this to determine which vector
26750 registers to save on a context switch. We need to save
26751 VRSAVE on the stack frame, add whatever AltiVec registers we
26752 used in this function, and do the corresponding magic in the
26755 emit_vrsave_prologue (rs6000_stack_t
*info
, int save_regno
,
26756 HOST_WIDE_INT frame_off
, rtx frame_reg_rtx
)
26758 /* Get VRSAVE into a GPR. */
26759 rtx reg
= gen_rtx_REG (SImode
, save_regno
);
26760 rtx vrsave
= gen_rtx_REG (SImode
, VRSAVE_REGNO
);
26762 emit_insn (gen_get_vrsave_internal (reg
));
26764 emit_insn (gen_rtx_SET (reg
, vrsave
));
26767 int offset
= info
->vrsave_save_offset
+ frame_off
;
26768 emit_insn (gen_frame_store (reg
, frame_reg_rtx
, offset
));
26770 /* Include the registers in the mask. */
26771 emit_insn (gen_iorsi3 (reg
, reg
, GEN_INT (info
->vrsave_mask
)));
26773 emit_insn (generate_set_vrsave (reg
, info
, 0));
26776 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26777 called, it left the arg pointer to the old stack in r29. Otherwise, the
26778 arg pointer is the top of the current frame. */
26780 emit_split_stack_prologue (rs6000_stack_t
*info
, rtx_insn
*sp_adjust
,
26781 HOST_WIDE_INT frame_off
, rtx frame_reg_rtx
)
26783 cfun
->machine
->split_stack_argp_used
= true;
26787 rtx r12
= gen_rtx_REG (Pmode
, 12);
26788 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
26789 rtx set_r12
= gen_rtx_SET (r12
, sp_reg_rtx
);
26790 emit_insn_before (set_r12
, sp_adjust
);
26792 else if (frame_off
!= 0 || REGNO (frame_reg_rtx
) != 12)
26794 rtx r12
= gen_rtx_REG (Pmode
, 12);
26795 if (frame_off
== 0)
26796 emit_move_insn (r12
, frame_reg_rtx
);
26798 emit_insn (gen_add3_insn (r12
, frame_reg_rtx
, GEN_INT (frame_off
)));
26803 rtx r12
= gen_rtx_REG (Pmode
, 12);
26804 rtx r29
= gen_rtx_REG (Pmode
, 29);
26805 rtx cr7
= gen_rtx_REG (CCUNSmode
, CR7_REGNO
);
26806 rtx not_more
= gen_label_rtx ();
26809 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
26810 gen_rtx_GEU (VOIDmode
, cr7
, const0_rtx
),
26811 gen_rtx_LABEL_REF (VOIDmode
, not_more
),
26813 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
26814 JUMP_LABEL (jump
) = not_more
;
26815 LABEL_NUSES (not_more
) += 1;
26816 emit_move_insn (r12
, r29
);
26817 emit_label (not_more
);
26821 /* Emit function prologue as insns. */
26824 rs6000_emit_prologue (void)
26826 rs6000_stack_t
*info
= rs6000_stack_info ();
26827 machine_mode reg_mode
= Pmode
;
26828 int reg_size
= TARGET_32BIT
? 4 : 8;
26829 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
26831 int fp_reg_size
= 8;
26832 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
26833 rtx frame_reg_rtx
= sp_reg_rtx
;
26834 unsigned int cr_save_regno
;
26835 rtx cr_save_rtx
= NULL_RTX
;
26838 int using_static_chain_p
= (cfun
->static_chain_decl
!= NULL_TREE
26839 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
26840 && call_used_regs
[STATIC_CHAIN_REGNUM
]);
26841 int using_split_stack
= (flag_split_stack
26842 && (lookup_attribute ("no_split_stack",
26843 DECL_ATTRIBUTES (cfun
->decl
))
26846 /* Offset to top of frame for frame_reg and sp respectively. */
26847 HOST_WIDE_INT frame_off
= 0;
26848 HOST_WIDE_INT sp_off
= 0;
26849 /* sp_adjust is the stack adjusting instruction, tracked so that the
26850 insn setting up the split-stack arg pointer can be emitted just
26851 prior to it, when r12 is not used here for other purposes. */
26852 rtx_insn
*sp_adjust
= 0;
26855 /* Track and check usage of r0, r11, r12. */
26856 int reg_inuse
= using_static_chain_p
? 1 << 11 : 0;
26857 #define START_USE(R) do \
26859 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26860 reg_inuse |= 1 << (R); \
26862 #define END_USE(R) do \
26864 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26865 reg_inuse &= ~(1 << (R)); \
26867 #define NOT_INUSE(R) do \
26869 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26872 #define START_USE(R) do {} while (0)
26873 #define END_USE(R) do {} while (0)
26874 #define NOT_INUSE(R) do {} while (0)
26877 if (DEFAULT_ABI
== ABI_ELFv2
26878 && !TARGET_SINGLE_PIC_BASE
)
26880 cfun
->machine
->r2_setup_needed
= df_regs_ever_live_p (TOC_REGNUM
);
26882 /* With -mminimal-toc we may generate an extra use of r2 below. */
26883 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
26884 && !constant_pool_empty_p ())
26885 cfun
->machine
->r2_setup_needed
= true;
26889 if (flag_stack_usage_info
)
26890 current_function_static_stack_size
= info
->total_size
;
26892 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
26894 HOST_WIDE_INT size
= info
->total_size
;
26896 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
26898 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
26899 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT
,
26900 size
- STACK_CHECK_PROTECT
);
26903 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
26906 if (TARGET_FIX_AND_CONTINUE
)
26908 /* gdb on darwin arranges to forward a function from the old
26909 address by modifying the first 5 instructions of the function
26910 to branch to the overriding function. This is necessary to
26911 permit function pointers that point to the old function to
26912 actually forward to the new function. */
26913 emit_insn (gen_nop ());
26914 emit_insn (gen_nop ());
26915 emit_insn (gen_nop ());
26916 emit_insn (gen_nop ());
26917 emit_insn (gen_nop ());
26920 /* Handle world saves specially here. */
26921 if (WORLD_SAVE_P (info
))
26928 /* save_world expects lr in r0. */
26929 reg0
= gen_rtx_REG (Pmode
, 0);
26930 if (info
->lr_save_p
)
26932 insn
= emit_move_insn (reg0
,
26933 gen_rtx_REG (Pmode
, LR_REGNO
));
26934 RTX_FRAME_RELATED_P (insn
) = 1;
26937 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26938 assumptions about the offsets of various bits of the stack
26940 gcc_assert (info
->gp_save_offset
== -220
26941 && info
->fp_save_offset
== -144
26942 && info
->lr_save_offset
== 8
26943 && info
->cr_save_offset
== 4
26946 && (!crtl
->calls_eh_return
26947 || info
->ehrd_offset
== -432)
26948 && info
->vrsave_save_offset
== -224
26949 && info
->altivec_save_offset
== -416);
26951 treg
= gen_rtx_REG (SImode
, 11);
26952 emit_move_insn (treg
, GEN_INT (-info
->total_size
));
26954 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26955 in R11. It also clobbers R12, so beware! */
26957 /* Preserve CR2 for save_world prologues */
26959 sz
+= 32 - info
->first_gp_reg_save
;
26960 sz
+= 64 - info
->first_fp_reg_save
;
26961 sz
+= LAST_ALTIVEC_REGNO
- info
->first_altivec_reg_save
+ 1;
26962 p
= rtvec_alloc (sz
);
26964 RTVEC_ELT (p
, j
++) = gen_rtx_CLOBBER (VOIDmode
,
26965 gen_rtx_REG (SImode
,
26967 RTVEC_ELT (p
, j
++) = gen_rtx_USE (VOIDmode
,
26968 gen_rtx_SYMBOL_REF (Pmode
,
26970 /* We do floats first so that the instruction pattern matches
26972 for (i
= 0; i
< 64 - info
->first_fp_reg_save
; i
++)
26974 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
26976 info
->first_fp_reg_save
+ i
),
26978 info
->fp_save_offset
+ frame_off
+ 8 * i
);
26979 for (i
= 0; info
->first_altivec_reg_save
+ i
<= LAST_ALTIVEC_REGNO
; i
++)
26981 = gen_frame_store (gen_rtx_REG (V4SImode
,
26982 info
->first_altivec_reg_save
+ i
),
26984 info
->altivec_save_offset
+ frame_off
+ 16 * i
);
26985 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
26987 = gen_frame_store (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
26989 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
26991 /* CR register traditionally saved as CR2. */
26993 = gen_frame_store (gen_rtx_REG (SImode
, CR2_REGNO
),
26994 frame_reg_rtx
, info
->cr_save_offset
+ frame_off
);
26995 /* Explain about use of R0. */
26996 if (info
->lr_save_p
)
26998 = gen_frame_store (reg0
,
26999 frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
27000 /* Explain what happens to the stack pointer. */
27002 rtx newval
= gen_rtx_PLUS (Pmode
, sp_reg_rtx
, treg
);
27003 RTVEC_ELT (p
, j
++) = gen_rtx_SET (sp_reg_rtx
, newval
);
27006 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
27007 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
27008 treg
, GEN_INT (-info
->total_size
));
27009 sp_off
= frame_off
= info
->total_size
;
27012 strategy
= info
->savres_strategy
;
27014 /* For V.4, update stack before we do any saving and set back pointer. */
27015 if (! WORLD_SAVE_P (info
)
27017 && (DEFAULT_ABI
== ABI_V4
27018 || crtl
->calls_eh_return
))
27020 bool need_r11
= (!(strategy
& SAVE_INLINE_FPRS
)
27021 || !(strategy
& SAVE_INLINE_GPRS
)
27022 || !(strategy
& SAVE_INLINE_VRS
));
27023 int ptr_regno
= -1;
27024 rtx ptr_reg
= NULL_RTX
;
27027 if (info
->total_size
< 32767)
27028 frame_off
= info
->total_size
;
27031 else if (info
->cr_save_p
27033 || info
->first_fp_reg_save
< 64
27034 || info
->first_gp_reg_save
< 32
27035 || info
->altivec_size
!= 0
27036 || info
->vrsave_size
!= 0
27037 || crtl
->calls_eh_return
)
27041 /* The prologue won't be saving any regs so there is no need
27042 to set up a frame register to access any frame save area.
27043 We also won't be using frame_off anywhere below, but set
27044 the correct value anyway to protect against future
27045 changes to this function. */
27046 frame_off
= info
->total_size
;
27048 if (ptr_regno
!= -1)
27050 /* Set up the frame offset to that needed by the first
27051 out-of-line save function. */
27052 START_USE (ptr_regno
);
27053 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
27054 frame_reg_rtx
= ptr_reg
;
27055 if (!(strategy
& SAVE_INLINE_FPRS
) && info
->fp_size
!= 0)
27056 gcc_checking_assert (info
->fp_save_offset
+ info
->fp_size
== 0);
27057 else if (!(strategy
& SAVE_INLINE_GPRS
) && info
->first_gp_reg_save
< 32)
27058 ptr_off
= info
->gp_save_offset
+ info
->gp_size
;
27059 else if (!(strategy
& SAVE_INLINE_VRS
) && info
->altivec_size
!= 0)
27060 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
27061 frame_off
= -ptr_off
;
27063 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
27065 if (REGNO (frame_reg_rtx
) == 12)
27067 sp_off
= info
->total_size
;
27068 if (frame_reg_rtx
!= sp_reg_rtx
)
27069 rs6000_emit_stack_tie (frame_reg_rtx
, false);
27072 /* If we use the link register, get it into r0. */
27073 if (!WORLD_SAVE_P (info
) && info
->lr_save_p
27074 && !cfun
->machine
->lr_is_wrapped_separately
)
27076 rtx addr
, reg
, mem
;
27078 reg
= gen_rtx_REG (Pmode
, 0);
27080 insn
= emit_move_insn (reg
, gen_rtx_REG (Pmode
, LR_REGNO
));
27081 RTX_FRAME_RELATED_P (insn
) = 1;
27083 if (!(strategy
& (SAVE_NOINLINE_GPRS_SAVES_LR
27084 | SAVE_NOINLINE_FPRS_SAVES_LR
)))
27086 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
27087 GEN_INT (info
->lr_save_offset
+ frame_off
));
27088 mem
= gen_rtx_MEM (Pmode
, addr
);
27089 /* This should not be of rs6000_sr_alias_set, because of
27090 __builtin_return_address. */
27092 insn
= emit_move_insn (mem
, reg
);
27093 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
27094 NULL_RTX
, NULL_RTX
);
27099 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27100 r12 will be needed by out-of-line gpr restore. */
27101 cr_save_regno
= ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
27102 && !(strategy
& (SAVE_INLINE_GPRS
27103 | SAVE_NOINLINE_GPRS_SAVES_LR
))
27105 if (!WORLD_SAVE_P (info
)
27107 && REGNO (frame_reg_rtx
) != cr_save_regno
27108 && !(using_static_chain_p
&& cr_save_regno
== 11)
27109 && !(using_split_stack
&& cr_save_regno
== 12 && sp_adjust
))
27111 cr_save_rtx
= gen_rtx_REG (SImode
, cr_save_regno
);
27112 START_USE (cr_save_regno
);
27113 rs6000_emit_move_from_cr (cr_save_rtx
);
27116 /* Do any required saving of fpr's. If only one or two to save, do
27117 it ourselves. Otherwise, call function. */
27118 if (!WORLD_SAVE_P (info
) && (strategy
& SAVE_INLINE_FPRS
))
27120 int offset
= info
->fp_save_offset
+ frame_off
;
27121 for (int i
= info
->first_fp_reg_save
; i
< 64; i
++)
27124 && !cfun
->machine
->fpr_is_wrapped_separately
[i
- 32])
27125 emit_frame_save (frame_reg_rtx
, fp_reg_mode
, i
, offset
,
27126 sp_off
- frame_off
);
27128 offset
+= fp_reg_size
;
27131 else if (!WORLD_SAVE_P (info
) && info
->first_fp_reg_save
!= 64)
27133 bool lr
= (strategy
& SAVE_NOINLINE_FPRS_SAVES_LR
) != 0;
27134 int sel
= SAVRES_SAVE
| SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
27135 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
27136 rtx ptr_reg
= frame_reg_rtx
;
27138 if (REGNO (frame_reg_rtx
) == ptr_regno
)
27139 gcc_checking_assert (frame_off
== 0);
27142 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
27143 NOT_INUSE (ptr_regno
);
27144 emit_insn (gen_add3_insn (ptr_reg
,
27145 frame_reg_rtx
, GEN_INT (frame_off
)));
27147 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
27148 info
->fp_save_offset
,
27149 info
->lr_save_offset
,
27151 rs6000_frame_related (insn
, ptr_reg
, sp_off
,
27152 NULL_RTX
, NULL_RTX
);
27157 /* Save GPRs. This is done as a PARALLEL if we are using
27158 the store-multiple instructions. */
27159 if (!WORLD_SAVE_P (info
) && !(strategy
& SAVE_INLINE_GPRS
))
27161 bool lr
= (strategy
& SAVE_NOINLINE_GPRS_SAVES_LR
) != 0;
27162 int sel
= SAVRES_SAVE
| SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
27163 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
27164 rtx ptr_reg
= frame_reg_rtx
;
27165 bool ptr_set_up
= REGNO (ptr_reg
) == ptr_regno
;
27166 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
27169 if (ptr_regno
== 12)
27172 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
27174 /* Need to adjust r11 (r12) if we saved any FPRs. */
27175 if (end_save
+ frame_off
!= 0)
27177 rtx offset
= GEN_INT (end_save
+ frame_off
);
27180 frame_off
= -end_save
;
27182 NOT_INUSE (ptr_regno
);
27183 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
27185 else if (!ptr_set_up
)
27187 NOT_INUSE (ptr_regno
);
27188 emit_move_insn (ptr_reg
, frame_reg_rtx
);
27190 ptr_off
= -end_save
;
27191 insn
= rs6000_emit_savres_rtx (info
, ptr_reg
,
27192 info
->gp_save_offset
+ ptr_off
,
27193 info
->lr_save_offset
+ ptr_off
,
27195 rs6000_frame_related (insn
, ptr_reg
, sp_off
- ptr_off
,
27196 NULL_RTX
, NULL_RTX
);
27200 else if (!WORLD_SAVE_P (info
) && (strategy
& SAVE_MULTIPLE
))
27204 p
= rtvec_alloc (32 - info
->first_gp_reg_save
);
27205 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
27207 = gen_frame_store (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
27209 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
27210 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
27211 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
27212 NULL_RTX
, NULL_RTX
);
27214 else if (!WORLD_SAVE_P (info
))
27216 int offset
= info
->gp_save_offset
+ frame_off
;
27217 for (int i
= info
->first_gp_reg_save
; i
< 32; i
++)
27219 if (rs6000_reg_live_or_pic_offset_p (i
)
27220 && !cfun
->machine
->gpr_is_wrapped_separately
[i
])
27221 emit_frame_save (frame_reg_rtx
, reg_mode
, i
, offset
,
27222 sp_off
- frame_off
);
27224 offset
+= reg_size
;
27228 if (crtl
->calls_eh_return
)
27235 unsigned int regno
= EH_RETURN_DATA_REGNO (i
);
27236 if (regno
== INVALID_REGNUM
)
27240 p
= rtvec_alloc (i
);
27244 unsigned int regno
= EH_RETURN_DATA_REGNO (i
);
27245 if (regno
== INVALID_REGNUM
)
27249 = gen_frame_store (gen_rtx_REG (reg_mode
, regno
),
27251 info
->ehrd_offset
+ sp_off
+ reg_size
* (int) i
);
27252 RTVEC_ELT (p
, i
) = set
;
27253 RTX_FRAME_RELATED_P (set
) = 1;
27256 insn
= emit_insn (gen_blockage ());
27257 RTX_FRAME_RELATED_P (insn
) = 1;
27258 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, gen_rtx_PARALLEL (VOIDmode
, p
));
27261 /* In AIX ABI we need to make sure r2 is really saved. */
27262 if (TARGET_AIX
&& crtl
->calls_eh_return
)
27264 rtx tmp_reg
, tmp_reg_si
, hi
, lo
, compare_result
, toc_save_done
, jump
;
27265 rtx join_insn
, note
;
27266 rtx_insn
*save_insn
;
27267 long toc_restore_insn
;
27269 tmp_reg
= gen_rtx_REG (Pmode
, 11);
27270 tmp_reg_si
= gen_rtx_REG (SImode
, 11);
27271 if (using_static_chain_p
)
27274 emit_move_insn (gen_rtx_REG (Pmode
, 0), tmp_reg
);
27278 emit_move_insn (tmp_reg
, gen_rtx_REG (Pmode
, LR_REGNO
));
27279 /* Peek at instruction to which this function returns. If it's
27280 restoring r2, then we know we've already saved r2. We can't
27281 unconditionally save r2 because the value we have will already
27282 be updated if we arrived at this function via a plt call or
27283 toc adjusting stub. */
27284 emit_move_insn (tmp_reg_si
, gen_rtx_MEM (SImode
, tmp_reg
));
27285 toc_restore_insn
= ((TARGET_32BIT
? 0x80410000 : 0xE8410000)
27286 + RS6000_TOC_SAVE_SLOT
);
27287 hi
= gen_int_mode (toc_restore_insn
& ~0xffff, SImode
);
27288 emit_insn (gen_xorsi3 (tmp_reg_si
, tmp_reg_si
, hi
));
27289 compare_result
= gen_rtx_REG (CCUNSmode
, CR0_REGNO
);
27290 validate_condition_mode (EQ
, CCUNSmode
);
27291 lo
= gen_int_mode (toc_restore_insn
& 0xffff, SImode
);
27292 emit_insn (gen_rtx_SET (compare_result
,
27293 gen_rtx_COMPARE (CCUNSmode
, tmp_reg_si
, lo
)));
27294 toc_save_done
= gen_label_rtx ();
27295 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
27296 gen_rtx_EQ (VOIDmode
, compare_result
,
27298 gen_rtx_LABEL_REF (VOIDmode
, toc_save_done
),
27300 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
27301 JUMP_LABEL (jump
) = toc_save_done
;
27302 LABEL_NUSES (toc_save_done
) += 1;
27304 save_insn
= emit_frame_save (frame_reg_rtx
, reg_mode
,
27305 TOC_REGNUM
, frame_off
+ RS6000_TOC_SAVE_SLOT
,
27306 sp_off
- frame_off
);
27308 emit_label (toc_save_done
);
27310 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27311 have a CFG that has different saves along different paths.
27312 Move the note to a dummy blockage insn, which describes that
27313 R2 is unconditionally saved after the label. */
27314 /* ??? An alternate representation might be a special insn pattern
27315 containing both the branch and the store. That might let the
27316 code that minimizes the number of DW_CFA_advance opcodes better
27317 freedom in placing the annotations. */
27318 note
= find_reg_note (save_insn
, REG_FRAME_RELATED_EXPR
, NULL
);
27320 remove_note (save_insn
, note
);
27322 note
= alloc_reg_note (REG_FRAME_RELATED_EXPR
,
27323 copy_rtx (PATTERN (save_insn
)), NULL_RTX
);
27324 RTX_FRAME_RELATED_P (save_insn
) = 0;
27326 join_insn
= emit_insn (gen_blockage ());
27327 REG_NOTES (join_insn
) = note
;
27328 RTX_FRAME_RELATED_P (join_insn
) = 1;
27330 if (using_static_chain_p
)
27332 emit_move_insn (tmp_reg
, gen_rtx_REG (Pmode
, 0));
27339 /* Save CR if we use any that must be preserved. */
27340 if (!WORLD_SAVE_P (info
) && info
->cr_save_p
)
27342 rtx addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
27343 GEN_INT (info
->cr_save_offset
+ frame_off
));
27344 rtx mem
= gen_frame_mem (SImode
, addr
);
27346 /* If we didn't copy cr before, do so now using r0. */
27347 if (cr_save_rtx
== NULL_RTX
)
27350 cr_save_rtx
= gen_rtx_REG (SImode
, 0);
27351 rs6000_emit_move_from_cr (cr_save_rtx
);
27354 /* Saving CR requires a two-instruction sequence: one instruction
27355 to move the CR to a general-purpose register, and a second
27356 instruction that stores the GPR to memory.
27358 We do not emit any DWARF CFI records for the first of these,
27359 because we cannot properly represent the fact that CR is saved in
27360 a register. One reason is that we cannot express that multiple
27361 CR fields are saved; another reason is that on 64-bit, the size
27362 of the CR register in DWARF (4 bytes) differs from the size of
27363 a general-purpose register.
27365 This means if any intervening instruction were to clobber one of
27366 the call-saved CR fields, we'd have incorrect CFI. To prevent
27367 this from happening, we mark the store to memory as a use of
27368 those CR fields, which prevents any such instruction from being
27369 scheduled in between the two instructions. */
27374 crsave_v
[n_crsave
++] = gen_rtx_SET (mem
, cr_save_rtx
);
27375 for (i
= 0; i
< 8; i
++)
27376 if (save_reg_p (CR0_REGNO
+ i
))
27377 crsave_v
[n_crsave
++]
27378 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (CCmode
, CR0_REGNO
+ i
));
27380 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
,
27381 gen_rtvec_v (n_crsave
, crsave_v
)));
27382 END_USE (REGNO (cr_save_rtx
));
27384 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27385 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27386 so we need to construct a frame expression manually. */
27387 RTX_FRAME_RELATED_P (insn
) = 1;
27389 /* Update address to be stack-pointer relative, like
27390 rs6000_frame_related would do. */
27391 addr
= gen_rtx_PLUS (Pmode
, gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
),
27392 GEN_INT (info
->cr_save_offset
+ sp_off
));
27393 mem
= gen_frame_mem (SImode
, addr
);
27395 if (DEFAULT_ABI
== ABI_ELFv2
)
27397 /* In the ELFv2 ABI we generate separate CFI records for each
27398 CR field that was actually saved. They all point to the
27399 same 32-bit stack slot. */
27403 for (i
= 0; i
< 8; i
++)
27404 if (save_reg_p (CR0_REGNO
+ i
))
27407 = gen_rtx_SET (mem
, gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
27409 RTX_FRAME_RELATED_P (crframe
[n_crframe
]) = 1;
27413 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
27414 gen_rtx_PARALLEL (VOIDmode
,
27415 gen_rtvec_v (n_crframe
, crframe
)));
27419 /* In other ABIs, by convention, we use a single CR regnum to
27420 represent the fact that all call-saved CR fields are saved.
27421 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27422 rtx set
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, CR2_REGNO
));
27423 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, set
);
27427 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27428 *separate* slots if the routine calls __builtin_eh_return, so
27429 that they can be independently restored by the unwinder. */
27430 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
27432 int i
, cr_off
= info
->ehcr_offset
;
27435 /* ??? We might get better performance by using multiple mfocrf
27437 crsave
= gen_rtx_REG (SImode
, 0);
27438 emit_insn (gen_movesi_from_cr (crsave
));
27440 for (i
= 0; i
< 8; i
++)
27441 if (!call_used_regs
[CR0_REGNO
+ i
])
27443 rtvec p
= rtvec_alloc (2);
27445 = gen_frame_store (crsave
, frame_reg_rtx
, cr_off
+ frame_off
);
27447 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (CCmode
, CR0_REGNO
+ i
));
27449 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
27451 RTX_FRAME_RELATED_P (insn
) = 1;
27452 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
27453 gen_frame_store (gen_rtx_REG (SImode
, CR0_REGNO
+ i
),
27454 sp_reg_rtx
, cr_off
+ sp_off
));
27456 cr_off
+= reg_size
;
27460 /* Update stack and set back pointer unless this is V.4,
27461 for which it was done previously. */
27462 if (!WORLD_SAVE_P (info
) && info
->push_p
27463 && !(DEFAULT_ABI
== ABI_V4
|| crtl
->calls_eh_return
))
27465 rtx ptr_reg
= NULL
;
27468 /* If saving altivec regs we need to be able to address all save
27469 locations using a 16-bit offset. */
27470 if ((strategy
& SAVE_INLINE_VRS
) == 0
27471 || (info
->altivec_size
!= 0
27472 && (info
->altivec_save_offset
+ info
->altivec_size
- 16
27473 + info
->total_size
- frame_off
) > 32767)
27474 || (info
->vrsave_size
!= 0
27475 && (info
->vrsave_save_offset
27476 + info
->total_size
- frame_off
) > 32767))
27478 int sel
= SAVRES_SAVE
| SAVRES_VR
;
27479 unsigned ptr_regno
= ptr_regno_for_savres (sel
);
27481 if (using_static_chain_p
27482 && ptr_regno
== STATIC_CHAIN_REGNUM
)
27484 if (REGNO (frame_reg_rtx
) != ptr_regno
)
27485 START_USE (ptr_regno
);
27486 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
27487 frame_reg_rtx
= ptr_reg
;
27488 ptr_off
= info
->altivec_save_offset
+ info
->altivec_size
;
27489 frame_off
= -ptr_off
;
27491 else if (REGNO (frame_reg_rtx
) == 1)
27492 frame_off
= info
->total_size
;
27493 sp_adjust
= rs6000_emit_allocate_stack (info
->total_size
,
27495 if (REGNO (frame_reg_rtx
) == 12)
27497 sp_off
= info
->total_size
;
27498 if (frame_reg_rtx
!= sp_reg_rtx
)
27499 rs6000_emit_stack_tie (frame_reg_rtx
, false);
27502 /* Set frame pointer, if needed. */
27503 if (frame_pointer_needed
)
27505 insn
= emit_move_insn (gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
),
27507 RTX_FRAME_RELATED_P (insn
) = 1;
27510 /* Save AltiVec registers if needed. Save here because the red zone does
27511 not always include AltiVec registers. */
27512 if (!WORLD_SAVE_P (info
)
27513 && info
->altivec_size
!= 0 && (strategy
& SAVE_INLINE_VRS
) == 0)
27515 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
27517 /* Oddly, the vector save/restore functions point r0 at the end
27518 of the save area, then use r11 or r12 to load offsets for
27519 [reg+reg] addressing. */
27520 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
27521 int scratch_regno
= ptr_regno_for_savres (SAVRES_SAVE
| SAVRES_VR
);
27522 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
27524 gcc_checking_assert (scratch_regno
== 11 || scratch_regno
== 12);
27526 if (scratch_regno
== 12)
27528 if (end_save
+ frame_off
!= 0)
27530 rtx offset
= GEN_INT (end_save
+ frame_off
);
27532 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
27535 emit_move_insn (ptr_reg
, frame_reg_rtx
);
27537 ptr_off
= -end_save
;
27538 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
27539 info
->altivec_save_offset
+ ptr_off
,
27540 0, V4SImode
, SAVRES_SAVE
| SAVRES_VR
);
27541 rs6000_frame_related (insn
, scratch_reg
, sp_off
- ptr_off
,
27542 NULL_RTX
, NULL_RTX
);
27543 if (REGNO (frame_reg_rtx
) == REGNO (scratch_reg
))
27545 /* The oddity mentioned above clobbered our frame reg. */
27546 emit_move_insn (frame_reg_rtx
, ptr_reg
);
27547 frame_off
= ptr_off
;
27550 else if (!WORLD_SAVE_P (info
)
27551 && info
->altivec_size
!= 0)
27555 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
27556 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
27558 rtx areg
, savereg
, mem
;
27559 HOST_WIDE_INT offset
;
27561 offset
= (info
->altivec_save_offset
+ frame_off
27562 + 16 * (i
- info
->first_altivec_reg_save
));
27564 savereg
= gen_rtx_REG (V4SImode
, i
);
27566 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
27568 mem
= gen_frame_mem (V4SImode
,
27569 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
27570 GEN_INT (offset
)));
27571 insn
= emit_insn (gen_rtx_SET (mem
, savereg
));
27577 areg
= gen_rtx_REG (Pmode
, 0);
27578 emit_move_insn (areg
, GEN_INT (offset
));
27580 /* AltiVec addressing mode is [reg+reg]. */
27581 mem
= gen_frame_mem (V4SImode
,
27582 gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
));
27584 /* Rather than emitting a generic move, force use of the stvx
27585 instruction, which we always want on ISA 2.07 (power8) systems.
27586 In particular we don't want xxpermdi/stxvd2x for little
27588 insn
= emit_insn (gen_altivec_stvx_v4si_internal (mem
, savereg
));
27591 rs6000_frame_related (insn
, frame_reg_rtx
, sp_off
- frame_off
,
27592 areg
, GEN_INT (offset
));
27596 /* VRSAVE is a bit vector representing which AltiVec registers
27597 are used. The OS uses this to determine which vector
27598 registers to save on a context switch. We need to save
27599 VRSAVE on the stack frame, add whatever AltiVec registers we
27600 used in this function, and do the corresponding magic in the
27603 if (!WORLD_SAVE_P (info
) && info
->vrsave_size
!= 0)
27605 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27606 be using r12 as frame_reg_rtx and r11 as the static chain
27607 pointer for nested functions. */
27608 int save_regno
= 12;
27609 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
27610 && !using_static_chain_p
)
27612 else if (using_split_stack
|| REGNO (frame_reg_rtx
) == 12)
27615 if (using_static_chain_p
)
27618 NOT_INUSE (save_regno
);
27620 emit_vrsave_prologue (info
, save_regno
, frame_off
, frame_reg_rtx
);
27623 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27624 if (!TARGET_SINGLE_PIC_BASE
27625 && ((TARGET_TOC
&& TARGET_MINIMAL_TOC
27626 && !constant_pool_empty_p ())
27627 || (DEFAULT_ABI
== ABI_V4
27628 && (flag_pic
== 1 || (flag_pic
&& TARGET_SECURE_PLT
))
27629 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))))
27631 /* If emit_load_toc_table will use the link register, we need to save
27632 it. We use R12 for this purpose because emit_load_toc_table
27633 can use register 0. This allows us to use a plain 'blr' to return
27634 from the procedure more often. */
27635 int save_LR_around_toc_setup
= (TARGET_ELF
27636 && DEFAULT_ABI
== ABI_V4
27638 && ! info
->lr_save_p
27639 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
) > 0);
27640 if (save_LR_around_toc_setup
)
27642 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
27643 rtx tmp
= gen_rtx_REG (Pmode
, 12);
27646 insn
= emit_move_insn (tmp
, lr
);
27647 RTX_FRAME_RELATED_P (insn
) = 1;
27649 rs6000_emit_load_toc_table (TRUE
);
27651 insn
= emit_move_insn (lr
, tmp
);
27652 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
27653 RTX_FRAME_RELATED_P (insn
) = 1;
27656 rs6000_emit_load_toc_table (TRUE
);
27660 if (!TARGET_SINGLE_PIC_BASE
27661 && DEFAULT_ABI
== ABI_DARWIN
27662 && flag_pic
&& crtl
->uses_pic_offset_table
)
27664 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
27665 rtx src
= gen_rtx_SYMBOL_REF (Pmode
, MACHOPIC_FUNCTION_BASE_NAME
);
27667 /* Save and restore LR locally around this call (in R0). */
27668 if (!info
->lr_save_p
)
27669 emit_move_insn (gen_rtx_REG (Pmode
, 0), lr
);
27671 emit_insn (gen_load_macho_picbase (src
));
27673 emit_move_insn (gen_rtx_REG (Pmode
,
27674 RS6000_PIC_OFFSET_TABLE_REGNUM
),
27677 if (!info
->lr_save_p
)
27678 emit_move_insn (lr
, gen_rtx_REG (Pmode
, 0));
27682 /* If we need to, save the TOC register after doing the stack setup.
27683 Do not emit eh frame info for this save. The unwinder wants info,
27684 conceptually attached to instructions in this function, about
27685 register values in the caller of this function. This R2 may have
27686 already been changed from the value in the caller.
27687 We don't attempt to write accurate DWARF EH frame info for R2
27688 because code emitted by gcc for a (non-pointer) function call
27689 doesn't save and restore R2. Instead, R2 is managed out-of-line
27690 by a linker generated plt call stub when the function resides in
27691 a shared library. This behavior is costly to describe in DWARF,
27692 both in terms of the size of DWARF info and the time taken in the
27693 unwinder to interpret it. R2 changes, apart from the
27694 calls_eh_return case earlier in this function, are handled by
27695 linux-unwind.h frob_update_context. */
27696 if (rs6000_save_toc_in_prologue_p ())
27698 rtx reg
= gen_rtx_REG (reg_mode
, TOC_REGNUM
);
27699 emit_insn (gen_frame_store (reg
, sp_reg_rtx
, RS6000_TOC_SAVE_SLOT
));
27702 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27703 if (using_split_stack
&& split_stack_arg_pointer_used_p ())
27704 emit_split_stack_prologue (info
, sp_adjust
, frame_off
, frame_reg_rtx
);
27707 /* Output .extern statements for the save/restore routines we use. */
27710 rs6000_output_savres_externs (FILE *file
)
27712 rs6000_stack_t
*info
= rs6000_stack_info ();
27714 if (TARGET_DEBUG_STACK
)
27715 debug_stack_info (info
);
27717 /* Write .extern for any function we will call to save and restore
27719 if (info
->first_fp_reg_save
< 64
27724 int regno
= info
->first_fp_reg_save
- 32;
27726 if ((info
->savres_strategy
& SAVE_INLINE_FPRS
) == 0)
27728 bool lr
= (info
->savres_strategy
& SAVE_NOINLINE_FPRS_SAVES_LR
) != 0;
27729 int sel
= SAVRES_SAVE
| SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
27730 name
= rs6000_savres_routine_name (regno
, sel
);
27731 fprintf (file
, "\t.extern %s\n", name
);
27733 if ((info
->savres_strategy
& REST_INLINE_FPRS
) == 0)
27735 bool lr
= (info
->savres_strategy
27736 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
27737 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
27738 name
= rs6000_savres_routine_name (regno
, sel
);
27739 fprintf (file
, "\t.extern %s\n", name
);
/* Write function prologue.  */

static void
rs6000_output_function_prologue (FILE *file,
				 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (!cfun->is_thunk)
    rs6000_output_savres_externs (file);

  /* ELFv2 ABI r2 setup code and local entry point.  This must follow
     immediately after the global entry point label.  */
  if (rs6000_global_entry_point_needed_p ())
    {
      const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);

      if (TARGET_CMODEL != CMODEL_LARGE)
	{
	  /* In the small and medium code models, we assume the TOC is less
	     2 GB away from the text section, so it can be computed via the
	     following two-instruction sequence.  */
	  char buf[256];

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  fprintf (file, "0:\taddis 2,12,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@ha\n");
	  fprintf (file, "\taddi 2,2,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@l\n");
	}
      else
	{
	  /* In the large code model, we allow arbitrary offsets between the
	     TOC and the text section, so we have to load the offset from
	     memory.  The data field is emitted directly before the global
	     entry point in rs6000_elf_declare_function_name.  */
	  char buf[256];

#ifdef HAVE_AS_ENTRY_MARKERS
	  /* If supported by the linker, emit a marker relocation.  If the
	     total code size of the final executable or shared library
	     happens to fit into 2 GB after all, the linker will replace
	     this code sequence with the sequence for the small or medium
	     code model.  */
	  fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
#endif
	  fprintf (file, "\tld 2,");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "-");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "(12)\n");
	  fprintf (file, "\tadd 2,2,12\n");
	}

      /* Emit the ELFv2 local entry point marker, giving the offset of
	 the local entry from the global entry (NAME).  */
      fputs ("\t.localentry\t", file);
      assemble_name (file, name);
      fputs (",.-", file);
      assemble_name (file, name);
      fputs ("\n", file);
    }

  /* Output -mprofile-kernel code.  This needs to be done here instead of
     in output_function_profile since it must go after the ELFv2 ABI
     local entry point.  */
  if (TARGET_PROFILE_KERNEL && crtl->profile)
    {
      gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      gcc_assert (!TARGET_32BIT);

      asm_fprintf (file, "\tmflr %s\n", reg_names[0]);

      /* In the ELFv2 ABI we have no compiler stack word.  It must be
	 the resposibility of _mcount to preserve the static chain
	 register if required.  */
      if (DEFAULT_ABI != ABI_ELFv2
	  && cfun->static_chain_decl != NULL)
	{
	  /* Preserve the static chain register (r11) around the
	     _mcount call in the caller's stack slot at offset 24.  */
	  asm_fprintf (file, "\tstd %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	  fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
	  asm_fprintf (file, "\tld %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	}
      else
	fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
    }

  rs6000_pic_labelno++;
}
27838 /* -mprofile-kernel code calls mcount before the function prolog,
27839 so a profiled leaf function should stay a leaf function. */
27841 rs6000_keep_leaf_when_profiled ()
27843 return TARGET_PROFILE_KERNEL
;
27846 /* Non-zero if vmx regs are restored before the frame pop, zero if
27847 we restore after the pop when possible. */
27848 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
/* Restoring cr is a two step process: loading a reg from the frame
   save, then moving the reg to cr.  For ABI_V4 we must let the
   unwinder know that the stack location is no longer valid at or
   before the stack deallocation, but we can't emit a cfa_restore for
   cr at the stack deallocation like we do for other registers.
   The trouble is that it is possible for the move to cr to be
   scheduled after the stack deallocation.  So say exactly where cr
   is located on each of the two insns.  */

static void
load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
{
  /* Load the saved CR word from its frame slot into GPR REGNO.  */
  rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
  rtx reg = gen_rtx_REG (SImode, regno);
  rtx_insn *insn = emit_move_insn (reg, mem);

  if (!exit_func && DEFAULT_ABI == ABI_V4)
    {
      /* CR is traditionally represented by CR2 in unwind info.  Tell
	 the unwinder that REG now holds CR's value, so the frame slot
	 may be invalidated before the eventual move back to cr.  */
      rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
      rtx set = gen_rtx_SET (reg, cr);

      add_reg_note (insn, REG_CFA_REGISTER, set);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Reload CR from REG.  */

static void
restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
{
  int count = 0;
  int i;

  /* Count the CR fields that were actually saved; the multi-field
     mtcrf form is only worthwhile for more than one field.  */
  if (using_mfcr_multiple)
    {
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  count++;
      gcc_assert (count);
    }

  if (using_mfcr_multiple && count > 1)
    {
      rtx_insn *insn;
      rtvec p;
      int ndx;

      p = rtvec_alloc (count);

      ndx = 0;
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    /* Each element sets one CR field from REG; the second
	       operand is the mtcrf field mask bit for field i.  */
	    rtvec r = rtvec_alloc (2);
	    RTVEC_ELT (r, 0) = reg;
	    RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
	    RTVEC_ELT (p, ndx) =
	      gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
			   gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
	    ndx++;
	  }
      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
      gcc_assert (ndx == count);

      /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	 CR field separately.  */
      if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	{
	  for (i = 0; i < 8; i++)
	    if (save_reg_p (CR0_REGNO + i))
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    /* Restore each saved CR field with an individual move.  */
    for (i = 0; i < 8; i++)
      if (save_reg_p (CR0_REGNO + i))
	{
	  rtx insn = emit_insn (gen_movsi_to_cr_one
				 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));

	  /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	     CR field separately, attached to the insn that in fact
	     restores this particular CR field.  */
	  if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	    {
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

  /* For other ABIs, we just generate a single CFA_RESTORE for CR2.  */
  if (!exit_func && DEFAULT_ABI != ABI_ELFv2
      && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
    {
      rtx_insn *insn = get_last_insn ();
      rtx cr = gen_rtx_REG (SImode, CR2_REGNO);

      add_reg_note (insn, REG_CFA_RESTORE, cr);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
27959 /* Like cr, the move to lr instruction can be scheduled after the
27960 stack deallocation, but unlike cr, its stack frame save is still
27961 valid. So we only need to emit the cfa_restore on the correct
27965 load_lr_save (int regno
, rtx frame_reg_rtx
, int offset
)
27967 rtx mem
= gen_frame_mem_offset (Pmode
, frame_reg_rtx
, offset
);
27968 rtx reg
= gen_rtx_REG (Pmode
, regno
);
27970 emit_move_insn (reg
, mem
);
27974 restore_saved_lr (int regno
, bool exit_func
)
27976 rtx reg
= gen_rtx_REG (Pmode
, regno
);
27977 rtx lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
27978 rtx_insn
*insn
= emit_move_insn (lr
, reg
);
27980 if (!exit_func
&& flag_shrink_wrap
)
27982 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
27983 RTX_FRAME_RELATED_P (insn
) = 1;
/* Prepend CFA_RESTORE notes for the saved CR field(s) and LR to the
   CFA_RESTORES list and return the extended list.  */

static rtx
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
{
  if (DEFAULT_ABI == ABI_ELFv2)
    {
      int i;
      /* In the ELFv2 ABI each call-saved CR field is described to the
	 unwinder separately.  */
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
	    cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
					   cfa_restores);
	  }
    }
  else if (info->cr_save_p)
    /* Other ABIs traditionally represent the whole CR as CR2.  */
    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
				   gen_rtx_REG (SImode, CR2_REGNO),
				   cfa_restores);

  if (info->lr_save_p)
    cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
				   gen_rtx_REG (Pmode, LR_REGNO),
				   cfa_restores);
  return cfa_restores;
}
28013 /* Return true if OFFSET from stack pointer can be clobbered by signals.
28014 V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
28015 below stack pointer not cloberred by signals. */
28018 offset_below_red_zone_p (HOST_WIDE_INT offset
)
28020 return offset
< (DEFAULT_ABI
== ABI_V4
28022 : TARGET_32BIT
? -220 : -288);
/* Append CFA_RESTORES to any existing REG_NOTES on the last insn.  */

static void
emit_cfa_restores (rtx cfa_restores)
{
  rtx_insn *insn = get_last_insn ();
  rtx *loc = &REG_NOTES (insn);

  /* Walk to the tail of the existing note chain and splice the
     restore notes onto the end, keeping any prior notes first.  */
  while (*loc)
    loc = &XEXP (*loc, 1);
  *loc = cfa_restores;
  RTX_FRAME_RELATED_P (insn) = 1;
}
28039 /* Emit function epilogue as insns. */
28042 rs6000_emit_epilogue (int sibcall
)
28044 rs6000_stack_t
*info
;
28045 int restoring_GPRs_inline
;
28046 int restoring_FPRs_inline
;
28047 int using_load_multiple
;
28048 int using_mtcr_multiple
;
28049 int use_backchain_to_restore_sp
;
28052 HOST_WIDE_INT frame_off
= 0;
28053 rtx sp_reg_rtx
= gen_rtx_REG (Pmode
, 1);
28054 rtx frame_reg_rtx
= sp_reg_rtx
;
28055 rtx cfa_restores
= NULL_RTX
;
28057 rtx cr_save_reg
= NULL_RTX
;
28058 machine_mode reg_mode
= Pmode
;
28059 int reg_size
= TARGET_32BIT
? 4 : 8;
28060 machine_mode fp_reg_mode
= (TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
)
28062 int fp_reg_size
= 8;
28065 unsigned ptr_regno
;
28067 info
= rs6000_stack_info ();
28069 strategy
= info
->savres_strategy
;
28070 using_load_multiple
= strategy
& REST_MULTIPLE
;
28071 restoring_FPRs_inline
= sibcall
|| (strategy
& REST_INLINE_FPRS
);
28072 restoring_GPRs_inline
= sibcall
|| (strategy
& REST_INLINE_GPRS
);
28073 using_mtcr_multiple
= (rs6000_cpu
== PROCESSOR_PPC601
28074 || rs6000_cpu
== PROCESSOR_PPC603
28075 || rs6000_cpu
== PROCESSOR_PPC750
28077 /* Restore via the backchain when we have a large frame, since this
28078 is more efficient than an addis, addi pair. The second condition
28079 here will not trigger at the moment; We don't actually need a
28080 frame pointer for alloca, but the generic parts of the compiler
28081 give us one anyway. */
28082 use_backchain_to_restore_sp
= (info
->total_size
+ (info
->lr_save_p
28083 ? info
->lr_save_offset
28085 || (cfun
->calls_alloca
28086 && !frame_pointer_needed
));
28087 restore_lr
= (info
->lr_save_p
28088 && (restoring_FPRs_inline
28089 || (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
))
28090 && (restoring_GPRs_inline
28091 || info
->first_fp_reg_save
< 64)
28092 && !cfun
->machine
->lr_is_wrapped_separately
);
28095 if (WORLD_SAVE_P (info
))
28099 const char *alloc_rname
;
28102 /* eh_rest_world_r10 will return to the location saved in the LR
28103 stack slot (which is not likely to be our caller.)
28104 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28105 rest_world is similar, except any R10 parameter is ignored.
28106 The exception-handling stuff that was here in 2.95 is no
28107 longer necessary. */
28110 + 32 - info
->first_gp_reg_save
28111 + LAST_ALTIVEC_REGNO
+ 1 - info
->first_altivec_reg_save
28112 + 63 + 1 - info
->first_fp_reg_save
);
28114 strcpy (rname
, ((crtl
->calls_eh_return
) ?
28115 "*eh_rest_world_r10" : "*rest_world"));
28116 alloc_rname
= ggc_strdup (rname
);
28119 RTVEC_ELT (p
, j
++) = ret_rtx
;
28121 = gen_rtx_USE (VOIDmode
, gen_rtx_SYMBOL_REF (Pmode
, alloc_rname
));
28122 /* The instruction pattern requires a clobber here;
28123 it is shared with the restVEC helper. */
28125 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, 11));
28128 /* CR register traditionally saved as CR2. */
28129 rtx reg
= gen_rtx_REG (SImode
, CR2_REGNO
);
28131 = gen_frame_load (reg
, frame_reg_rtx
, info
->cr_save_offset
);
28132 if (flag_shrink_wrap
)
28134 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
,
28135 gen_rtx_REG (Pmode
, LR_REGNO
),
28137 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28141 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
28143 rtx reg
= gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
);
28145 = gen_frame_load (reg
,
28146 frame_reg_rtx
, info
->gp_save_offset
+ reg_size
* i
);
28147 if (flag_shrink_wrap
)
28148 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28150 for (i
= 0; info
->first_altivec_reg_save
+ i
<= LAST_ALTIVEC_REGNO
; i
++)
28152 rtx reg
= gen_rtx_REG (V4SImode
, info
->first_altivec_reg_save
+ i
);
28154 = gen_frame_load (reg
,
28155 frame_reg_rtx
, info
->altivec_save_offset
+ 16 * i
);
28156 if (flag_shrink_wrap
)
28157 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28159 for (i
= 0; info
->first_fp_reg_save
+ i
<= 63; i
++)
28161 rtx reg
= gen_rtx_REG ((TARGET_HARD_FLOAT
&& TARGET_DOUBLE_FLOAT
28162 ? DFmode
: SFmode
),
28163 info
->first_fp_reg_save
+ i
);
28165 = gen_frame_load (reg
, frame_reg_rtx
, info
->fp_save_offset
+ 8 * i
);
28166 if (flag_shrink_wrap
)
28167 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28170 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, 0));
28172 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 12));
28174 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 7));
28176 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 8));
28178 = gen_rtx_USE (VOIDmode
, gen_rtx_REG (SImode
, 10));
28179 insn
= emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
28181 if (flag_shrink_wrap
)
28183 REG_NOTES (insn
) = cfa_restores
;
28184 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
28185 RTX_FRAME_RELATED_P (insn
) = 1;
28190 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28192 frame_off
= info
->total_size
;
28194 /* Restore AltiVec registers if we must do so before adjusting the
28196 if (info
->altivec_size
!= 0
28197 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28198 || (DEFAULT_ABI
!= ABI_V4
28199 && offset_below_red_zone_p (info
->altivec_save_offset
))))
28202 int scratch_regno
= ptr_regno_for_savres (SAVRES_VR
);
28204 gcc_checking_assert (scratch_regno
== 11 || scratch_regno
== 12);
28205 if (use_backchain_to_restore_sp
)
28207 int frame_regno
= 11;
28209 if ((strategy
& REST_INLINE_VRS
) == 0)
28211 /* Of r11 and r12, select the one not clobbered by an
28212 out-of-line restore function for the frame register. */
28213 frame_regno
= 11 + 12 - scratch_regno
;
28215 frame_reg_rtx
= gen_rtx_REG (Pmode
, frame_regno
);
28216 emit_move_insn (frame_reg_rtx
,
28217 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
28220 else if (frame_pointer_needed
)
28221 frame_reg_rtx
= hard_frame_pointer_rtx
;
28223 if ((strategy
& REST_INLINE_VRS
) == 0)
28225 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
28227 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
28228 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
28230 if (end_save
+ frame_off
!= 0)
28232 rtx offset
= GEN_INT (end_save
+ frame_off
);
28234 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
28237 emit_move_insn (ptr_reg
, frame_reg_rtx
);
28239 ptr_off
= -end_save
;
28240 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
28241 info
->altivec_save_offset
+ ptr_off
,
28242 0, V4SImode
, SAVRES_VR
);
28246 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
28247 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
28249 rtx addr
, areg
, mem
, insn
;
28250 rtx reg
= gen_rtx_REG (V4SImode
, i
);
28251 HOST_WIDE_INT offset
28252 = (info
->altivec_save_offset
+ frame_off
28253 + 16 * (i
- info
->first_altivec_reg_save
));
28255 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
28257 mem
= gen_frame_mem (V4SImode
,
28258 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
28259 GEN_INT (offset
)));
28260 insn
= gen_rtx_SET (reg
, mem
);
28264 areg
= gen_rtx_REG (Pmode
, 0);
28265 emit_move_insn (areg
, GEN_INT (offset
));
28267 /* AltiVec addressing mode is [reg+reg]. */
28268 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
);
28269 mem
= gen_frame_mem (V4SImode
, addr
);
28271 /* Rather than emitting a generic move, force use of the
28272 lvx instruction, which we always want. In particular we
28273 don't want lxvd2x/xxpermdi for little endian. */
28274 insn
= gen_altivec_lvx_v4si_internal (reg
, mem
);
28277 (void) emit_insn (insn
);
28281 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
28282 if (((strategy
& REST_INLINE_VRS
) == 0
28283 || (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
)) != 0)
28284 && (flag_shrink_wrap
28285 || (offset_below_red_zone_p
28286 (info
->altivec_save_offset
28287 + 16 * (i
- info
->first_altivec_reg_save
)))))
28289 rtx reg
= gen_rtx_REG (V4SImode
, i
);
28290 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28294 /* Restore VRSAVE if we must do so before adjusting the stack. */
28295 if (info
->vrsave_size
!= 0
28296 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28297 || (DEFAULT_ABI
!= ABI_V4
28298 && offset_below_red_zone_p (info
->vrsave_save_offset
))))
28302 if (frame_reg_rtx
== sp_reg_rtx
)
28304 if (use_backchain_to_restore_sp
)
28306 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
28307 emit_move_insn (frame_reg_rtx
,
28308 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
28311 else if (frame_pointer_needed
)
28312 frame_reg_rtx
= hard_frame_pointer_rtx
;
28315 reg
= gen_rtx_REG (SImode
, 12);
28316 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
28317 info
->vrsave_save_offset
+ frame_off
));
28319 emit_insn (generate_set_vrsave (reg
, info
, 1));
28323 /* If we have a large stack frame, restore the old stack pointer
28324 using the backchain. */
28325 if (use_backchain_to_restore_sp
)
28327 if (frame_reg_rtx
== sp_reg_rtx
)
28329 /* Under V.4, don't reset the stack pointer until after we're done
28330 loading the saved registers. */
28331 if (DEFAULT_ABI
== ABI_V4
)
28332 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
28334 insn
= emit_move_insn (frame_reg_rtx
,
28335 gen_rtx_MEM (Pmode
, sp_reg_rtx
));
28338 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28339 && DEFAULT_ABI
== ABI_V4
)
28340 /* frame_reg_rtx has been set up by the altivec restore. */
28344 insn
= emit_move_insn (sp_reg_rtx
, frame_reg_rtx
);
28345 frame_reg_rtx
= sp_reg_rtx
;
28348 /* If we have a frame pointer, we can restore the old stack pointer
28350 else if (frame_pointer_needed
)
28352 frame_reg_rtx
= sp_reg_rtx
;
28353 if (DEFAULT_ABI
== ABI_V4
)
28354 frame_reg_rtx
= gen_rtx_REG (Pmode
, 11);
28355 /* Prevent reordering memory accesses against stack pointer restore. */
28356 else if (cfun
->calls_alloca
28357 || offset_below_red_zone_p (-info
->total_size
))
28358 rs6000_emit_stack_tie (frame_reg_rtx
, true);
28360 insn
= emit_insn (gen_add3_insn (frame_reg_rtx
, hard_frame_pointer_rtx
,
28361 GEN_INT (info
->total_size
)));
28364 else if (info
->push_p
28365 && DEFAULT_ABI
!= ABI_V4
28366 && !crtl
->calls_eh_return
)
28368 /* Prevent reordering memory accesses against stack pointer restore. */
28369 if (cfun
->calls_alloca
28370 || offset_below_red_zone_p (-info
->total_size
))
28371 rs6000_emit_stack_tie (frame_reg_rtx
, false);
28372 insn
= emit_insn (gen_add3_insn (sp_reg_rtx
, sp_reg_rtx
,
28373 GEN_INT (info
->total_size
)));
28376 if (insn
&& frame_reg_rtx
== sp_reg_rtx
)
28380 REG_NOTES (insn
) = cfa_restores
;
28381 cfa_restores
= NULL_RTX
;
28383 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
28384 RTX_FRAME_RELATED_P (insn
) = 1;
28387 /* Restore AltiVec registers if we have not done so already. */
28388 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28389 && info
->altivec_size
!= 0
28390 && (DEFAULT_ABI
== ABI_V4
28391 || !offset_below_red_zone_p (info
->altivec_save_offset
)))
28395 if ((strategy
& REST_INLINE_VRS
) == 0)
28397 int end_save
= info
->altivec_save_offset
+ info
->altivec_size
;
28399 rtx ptr_reg
= gen_rtx_REG (Pmode
, 0);
28400 int scratch_regno
= ptr_regno_for_savres (SAVRES_VR
);
28401 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
28403 if (end_save
+ frame_off
!= 0)
28405 rtx offset
= GEN_INT (end_save
+ frame_off
);
28407 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
, offset
));
28410 emit_move_insn (ptr_reg
, frame_reg_rtx
);
28412 ptr_off
= -end_save
;
28413 insn
= rs6000_emit_savres_rtx (info
, scratch_reg
,
28414 info
->altivec_save_offset
+ ptr_off
,
28415 0, V4SImode
, SAVRES_VR
);
28416 if (REGNO (frame_reg_rtx
) == REGNO (scratch_reg
))
28418 /* Frame reg was clobbered by out-of-line save. Restore it
28419 from ptr_reg, and if we are calling out-of-line gpr or
28420 fpr restore set up the correct pointer and offset. */
28421 unsigned newptr_regno
= 1;
28422 if (!restoring_GPRs_inline
)
28424 bool lr
= info
->gp_save_offset
+ info
->gp_size
== 0;
28425 int sel
= SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
28426 newptr_regno
= ptr_regno_for_savres (sel
);
28427 end_save
= info
->gp_save_offset
+ info
->gp_size
;
28429 else if (!restoring_FPRs_inline
)
28431 bool lr
= !(strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
);
28432 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
28433 newptr_regno
= ptr_regno_for_savres (sel
);
28434 end_save
= info
->fp_save_offset
+ info
->fp_size
;
28437 if (newptr_regno
!= 1 && REGNO (frame_reg_rtx
) != newptr_regno
)
28438 frame_reg_rtx
= gen_rtx_REG (Pmode
, newptr_regno
);
28440 if (end_save
+ ptr_off
!= 0)
28442 rtx offset
= GEN_INT (end_save
+ ptr_off
);
28444 frame_off
= -end_save
;
28446 emit_insn (gen_addsi3_carry (frame_reg_rtx
,
28449 emit_insn (gen_adddi3_carry (frame_reg_rtx
,
28454 frame_off
= ptr_off
;
28455 emit_move_insn (frame_reg_rtx
, ptr_reg
);
28461 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
28462 if (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
))
28464 rtx addr
, areg
, mem
, insn
;
28465 rtx reg
= gen_rtx_REG (V4SImode
, i
);
28466 HOST_WIDE_INT offset
28467 = (info
->altivec_save_offset
+ frame_off
28468 + 16 * (i
- info
->first_altivec_reg_save
));
28470 if (TARGET_P9_DFORM_VECTOR
&& quad_address_offset_p (offset
))
28472 mem
= gen_frame_mem (V4SImode
,
28473 gen_rtx_PLUS (Pmode
, frame_reg_rtx
,
28474 GEN_INT (offset
)));
28475 insn
= gen_rtx_SET (reg
, mem
);
28479 areg
= gen_rtx_REG (Pmode
, 0);
28480 emit_move_insn (areg
, GEN_INT (offset
));
28482 /* AltiVec addressing mode is [reg+reg]. */
28483 addr
= gen_rtx_PLUS (Pmode
, frame_reg_rtx
, areg
);
28484 mem
= gen_frame_mem (V4SImode
, addr
);
28486 /* Rather than emitting a generic move, force use of the
28487 lvx instruction, which we always want. In particular we
28488 don't want lxvd2x/xxpermdi for little endian. */
28489 insn
= gen_altivec_lvx_v4si_internal (reg
, mem
);
28492 (void) emit_insn (insn
);
28496 for (i
= info
->first_altivec_reg_save
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
28497 if (((strategy
& REST_INLINE_VRS
) == 0
28498 || (info
->vrsave_mask
& ALTIVEC_REG_BIT (i
)) != 0)
28499 && (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
))
28501 rtx reg
= gen_rtx_REG (V4SImode
, i
);
28502 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28506 /* Restore VRSAVE if we have not done so already. */
28507 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28508 && info
->vrsave_size
!= 0
28509 && (DEFAULT_ABI
== ABI_V4
28510 || !offset_below_red_zone_p (info
->vrsave_save_offset
)))
28514 reg
= gen_rtx_REG (SImode
, 12);
28515 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
28516 info
->vrsave_save_offset
+ frame_off
));
28518 emit_insn (generate_set_vrsave (reg
, info
, 1));
28521 /* If we exit by an out-of-line restore function on ABI_V4 then that
28522 function will deallocate the stack, so we don't need to worry
28523 about the unwinder restoring cr from an invalid stack frame
28525 exit_func
= (!restoring_FPRs_inline
28526 || (!restoring_GPRs_inline
28527 && info
->first_fp_reg_save
== 64));
28529 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28530 *separate* slots if the routine calls __builtin_eh_return, so
28531 that they can be independently restored by the unwinder. */
28532 if (DEFAULT_ABI
== ABI_ELFv2
&& crtl
->calls_eh_return
)
28534 int i
, cr_off
= info
->ehcr_offset
;
28536 for (i
= 0; i
< 8; i
++)
28537 if (!call_used_regs
[CR0_REGNO
+ i
])
28539 rtx reg
= gen_rtx_REG (SImode
, 0);
28540 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
28541 cr_off
+ frame_off
));
28543 insn
= emit_insn (gen_movsi_to_cr_one
28544 (gen_rtx_REG (CCmode
, CR0_REGNO
+ i
), reg
));
28546 if (!exit_func
&& flag_shrink_wrap
)
28548 add_reg_note (insn
, REG_CFA_RESTORE
,
28549 gen_rtx_REG (SImode
, CR0_REGNO
+ i
));
28551 RTX_FRAME_RELATED_P (insn
) = 1;
28554 cr_off
+= reg_size
;
28558 /* Get the old lr if we saved it. If we are restoring registers
28559 out-of-line, then the out-of-line routines can do this for us. */
28560 if (restore_lr
&& restoring_GPRs_inline
)
28561 load_lr_save (0, frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
28563 /* Get the old cr if we saved it. */
28564 if (info
->cr_save_p
)
28566 unsigned cr_save_regno
= 12;
28568 if (!restoring_GPRs_inline
)
28570 /* Ensure we don't use the register used by the out-of-line
28571 gpr register restore below. */
28572 bool lr
= info
->gp_save_offset
+ info
->gp_size
== 0;
28573 int sel
= SAVRES_GPR
| (lr
? SAVRES_LR
: 0);
28574 int gpr_ptr_regno
= ptr_regno_for_savres (sel
);
28576 if (gpr_ptr_regno
== 12)
28577 cr_save_regno
= 11;
28578 gcc_checking_assert (REGNO (frame_reg_rtx
) != cr_save_regno
);
28580 else if (REGNO (frame_reg_rtx
) == 12)
28581 cr_save_regno
= 11;
28583 cr_save_reg
= load_cr_save (cr_save_regno
, frame_reg_rtx
,
28584 info
->cr_save_offset
+ frame_off
,
28588 /* Set LR here to try to overlap restores below. */
28589 if (restore_lr
&& restoring_GPRs_inline
)
28590 restore_saved_lr (0, exit_func
);
28592 /* Load exception handler data registers, if needed. */
28593 if (crtl
->calls_eh_return
)
28595 unsigned int i
, regno
;
28599 rtx reg
= gen_rtx_REG (reg_mode
, 2);
28600 emit_insn (gen_frame_load (reg
, frame_reg_rtx
,
28601 frame_off
+ RS6000_TOC_SAVE_SLOT
));
28608 regno
= EH_RETURN_DATA_REGNO (i
);
28609 if (regno
== INVALID_REGNUM
)
28612 mem
= gen_frame_mem_offset (reg_mode
, frame_reg_rtx
,
28613 info
->ehrd_offset
+ frame_off
28614 + reg_size
* (int) i
);
28616 emit_move_insn (gen_rtx_REG (reg_mode
, regno
), mem
);
28620 /* Restore GPRs. This is done as a PARALLEL if we are using
28621 the load-multiple instructions. */
28622 if (!restoring_GPRs_inline
)
28624 /* We are jumping to an out-of-line function. */
28626 int end_save
= info
->gp_save_offset
+ info
->gp_size
;
28627 bool can_use_exit
= end_save
== 0;
28628 int sel
= SAVRES_GPR
| (can_use_exit
? SAVRES_LR
: 0);
28631 /* Emit stack reset code if we need it. */
28632 ptr_regno
= ptr_regno_for_savres (sel
);
28633 ptr_reg
= gen_rtx_REG (Pmode
, ptr_regno
);
28635 rs6000_emit_stack_reset (frame_reg_rtx
, frame_off
, ptr_regno
);
28636 else if (end_save
+ frame_off
!= 0)
28637 emit_insn (gen_add3_insn (ptr_reg
, frame_reg_rtx
,
28638 GEN_INT (end_save
+ frame_off
)));
28639 else if (REGNO (frame_reg_rtx
) != ptr_regno
)
28640 emit_move_insn (ptr_reg
, frame_reg_rtx
);
28641 if (REGNO (frame_reg_rtx
) == ptr_regno
)
28642 frame_off
= -end_save
;
28644 if (can_use_exit
&& info
->cr_save_p
)
28645 restore_saved_cr (cr_save_reg
, using_mtcr_multiple
, true);
28647 ptr_off
= -end_save
;
28648 rs6000_emit_savres_rtx (info
, ptr_reg
,
28649 info
->gp_save_offset
+ ptr_off
,
28650 info
->lr_save_offset
+ ptr_off
,
28653 else if (using_load_multiple
)
28656 p
= rtvec_alloc (32 - info
->first_gp_reg_save
);
28657 for (i
= 0; i
< 32 - info
->first_gp_reg_save
; i
++)
28659 = gen_frame_load (gen_rtx_REG (reg_mode
, info
->first_gp_reg_save
+ i
),
28661 info
->gp_save_offset
+ frame_off
+ reg_size
* i
);
28662 emit_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
28666 int offset
= info
->gp_save_offset
+ frame_off
;
28667 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
28669 if (rs6000_reg_live_or_pic_offset_p (i
)
28670 && !cfun
->machine
->gpr_is_wrapped_separately
[i
])
28672 rtx reg
= gen_rtx_REG (reg_mode
, i
);
28673 emit_insn (gen_frame_load (reg
, frame_reg_rtx
, offset
));
28676 offset
+= reg_size
;
28680 if (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
)
28682 /* If the frame pointer was used then we can't delay emitting
28683 a REG_CFA_DEF_CFA note. This must happen on the insn that
28684 restores the frame pointer, r31. We may have already emitted
28685 a REG_CFA_DEF_CFA note, but that's OK; A duplicate is
28686 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28687 be harmless if emitted. */
28688 if (frame_pointer_needed
)
28690 insn
= get_last_insn ();
28691 add_reg_note (insn
, REG_CFA_DEF_CFA
,
28692 plus_constant (Pmode
, frame_reg_rtx
, frame_off
));
28693 RTX_FRAME_RELATED_P (insn
) = 1;
28696 /* Set up cfa_restores. We always need these when
28697 shrink-wrapping. If not shrink-wrapping then we only need
28698 the cfa_restore when the stack location is no longer valid.
28699 The cfa_restores must be emitted on or before the insn that
28700 invalidates the stack, and of course must not be emitted
28701 before the insn that actually does the restore. The latter
28702 is why it is a bad idea to emit the cfa_restores as a group
28703 on the last instruction here that actually does a restore:
28704 That insn may be reordered with respect to others doing
28706 if (flag_shrink_wrap
28707 && !restoring_GPRs_inline
28708 && info
->first_fp_reg_save
== 64)
28709 cfa_restores
= add_crlr_cfa_restore (info
, cfa_restores
);
28711 for (i
= info
->first_gp_reg_save
; i
< 32; i
++)
28712 if (!restoring_GPRs_inline
28713 || using_load_multiple
28714 || rs6000_reg_live_or_pic_offset_p (i
))
28716 if (cfun
->machine
->gpr_is_wrapped_separately
[i
])
28719 rtx reg
= gen_rtx_REG (reg_mode
, i
);
28720 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28724 if (!restoring_GPRs_inline
28725 && info
->first_fp_reg_save
== 64)
28727 /* We are jumping to an out-of-line function. */
28729 emit_cfa_restores (cfa_restores
);
28733 if (restore_lr
&& !restoring_GPRs_inline
)
28735 load_lr_save (0, frame_reg_rtx
, info
->lr_save_offset
+ frame_off
);
28736 restore_saved_lr (0, exit_func
);
28739 /* Restore fpr's if we need to do it without calling a function. */
28740 if (restoring_FPRs_inline
)
28742 int offset
= info
->fp_save_offset
+ frame_off
;
28743 for (i
= info
->first_fp_reg_save
; i
< 64; i
++)
28746 && !cfun
->machine
->fpr_is_wrapped_separately
[i
- 32])
28748 rtx reg
= gen_rtx_REG (fp_reg_mode
, i
);
28749 emit_insn (gen_frame_load (reg
, frame_reg_rtx
, offset
));
28750 if (DEFAULT_ABI
== ABI_V4
|| flag_shrink_wrap
)
28751 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
,
28755 offset
+= fp_reg_size
;
28759 /* If we saved cr, restore it here. Just those that were used. */
28760 if (info
->cr_save_p
)
28761 restore_saved_cr (cr_save_reg
, using_mtcr_multiple
, exit_func
);
28763 /* If this is V.4, unwind the stack pointer after all of the loads
28764 have been done, or set up r11 if we are restoring fp out of line. */
28766 if (!restoring_FPRs_inline
)
28768 bool lr
= (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
28769 int sel
= SAVRES_FPR
| (lr
? SAVRES_LR
: 0);
28770 ptr_regno
= ptr_regno_for_savres (sel
);
28773 insn
= rs6000_emit_stack_reset (frame_reg_rtx
, frame_off
, ptr_regno
);
28774 if (REGNO (frame_reg_rtx
) == ptr_regno
)
28777 if (insn
&& restoring_FPRs_inline
)
28781 REG_NOTES (insn
) = cfa_restores
;
28782 cfa_restores
= NULL_RTX
;
28784 add_reg_note (insn
, REG_CFA_DEF_CFA
, sp_reg_rtx
);
28785 RTX_FRAME_RELATED_P (insn
) = 1;
28788 if (crtl
->calls_eh_return
)
28790 rtx sa
= EH_RETURN_STACKADJ_RTX
;
28791 emit_insn (gen_add3_insn (sp_reg_rtx
, sp_reg_rtx
, sa
));
28794 if (!sibcall
&& restoring_FPRs_inline
)
28798 /* We can't hang the cfa_restores off a simple return,
28799 since the shrink-wrap code sometimes uses an existing
28800 return. This means there might be a path from
28801 pre-prologue code to this return, and dwarf2cfi code
28802 wants the eh_frame unwinder state to be the same on
28803 all paths to any point. So we need to emit the
28804 cfa_restores before the return. For -m64 we really
28805 don't need epilogue cfa_restores at all, except for
28806 this irritating dwarf2cfi with shrink-wrap
28807 requirement; The stack red-zone means eh_frame info
28808 from the prologue telling the unwinder to restore
28809 from the stack is perfectly good right to the end of
28811 emit_insn (gen_blockage ());
28812 emit_cfa_restores (cfa_restores
);
28813 cfa_restores
= NULL_RTX
;
28816 emit_jump_insn (targetm
.gen_simple_return ());
28819 if (!sibcall
&& !restoring_FPRs_inline
)
28821 bool lr
= (strategy
& REST_NOINLINE_FPRS_DOESNT_RESTORE_LR
) == 0;
28822 rtvec p
= rtvec_alloc (3 + !!lr
+ 64 - info
->first_fp_reg_save
);
28824 RTVEC_ELT (p
, elt
++) = ret_rtx
;
28826 RTVEC_ELT (p
, elt
++)
28827 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
28829 /* We have to restore more than two FP registers, so branch to the
28830 restore function. It will return to our caller. */
28835 if (flag_shrink_wrap
)
28836 cfa_restores
= add_crlr_cfa_restore (info
, cfa_restores
);
28838 sym
= rs6000_savres_routine_sym (info
, SAVRES_FPR
| (lr
? SAVRES_LR
: 0));
28839 RTVEC_ELT (p
, elt
++) = gen_rtx_USE (VOIDmode
, sym
);
28840 reg
= (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)? 1 : 11;
28841 RTVEC_ELT (p
, elt
++) = gen_rtx_USE (VOIDmode
, gen_rtx_REG (Pmode
, reg
));
28843 for (i
= 0; i
< 64 - info
->first_fp_reg_save
; i
++)
28845 rtx reg
= gen_rtx_REG (DFmode
, info
->first_fp_reg_save
+ i
);
28847 RTVEC_ELT (p
, elt
++)
28848 = gen_frame_load (reg
, sp_reg_rtx
, info
->fp_save_offset
+ 8 * i
);
28849 if (flag_shrink_wrap
)
28850 cfa_restores
= alloc_reg_note (REG_CFA_RESTORE
, reg
, cfa_restores
);
28853 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, p
));
28859 /* Ensure the cfa_restores are hung off an insn that won't
28860 be reordered above other restores. */
28861 emit_insn (gen_blockage ());
28863 emit_cfa_restores (cfa_restores
);
28867 /* Write function epilogue. */
28870 rs6000_output_function_epilogue (FILE *file
,
28871 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
28874 macho_branch_islands ();
28877 rtx_insn
*insn
= get_last_insn ();
28878 rtx_insn
*deleted_debug_label
= NULL
;
28880 /* Mach-O doesn't support labels at the end of objects, so if
28881 it looks like we might want one, take special action.
28883 First, collect any sequence of deleted debug labels. */
28886 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
28888 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28889 notes only, instead set their CODE_LABEL_NUMBER to -1,
28890 otherwise there would be code generation differences
28891 in between -g and -g0. */
28892 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
28893 deleted_debug_label
= insn
;
28894 insn
= PREV_INSN (insn
);
28897 /* Second, if we have:
28900 then this needs to be detected, so skip past the barrier. */
28902 if (insn
&& BARRIER_P (insn
))
28903 insn
= PREV_INSN (insn
);
28905 /* Up to now we've only seen notes or barriers. */
28910 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
28911 /* Trailing label: <barrier>. */
28912 fputs ("\tnop\n", file
);
28915 /* Lastly, see if we have a completely empty function body. */
28916 while (insn
&& ! INSN_P (insn
))
28917 insn
= PREV_INSN (insn
);
28918 /* If we don't find any insns, we've got an empty function body;
28919 I.e. completely empty - without a return or branch. This is
28920 taken as the case where a function body has been removed
28921 because it contains an inline __builtin_unreachable(). GCC
28922 states that reaching __builtin_unreachable() means UB so we're
28923 not obliged to do anything special; however, we want
28924 non-zero-sized function bodies. To meet this, and help the
28925 user out, let's trap the case. */
28927 fputs ("\ttrap\n", file
);
28930 else if (deleted_debug_label
)
28931 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
28932 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
28933 CODE_LABEL_NUMBER (insn
) = -1;
28937 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28940 We don't output a traceback table if -finhibit-size-directive was
28941 used. The documentation for -finhibit-size-directive reads
28942 ``don't output a @code{.size} assembler directive, or anything
28943 else that would cause trouble if the function is split in the
28944 middle, and the two halves are placed at locations far apart in
28945 memory.'' The traceback table has this property, since it
28946 includes the offset from the start of the function to the
28947 traceback table itself.
28949 System V.4 Powerpc's (and the embedded ABI derived from it) use a
28950 different traceback table. */
28951 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
28952 && ! flag_inhibit_size_directive
28953 && rs6000_traceback
!= traceback_none
&& !cfun
->is_thunk
)
28955 const char *fname
= NULL
;
28956 const char *language_string
= lang_hooks
.name
;
28957 int fixed_parms
= 0, float_parms
= 0, parm_info
= 0;
28959 int optional_tbtab
;
28960 rs6000_stack_t
*info
= rs6000_stack_info ();
28962 if (rs6000_traceback
== traceback_full
)
28963 optional_tbtab
= 1;
28964 else if (rs6000_traceback
== traceback_part
)
28965 optional_tbtab
= 0;
28967 optional_tbtab
= !optimize_size
&& !TARGET_ELF
;
28969 if (optional_tbtab
)
28971 fname
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
28972 while (*fname
== '.') /* V.4 encodes . in the name */
28975 /* Need label immediately before tbtab, so we can compute
28976 its offset from the function start. */
28977 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LT");
28978 ASM_OUTPUT_LABEL (file
, fname
);
28981 /* The .tbtab pseudo-op can only be used for the first eight
28982 expressions, since it can't handle the possibly variable
28983 length fields that follow. However, if you omit the optional
28984 fields, the assembler outputs zeros for all optional fields
28985 anyways, giving each variable length field is minimum length
28986 (as defined in sys/debug.h). Thus we can not use the .tbtab
28987 pseudo-op at all. */
28989 /* An all-zero word flags the start of the tbtab, for debuggers
28990 that have to find it by searching forward from the entry
28991 point or from the current pc. */
28992 fputs ("\t.long 0\n", file
);
28994 /* Tbtab format type. Use format type 0. */
28995 fputs ("\t.byte 0,", file
);
28997 /* Language type. Unfortunately, there does not seem to be any
28998 official way to discover the language being compiled, so we
28999 use language_string.
29000 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
29001 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
29002 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
29003 either, so for now use 0. */
29005 || ! strcmp (language_string
, "GNU GIMPLE")
29006 || ! strcmp (language_string
, "GNU Go")
29007 || ! strcmp (language_string
, "libgccjit"))
29009 else if (! strcmp (language_string
, "GNU F77")
29010 || lang_GNU_Fortran ())
29012 else if (! strcmp (language_string
, "GNU Pascal"))
29014 else if (! strcmp (language_string
, "GNU Ada"))
29016 else if (lang_GNU_CXX ()
29017 || ! strcmp (language_string
, "GNU Objective-C++"))
29019 else if (! strcmp (language_string
, "GNU Java"))
29021 else if (! strcmp (language_string
, "GNU Objective-C"))
29024 gcc_unreachable ();
29025 fprintf (file
, "%d,", i
);
29027 /* 8 single bit fields: global linkage (not set for C extern linkage,
29028 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29029 from start of procedure stored in tbtab, internal function, function
29030 has controlled storage, function has no toc, function uses fp,
29031 function logs/aborts fp operations. */
29032 /* Assume that fp operations are used if any fp reg must be saved. */
29033 fprintf (file
, "%d,",
29034 (optional_tbtab
<< 5) | ((info
->first_fp_reg_save
!= 64) << 1));
29036 /* 6 bitfields: function is interrupt handler, name present in
29037 proc table, function calls alloca, on condition directives
29038 (controls stack walks, 3 bits), saves condition reg, saves
29040 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29041 set up as a frame pointer, even when there is no alloca call. */
29042 fprintf (file
, "%d,",
29043 ((optional_tbtab
<< 6)
29044 | ((optional_tbtab
& frame_pointer_needed
) << 5)
29045 | (info
->cr_save_p
<< 1)
29046 | (info
->lr_save_p
)));
29048 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29050 fprintf (file
, "%d,",
29051 (info
->push_p
<< 7) | (64 - info
->first_fp_reg_save
));
29053 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29054 fprintf (file
, "%d,", (32 - first_reg_to_save ()));
29056 if (optional_tbtab
)
29058 /* Compute the parameter info from the function decl argument
29061 int next_parm_info_bit
= 31;
29063 for (decl
= DECL_ARGUMENTS (current_function_decl
);
29064 decl
; decl
= DECL_CHAIN (decl
))
29066 rtx parameter
= DECL_INCOMING_RTL (decl
);
29067 machine_mode mode
= GET_MODE (parameter
);
29069 if (GET_CODE (parameter
) == REG
)
29071 if (SCALAR_FLOAT_MODE_P (mode
))
29094 gcc_unreachable ();
29097 /* If only one bit will fit, don't or in this entry. */
29098 if (next_parm_info_bit
> 0)
29099 parm_info
|= (bits
<< (next_parm_info_bit
- 1));
29100 next_parm_info_bit
-= 2;
29104 fixed_parms
+= ((GET_MODE_SIZE (mode
)
29105 + (UNITS_PER_WORD
- 1))
29107 next_parm_info_bit
-= 1;
29113 /* Number of fixed point parameters. */
29114 /* This is actually the number of words of fixed point parameters; thus
29115 an 8 byte struct counts as 2; and thus the maximum value is 8. */
29116 fprintf (file
, "%d,", fixed_parms
);
29118 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29120 /* This is actually the number of fp registers that hold parameters;
29121 and thus the maximum value is 13. */
29122 /* Set parameters on stack bit if parameters are not in their original
29123 registers, regardless of whether they are on the stack? Xlc
29124 seems to set the bit when not optimizing. */
29125 fprintf (file
, "%d\n", ((float_parms
<< 1) | (! optimize
)));
29127 if (optional_tbtab
)
29129 /* Optional fields follow. Some are variable length. */
29131 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
29132 float, 11 double float. */
29133 /* There is an entry for each parameter in a register, in the order
29134 that they occur in the parameter list. Any intervening arguments
29135 on the stack are ignored. If the list overflows a long (max
29136 possible length 34 bits) then completely leave off all elements
29138 /* Only emit this long if there was at least one parameter. */
29139 if (fixed_parms
|| float_parms
)
29140 fprintf (file
, "\t.long %d\n", parm_info
);
29142 /* Offset from start of code to tb table. */
29143 fputs ("\t.long ", file
);
29144 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LT");
29145 RS6000_OUTPUT_BASENAME (file
, fname
);
29147 rs6000_output_function_entry (file
, fname
);
29150 /* Interrupt handler mask. */
29151 /* Omit this long, since we never set the interrupt handler bit
29154 /* Number of CTL (controlled storage) anchors. */
29155 /* Omit this long, since the has_ctl bit is never set above. */
29157 /* Displacement into stack of each CTL anchor. */
29158 /* Omit this list of longs, because there are no CTL anchors. */
29160 /* Length of function name. */
29163 fprintf (file
, "\t.short %d\n", (int) strlen (fname
));
29165 /* Function name. */
29166 assemble_string (fname
, strlen (fname
));
29168 /* Register for alloca automatic storage; this is always reg 31.
29169 Only emit this if the alloca bit was set above. */
29170 if (frame_pointer_needed
)
29171 fputs ("\t.byte 31\n", file
);
29173 fputs ("\t.align 2\n", file
);
29177 /* Arrange to define .LCTOC1 label, if not already done. */
29181 if (!toc_initialized
)
29183 switch_to_section (toc_section
);
29184 switch_to_section (current_function_section ());
29189 /* -fsplit-stack support. */
29191 /* A SYMBOL_REF for __morestack. */
29192 static GTY(()) rtx morestack_ref
;
29195 gen_add3_const (rtx rt
, rtx ra
, long c
)
29198 return gen_adddi3 (rt
, ra
, GEN_INT (c
));
29200 return gen_addsi3 (rt
, ra
, GEN_INT (c
));
29203 /* Emit -fsplit-stack prologue, which goes before the regular function
29204 prologue (at local entry point in the case of ELFv2). */
29207 rs6000_expand_split_stack_prologue (void)
29209 rs6000_stack_t
*info
= rs6000_stack_info ();
29210 unsigned HOST_WIDE_INT allocate
;
29211 long alloc_hi
, alloc_lo
;
29212 rtx r0
, r1
, r12
, lr
, ok_label
, compare
, jump
, call_fusage
;
29215 gcc_assert (flag_split_stack
&& reload_completed
);
29220 if (global_regs
[29])
29222 error ("-fsplit-stack uses register r29");
29223 inform (DECL_SOURCE_LOCATION (global_regs_decl
[29]),
29224 "conflicts with %qD", global_regs_decl
[29]);
29227 allocate
= info
->total_size
;
29228 if (allocate
> (unsigned HOST_WIDE_INT
) 1 << 31)
29230 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29233 if (morestack_ref
== NULL_RTX
)
29235 morestack_ref
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
29236 SYMBOL_REF_FLAGS (morestack_ref
) |= (SYMBOL_FLAG_LOCAL
29237 | SYMBOL_FLAG_FUNCTION
);
29240 r0
= gen_rtx_REG (Pmode
, 0);
29241 r1
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
29242 r12
= gen_rtx_REG (Pmode
, 12);
29243 emit_insn (gen_load_split_stack_limit (r0
));
29244 /* Always emit two insns here to calculate the requested stack,
29245 so that the linker can edit them when adjusting size for calling
29246 non-split-stack code. */
29247 alloc_hi
= (-allocate
+ 0x8000) & ~0xffffL
;
29248 alloc_lo
= -allocate
- alloc_hi
;
29251 emit_insn (gen_add3_const (r12
, r1
, alloc_hi
));
29253 emit_insn (gen_add3_const (r12
, r12
, alloc_lo
));
29255 emit_insn (gen_nop ());
29259 emit_insn (gen_add3_const (r12
, r1
, alloc_lo
));
29260 emit_insn (gen_nop ());
29263 compare
= gen_rtx_REG (CCUNSmode
, CR7_REGNO
);
29264 emit_insn (gen_rtx_SET (compare
, gen_rtx_COMPARE (CCUNSmode
, r12
, r0
)));
29265 ok_label
= gen_label_rtx ();
29266 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
29267 gen_rtx_GEU (VOIDmode
, compare
, const0_rtx
),
29268 gen_rtx_LABEL_REF (VOIDmode
, ok_label
),
29270 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
29271 JUMP_LABEL (insn
) = ok_label
;
29272 /* Mark the jump as very likely to be taken. */
29273 add_int_reg_note (insn
, REG_BR_PROB
,
29274 REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100);
29276 lr
= gen_rtx_REG (Pmode
, LR_REGNO
);
29277 insn
= emit_move_insn (r0
, lr
);
29278 RTX_FRAME_RELATED_P (insn
) = 1;
29279 insn
= emit_insn (gen_frame_store (r0
, r1
, info
->lr_save_offset
));
29280 RTX_FRAME_RELATED_P (insn
) = 1;
29282 insn
= emit_call_insn (gen_call (gen_rtx_MEM (SImode
, morestack_ref
),
29283 const0_rtx
, const0_rtx
));
29284 call_fusage
= NULL_RTX
;
29285 use_reg (&call_fusage
, r12
);
29286 /* Say the call uses r0, even though it doesn't, to stop regrename
29287 from twiddling with the insns saving lr, trashing args for cfun.
29288 The insns restoring lr are similarly protected by making
29289 split_stack_return use r0. */
29290 use_reg (&call_fusage
, r0
);
29291 add_function_usage_to (insn
, call_fusage
);
29292 /* Indicate that this function can't jump to non-local gotos. */
29293 make_reg_eh_region_note_nothrow_nononlocal (insn
);
29294 emit_insn (gen_frame_load (r0
, r1
, info
->lr_save_offset
));
29295 insn
= emit_move_insn (lr
, r0
);
29296 add_reg_note (insn
, REG_CFA_RESTORE
, lr
);
29297 RTX_FRAME_RELATED_P (insn
) = 1;
29298 emit_insn (gen_split_stack_return ());
29300 emit_label (ok_label
);
29301 LABEL_NUSES (ok_label
) = 1;
29304 /* Return the internal arg pointer used for function incoming
29305 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29306 to copy it to a pseudo in order for it to be preserved over calls
29307 and suchlike. We'd really like to use a pseudo here for the
29308 internal arg pointer but data-flow analysis is not prepared to
29309 accept pseudos as live at the beginning of a function. */
29312 rs6000_internal_arg_pointer (void)
29314 if (flag_split_stack
29315 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun
->decl
))
29319 if (cfun
->machine
->split_stack_arg_pointer
== NULL_RTX
)
29323 cfun
->machine
->split_stack_arg_pointer
= gen_reg_rtx (Pmode
);
29324 REG_POINTER (cfun
->machine
->split_stack_arg_pointer
) = 1;
29326 /* Put the pseudo initialization right after the note at the
29327 beginning of the function. */
29328 pat
= gen_rtx_SET (cfun
->machine
->split_stack_arg_pointer
,
29329 gen_rtx_REG (Pmode
, 12));
29330 push_topmost_sequence ();
29331 emit_insn_after (pat
, get_insns ());
29332 pop_topmost_sequence ();
29334 return plus_constant (Pmode
, cfun
->machine
->split_stack_arg_pointer
,
29335 FIRST_PARM_OFFSET (current_function_decl
));
29337 return virtual_incoming_args_rtx
;
29340 /* We may have to tell the dataflow pass that the split stack prologue
29341 is initializing a register. */
29344 rs6000_live_on_entry (bitmap regs
)
29346 if (flag_split_stack
)
29347 bitmap_set_bit (regs
, 12);
29350 /* Emit -fsplit-stack dynamic stack allocation space check. */
29353 rs6000_split_stack_space_check (rtx size
, rtx label
)
29355 rtx sp
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
29356 rtx limit
= gen_reg_rtx (Pmode
);
29357 rtx requested
= gen_reg_rtx (Pmode
);
29358 rtx cmp
= gen_reg_rtx (CCUNSmode
);
29361 emit_insn (gen_load_split_stack_limit (limit
));
29362 if (CONST_INT_P (size
))
29363 emit_insn (gen_add3_insn (requested
, sp
, GEN_INT (-INTVAL (size
))));
29366 size
= force_reg (Pmode
, size
);
29367 emit_move_insn (requested
, gen_rtx_MINUS (Pmode
, sp
, size
));
29369 emit_insn (gen_rtx_SET (cmp
, gen_rtx_COMPARE (CCUNSmode
, requested
, limit
)));
29370 jump
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
29371 gen_rtx_GEU (VOIDmode
, cmp
, const0_rtx
),
29372 gen_rtx_LABEL_REF (VOIDmode
, label
),
29374 jump
= emit_jump_insn (gen_rtx_SET (pc_rtx
, jump
));
29375 JUMP_LABEL (jump
) = label
;
29378 /* A C compound statement that outputs the assembler code for a thunk
29379 function, used to implement C++ virtual function calls with
29380 multiple inheritance. The thunk acts as a wrapper around a virtual
29381 function, adjusting the implicit object parameter before handing
29382 control off to the real function.
29384 First, emit code to add the integer DELTA to the location that
29385 contains the incoming first argument. Assume that this argument
29386 contains a pointer, and is the one used to pass the `this' pointer
29387 in C++. This is the incoming argument *before* the function
29388 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29389 values of all other incoming arguments.
29391 After the addition, emit code to jump to FUNCTION, which is a
29392 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29393 not touch the return address. Hence returning from FUNCTION will
29394 return to whoever called the current `thunk'.
29396 The effect must be as if FUNCTION had been called directly with the
29397 adjusted first argument. This macro is responsible for emitting
29398 all of the code for a thunk function; output_function_prologue()
29399 and output_function_epilogue() are not invoked.
29401 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29402 been extracted from it.) It might possibly be useful on some
29403 targets, but probably not.
29405 If you do not define this macro, the target-independent code in the
29406 C++ frontend will generate a less efficient heavyweight thunk that
29407 calls FUNCTION instead of jumping to it. The generic approach does
29408 not support varargs. */
29411 rs6000_output_mi_thunk (FILE *file
, tree thunk_fndecl ATTRIBUTE_UNUSED
,
29412 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
29415 rtx this_rtx
, funexp
;
29418 reload_completed
= 1;
29419 epilogue_completed
= 1;
29421 /* Mark the end of the (empty) prologue. */
29422 emit_note (NOTE_INSN_PROLOGUE_END
);
29424 /* Find the "this" pointer. If the function returns a structure,
29425 the structure return pointer is in r3. */
29426 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
29427 this_rtx
= gen_rtx_REG (Pmode
, 4);
29429 this_rtx
= gen_rtx_REG (Pmode
, 3);
29431 /* Apply the constant offset, if required. */
29433 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, GEN_INT (delta
)));
29435 /* Apply the offset from the vtable, if required. */
29438 rtx vcall_offset_rtx
= GEN_INT (vcall_offset
);
29439 rtx tmp
= gen_rtx_REG (Pmode
, 12);
29441 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, this_rtx
));
29442 if (((unsigned HOST_WIDE_INT
) vcall_offset
) + 0x8000 >= 0x10000)
29444 emit_insn (gen_add3_insn (tmp
, tmp
, vcall_offset_rtx
));
29445 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, tmp
));
29449 rtx loc
= gen_rtx_PLUS (Pmode
, tmp
, vcall_offset_rtx
);
29451 emit_move_insn (tmp
, gen_rtx_MEM (Pmode
, loc
));
29453 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, tmp
));
29456 /* Generate a tail call to the target function. */
29457 if (!TREE_USED (function
))
29459 assemble_external (function
);
29460 TREE_USED (function
) = 1;
29462 funexp
= XEXP (DECL_RTL (function
), 0);
29463 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
29466 if (MACHOPIC_INDIRECT
)
29467 funexp
= machopic_indirect_call_target (funexp
);
29470 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29471 generate sibcall RTL explicitly. */
29472 insn
= emit_call_insn (
29473 gen_rtx_PARALLEL (VOIDmode
,
29475 gen_rtx_CALL (VOIDmode
,
29476 funexp
, const0_rtx
),
29477 gen_rtx_USE (VOIDmode
, const0_rtx
),
29478 simple_return_rtx
)));
29479 SIBLING_CALL_P (insn
) = 1;
29482 /* Run just enough of rest_of_compilation to get the insns emitted.
29483 There's not really enough bulk here to make other passes such as
29484 instruction scheduling worth while. Note that use_thunk calls
29485 assemble_start_function and assemble_end_function. */
29486 insn
= get_insns ();
29487 shorten_branches (insn
);
29488 final_start_function (insn
, file
, 1);
29489 final (insn
, file
, 1);
29490 final_end_function ();
29492 reload_completed
= 0;
29493 epilogue_completed
= 0;
29496 /* A quick summary of the various types of 'constant-pool tables'
29499 Target Flags Name One table per
29500 AIX (none) AIX TOC object file
29501 AIX -mfull-toc AIX TOC object file
29502 AIX -mminimal-toc AIX minimal TOC translation unit
29503 SVR4/EABI (none) SVR4 SDATA object file
29504 SVR4/EABI -fpic SVR4 pic object file
29505 SVR4/EABI -fPIC SVR4 PIC translation unit
29506 SVR4/EABI -mrelocatable EABI TOC function
29507 SVR4/EABI -maix AIX TOC object file
29508 SVR4/EABI -maix -mminimal-toc
29509 AIX minimal TOC translation unit
29511 Name Reg. Set by entries contains:
29512 made by addrs? fp? sum?
29514 AIX TOC 2 crt0 as Y option option
29515 AIX minimal TOC 30 prolog gcc Y Y option
29516 SVR4 SDATA 13 crt0 gcc N Y N
29517 SVR4 pic 30 prolog ld Y not yet N
29518 SVR4 PIC 30 prolog gcc Y option option
29519 EABI TOC 30 prolog gcc Y option option
29523 /* Hash functions for the hash table. */
29526 rs6000_hash_constant (rtx k
)
29528 enum rtx_code code
= GET_CODE (k
);
29529 machine_mode mode
= GET_MODE (k
);
29530 unsigned result
= (code
<< 3) ^ mode
;
29531 const char *format
;
29534 format
= GET_RTX_FORMAT (code
);
29535 flen
= strlen (format
);
29541 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
29543 case CONST_WIDE_INT
:
29546 flen
= CONST_WIDE_INT_NUNITS (k
);
29547 for (i
= 0; i
< flen
; i
++)
29548 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
29553 if (mode
!= VOIDmode
)
29554 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
29566 for (; fidx
< flen
; fidx
++)
29567 switch (format
[fidx
])
29572 const char *str
= XSTR (k
, fidx
);
29573 len
= strlen (str
);
29574 result
= result
* 613 + len
;
29575 for (i
= 0; i
< len
; i
++)
29576 result
= result
* 613 + (unsigned) str
[i
];
29581 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
29585 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
29588 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
29589 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
29593 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
29594 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
29601 gcc_unreachable ();
29608 toc_hasher::hash (toc_hash_struct
*thc
)
29610 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
29613 /* Compare H1 and H2 for equivalence. */
29616 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
29621 if (h1
->key_mode
!= h2
->key_mode
)
29624 return rtx_equal_p (r1
, r2
);
29627 /* These are the names given by the C++ front-end to vtables, and
29628 vtable-like objects. Ideally, this logic should not be here;
29629 instead, there should be some programmatic way of inquiring as
29630 to whether or not an object is a vtable. */
29632 #define VTABLE_NAME_P(NAME) \
29633 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29634 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29635 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29636 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29637 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
29639 #ifdef NO_DOLLAR_IN_LABEL
29640 /* Return a GGC-allocated character string translating dollar signs in
29641 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29644 rs6000_xcoff_strip_dollar (const char *name
)
29650 q
= (const char *) strchr (name
, '$');
29652 if (q
== 0 || q
== name
)
29655 len
= strlen (name
);
29656 strip
= XALLOCAVEC (char, len
+ 1);
29657 strcpy (strip
, name
);
29658 p
= strip
+ (q
- name
);
29662 p
= strchr (p
+ 1, '$');
29665 return ggc_alloc_string (strip
, len
);
29670 rs6000_output_symbol_ref (FILE *file
, rtx x
)
29672 const char *name
= XSTR (x
, 0);
29674 /* Currently C++ toc references to vtables can be emitted before it
29675 is decided whether the vtable is public or private. If this is
29676 the case, then the linker will eventually complain that there is
29677 a reference to an unknown section. Thus, for vtables only,
29678 we emit the TOC reference to reference the identifier and not the
29680 if (VTABLE_NAME_P (name
))
29682 RS6000_OUTPUT_BASENAME (file
, name
);
29685 assemble_name (file
, name
);
29688 /* Output a TOC entry. We derive the entry name from what is being
29692 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
29695 const char *name
= buf
;
29697 HOST_WIDE_INT offset
= 0;
29699 gcc_assert (!TARGET_NO_TOC
);
29701 /* When the linker won't eliminate them, don't output duplicate
29702 TOC entries (this happens on AIX if there is any kind of TOC,
29703 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29705 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
29707 struct toc_hash_struct
*h
;
29709 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29710 time because GGC is not initialized at that point. */
29711 if (toc_hash_table
== NULL
)
29712 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
29714 h
= ggc_alloc
<toc_hash_struct
> ();
29716 h
->key_mode
= mode
;
29717 h
->labelno
= labelno
;
29719 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
29720 if (*found
== NULL
)
29722 else /* This is indeed a duplicate.
29723 Set this label equal to that label. */
29725 fputs ("\t.set ", file
);
29726 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
29727 fprintf (file
, "%d,", labelno
);
29728 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
29729 fprintf (file
, "%d\n", ((*found
)->labelno
));
29732 if (TARGET_XCOFF
&& GET_CODE (x
) == SYMBOL_REF
29733 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
29734 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
29736 fputs ("\t.set ", file
);
29737 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
29738 fprintf (file
, "%d,", labelno
);
29739 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
29740 fprintf (file
, "%d\n", ((*found
)->labelno
));
29747 /* If we're going to put a double constant in the TOC, make sure it's
29748 aligned properly when strict alignment is on. */
29749 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
29750 && STRICT_ALIGNMENT
29751 && GET_MODE_BITSIZE (mode
) >= 64
29752 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
29753 ASM_OUTPUT_ALIGN (file
, 3);
29756 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
29758 /* Handle FP constants specially. Note that if we have a minimal
29759 TOC, things we put here aren't actually in the TOC, so we can allow
29761 if (GET_CODE (x
) == CONST_DOUBLE
&&
29762 (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
29763 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
29767 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
29768 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
29770 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
29774 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29775 fputs (DOUBLE_INT_ASM_OP
, file
);
29777 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29778 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
29779 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
29780 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
29781 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
29782 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
29783 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
29784 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
29789 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29790 fputs ("\t.long ", file
);
29792 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29793 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
29794 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
29795 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29796 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
29797 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
29801 else if (GET_CODE (x
) == CONST_DOUBLE
&&
29802 (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
29806 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
29807 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
29809 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
29813 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29814 fputs (DOUBLE_INT_ASM_OP
, file
);
29816 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
29817 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
29818 fprintf (file
, "0x%lx%08lx\n",
29819 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
29820 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
29825 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29826 fputs ("\t.long ", file
);
29828 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
29829 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
29830 fprintf (file
, "0x%lx,0x%lx\n",
29831 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
29835 else if (GET_CODE (x
) == CONST_DOUBLE
&&
29836 (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
29840 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
29841 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
29843 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
29847 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29848 fputs (DOUBLE_INT_ASM_OP
, file
);
29850 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
29851 if (WORDS_BIG_ENDIAN
)
29852 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
29854 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
29859 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29860 fputs ("\t.long ", file
);
29862 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
29863 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
29867 else if (GET_MODE (x
) == VOIDmode
&& GET_CODE (x
) == CONST_INT
)
29869 unsigned HOST_WIDE_INT low
;
29870 HOST_WIDE_INT high
;
29872 low
= INTVAL (x
) & 0xffffffff;
29873 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
29875 /* TOC entries are always Pmode-sized, so when big-endian
29876 smaller integer constants in the TOC need to be padded.
29877 (This is still a win over putting the constants in
29878 a separate constant pool, because then we'd have
29879 to have both a TOC entry _and_ the actual constant.)
29881 For a 32-bit target, CONST_INT values are loaded and shifted
29882 entirely within `low' and can be stored in one TOC entry. */
29884 /* It would be easy to make this work, but it doesn't now. */
29885 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
29887 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
29890 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
29891 high
= (HOST_WIDE_INT
) low
>> 32;
29897 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29898 fputs (DOUBLE_INT_ASM_OP
, file
);
29900 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
29901 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
29902 fprintf (file
, "0x%lx%08lx\n",
29903 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
29908 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
29910 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29911 fputs ("\t.long ", file
);
29913 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
29914 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
29915 fprintf (file
, "0x%lx,0x%lx\n",
29916 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
29920 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29921 fputs ("\t.long ", file
);
29923 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
29924 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
29930 if (GET_CODE (x
) == CONST
)
29932 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
29933 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
);
29935 base
= XEXP (XEXP (x
, 0), 0);
29936 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
29939 switch (GET_CODE (base
))
29942 name
= XSTR (base
, 0);
29946 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
29947 CODE_LABEL_NUMBER (XEXP (base
, 0)));
29951 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
29955 gcc_unreachable ();
29958 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
29959 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
29962 fputs ("\t.tc ", file
);
29963 RS6000_OUTPUT_BASENAME (file
, name
);
29966 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
29968 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
29970 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29971 after other TOC symbols, reducing overflow of small TOC access
29972 to [TC] symbols. */
29973 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
29974 ? "[TE]," : "[TC],", file
);
29977 /* Currently C++ toc references to vtables can be emitted before it
29978 is decided whether the vtable is public or private. If this is
29979 the case, then the linker will eventually complain that there is
29980 a TOC reference to an unknown section. Thus, for vtables only,
29981 we emit the TOC reference to reference the symbol and not the
29983 if (VTABLE_NAME_P (name
))
29985 RS6000_OUTPUT_BASENAME (file
, name
);
29987 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
29988 else if (offset
> 0)
29989 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
29992 output_addr_const (file
, x
);
29995 if (TARGET_XCOFF
&& GET_CODE (base
) == SYMBOL_REF
)
29997 switch (SYMBOL_REF_TLS_MODEL (base
))
30001 case TLS_MODEL_LOCAL_EXEC
:
30002 fputs ("@le", file
);
30004 case TLS_MODEL_INITIAL_EXEC
:
30005 fputs ("@ie", file
);
30007 /* Use global-dynamic for local-dynamic. */
30008 case TLS_MODEL_GLOBAL_DYNAMIC
:
30009 case TLS_MODEL_LOCAL_DYNAMIC
:
30011 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
30012 fputs ("\t.tc .", file
);
30013 RS6000_OUTPUT_BASENAME (file
, name
);
30014 fputs ("[TC],", file
);
30015 output_addr_const (file
, x
);
30016 fputs ("@m", file
);
30019 gcc_unreachable ();
30027 /* Output an assembler pseudo-op to write an ASCII string of N characters
30028 starting at P to FILE.
30030 On the RS/6000, we have to do this using the .byte operation and
30031 write out special characters outside the quoted string.
30032 Also, the assembler is broken; very long strings are truncated,
30033 so we must artificially break them up early. */
30036 output_ascii (FILE *file
, const char *p
, int n
)
30039 int i
, count_string
;
30040 const char *for_string
= "\t.byte \"";
30041 const char *for_decimal
= "\t.byte ";
30042 const char *to_close
= NULL
;
30045 for (i
= 0; i
< n
; i
++)
30048 if (c
>= ' ' && c
< 0177)
30051 fputs (for_string
, file
);
30054 /* Write two quotes to get one. */
30062 for_decimal
= "\"\n\t.byte ";
30066 if (count_string
>= 512)
30068 fputs (to_close
, file
);
30070 for_string
= "\t.byte \"";
30071 for_decimal
= "\t.byte ";
30079 fputs (for_decimal
, file
);
30080 fprintf (file
, "%d", c
);
30082 for_string
= "\n\t.byte \"";
30083 for_decimal
= ", ";
30089 /* Now close the string if we have written one. Then end the line. */
30091 fputs (to_close
, file
);
30094 /* Generate a unique section name for FILENAME for a section type
30095 represented by SECTION_DESC. Output goes into BUF.
30097 SECTION_DESC can be any string, as long as it is different for each
30098 possible section type.
30100 We name the section in the same manner as xlc. The name begins with an
30101 underscore followed by the filename (after stripping any leading directory
30102 names) with the last period replaced by the string SECTION_DESC. If
30103 FILENAME does not contain a period, SECTION_DESC is appended to the end of
30107 rs6000_gen_section_name (char **buf
, const char *filename
,
30108 const char *section_desc
)
30110 const char *q
, *after_last_slash
, *last_period
= 0;
30114 after_last_slash
= filename
;
30115 for (q
= filename
; *q
; q
++)
30118 after_last_slash
= q
+ 1;
30119 else if (*q
== '.')
30123 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
30124 *buf
= (char *) xmalloc (len
);
30129 for (q
= after_last_slash
; *q
; q
++)
30131 if (q
== last_period
)
30133 strcpy (p
, section_desc
);
30134 p
+= strlen (section_desc
);
30138 else if (ISALNUM (*q
))
30142 if (last_period
== 0)
30143 strcpy (p
, section_desc
);
30148 /* Emit profile function. */
30151 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
30153 /* Non-standard profiling for kernels, which just saves LR then calls
30154 _mcount without worrying about arg saves. The idea is to change
30155 the function prologue as little as possible as it isn't easy to
30156 account for arg save/restore code added just for _mcount. */
30157 if (TARGET_PROFILE_KERNEL
)
30160 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
30162 #ifndef NO_PROFILE_COUNTERS
30163 # define NO_PROFILE_COUNTERS 0
30165 if (NO_PROFILE_COUNTERS
)
30166 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
30167 LCT_NORMAL
, VOIDmode
, 0);
30171 const char *label_name
;
30174 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
30175 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
30176 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
30178 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
30179 LCT_NORMAL
, VOIDmode
, 1, fun
, Pmode
);
30182 else if (DEFAULT_ABI
== ABI_DARWIN
)
30184 const char *mcount_name
= RS6000_MCOUNT
;
30185 int caller_addr_regno
= LR_REGNO
;
30187 /* Be conservative and always set this, at least for now. */
30188 crtl
->uses_pic_offset_table
= 1;
30191 /* For PIC code, set up a stub and collect the caller's address
30192 from r0, which is where the prologue puts it. */
30193 if (MACHOPIC_INDIRECT
30194 && crtl
->uses_pic_offset_table
)
30195 caller_addr_regno
= 0;
30197 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
30198 LCT_NORMAL
, VOIDmode
, 1,
30199 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
30203 /* Write function profiler code. */
30206 output_function_profiler (FILE *file
, int labelno
)
30210 switch (DEFAULT_ABI
)
30213 gcc_unreachable ();
30218 warning (0, "no profiling of 64-bit code for this ABI");
30221 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
30222 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
30223 if (NO_PROFILE_COUNTERS
)
30225 asm_fprintf (file
, "\tstw %s,4(%s)\n",
30226 reg_names
[0], reg_names
[1]);
30228 else if (TARGET_SECURE_PLT
&& flag_pic
)
30230 if (TARGET_LINK_STACK
)
30233 get_ppc476_thunk_name (name
);
30234 asm_fprintf (file
, "\tbl %s\n", name
);
30237 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
30238 asm_fprintf (file
, "\tstw %s,4(%s)\n",
30239 reg_names
[0], reg_names
[1]);
30240 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
30241 asm_fprintf (file
, "\taddis %s,%s,",
30242 reg_names
[12], reg_names
[12]);
30243 assemble_name (file
, buf
);
30244 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
30245 assemble_name (file
, buf
);
30246 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
30248 else if (flag_pic
== 1)
30250 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
30251 asm_fprintf (file
, "\tstw %s,4(%s)\n",
30252 reg_names
[0], reg_names
[1]);
30253 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
30254 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
30255 assemble_name (file
, buf
);
30256 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
30258 else if (flag_pic
> 1)
30260 asm_fprintf (file
, "\tstw %s,4(%s)\n",
30261 reg_names
[0], reg_names
[1]);
30262 /* Now, we need to get the address of the label. */
30263 if (TARGET_LINK_STACK
)
30266 get_ppc476_thunk_name (name
);
30267 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
30268 assemble_name (file
, buf
);
30269 fputs ("-.\n1:", file
);
30270 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
30271 asm_fprintf (file
, "\taddi %s,%s,4\n",
30272 reg_names
[11], reg_names
[11]);
30276 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
30277 assemble_name (file
, buf
);
30278 fputs ("-.\n1:", file
);
30279 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
30281 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
30282 reg_names
[0], reg_names
[11]);
30283 asm_fprintf (file
, "\tadd %s,%s,%s\n",
30284 reg_names
[0], reg_names
[0], reg_names
[11]);
30288 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
30289 assemble_name (file
, buf
);
30290 fputs ("@ha\n", file
);
30291 asm_fprintf (file
, "\tstw %s,4(%s)\n",
30292 reg_names
[0], reg_names
[1]);
30293 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
30294 assemble_name (file
, buf
);
30295 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
30298 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30299 fprintf (file
, "\tbl %s%s\n",
30300 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
30306 /* Don't do anything, done in output_profile_hook (). */
30313 /* The following variable value is the last issued insn. */
30315 static rtx_insn
*last_scheduled_insn
;
30317 /* The following variable helps to balance issuing of load and
30318 store instructions */
30320 static int load_store_pendulum
;
30322 /* The following variable helps pair divide insns during scheduling. */
30323 static int divide_cnt
;
30324 /* The following variable helps pair and alternate vector and vector load
30325 insns during scheduling. */
30326 static int vec_pairing
;
30329 /* Power4 load update and store update instructions are cracked into a
30330 load or store and an integer insn which are executed in the same cycle.
30331 Branches have their own dispatch slot which does not count against the
30332 GCC issue rate, but it changes the program flow so there are no other
30333 instructions to issue in this cycle. */
30336 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
30338 last_scheduled_insn
= insn
;
30339 if (GET_CODE (PATTERN (insn
)) == USE
30340 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
30342 cached_can_issue_more
= more
;
30343 return cached_can_issue_more
;
30346 if (insn_terminates_group_p (insn
, current_group
))
30348 cached_can_issue_more
= 0;
30349 return cached_can_issue_more
;
30352 /* If no reservation, but reach here */
30353 if (recog_memoized (insn
) < 0)
30356 if (rs6000_sched_groups
)
30358 if (is_microcoded_insn (insn
))
30359 cached_can_issue_more
= 0;
30360 else if (is_cracked_insn (insn
))
30361 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
30363 cached_can_issue_more
= more
- 1;
30365 return cached_can_issue_more
;
30368 if (rs6000_cpu_attr
== CPU_CELL
&& is_nonpipeline_insn (insn
))
30371 cached_can_issue_more
= more
- 1;
30372 return cached_can_issue_more
;
30376 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
30378 int r
= rs6000_variable_issue_1 (insn
, more
);
30380 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
30384 /* Adjust the cost of a scheduling dependency. Return the new cost of
30385 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30388 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
30391 enum attr_type attr_type
;
30393 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
30400 /* Data dependency; DEP_INSN writes a register that INSN reads
30401 some cycles later. */
30403 /* Separate a load from a narrower, dependent store. */
30404 if ((rs6000_sched_groups
|| rs6000_cpu_attr
== CPU_POWER9
)
30405 && GET_CODE (PATTERN (insn
)) == SET
30406 && GET_CODE (PATTERN (dep_insn
)) == SET
30407 && GET_CODE (XEXP (PATTERN (insn
), 1)) == MEM
30408 && GET_CODE (XEXP (PATTERN (dep_insn
), 0)) == MEM
30409 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
30410 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
30413 attr_type
= get_attr_type (insn
);
30418 /* Tell the first scheduling pass about the latency between
30419 a mtctr and bctr (and mtlr and br/blr). The first
30420 scheduling pass will not know about this latency since
30421 the mtctr instruction, which has the latency associated
30422 to it, will be generated by reload. */
30425 /* Leave some extra cycles between a compare and its
30426 dependent branch, to inhibit expensive mispredicts. */
30427 if ((rs6000_cpu_attr
== CPU_PPC603
30428 || rs6000_cpu_attr
== CPU_PPC604
30429 || rs6000_cpu_attr
== CPU_PPC604E
30430 || rs6000_cpu_attr
== CPU_PPC620
30431 || rs6000_cpu_attr
== CPU_PPC630
30432 || rs6000_cpu_attr
== CPU_PPC750
30433 || rs6000_cpu_attr
== CPU_PPC7400
30434 || rs6000_cpu_attr
== CPU_PPC7450
30435 || rs6000_cpu_attr
== CPU_PPCE5500
30436 || rs6000_cpu_attr
== CPU_PPCE6500
30437 || rs6000_cpu_attr
== CPU_POWER4
30438 || rs6000_cpu_attr
== CPU_POWER5
30439 || rs6000_cpu_attr
== CPU_POWER7
30440 || rs6000_cpu_attr
== CPU_POWER8
30441 || rs6000_cpu_attr
== CPU_POWER9
30442 || rs6000_cpu_attr
== CPU_CELL
)
30443 && recog_memoized (dep_insn
)
30444 && (INSN_CODE (dep_insn
) >= 0))
30446 switch (get_attr_type (dep_insn
))
30449 case TYPE_FPCOMPARE
:
30450 case TYPE_CR_LOGICAL
:
30451 case TYPE_DELAYED_CR
:
30455 if (get_attr_dot (dep_insn
) == DOT_YES
)
30460 if (get_attr_dot (dep_insn
) == DOT_YES
30461 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
30472 if ((rs6000_cpu
== PROCESSOR_POWER6
)
30473 && recog_memoized (dep_insn
)
30474 && (INSN_CODE (dep_insn
) >= 0))
30477 if (GET_CODE (PATTERN (insn
)) != SET
)
30478 /* If this happens, we have to extend this to schedule
30479 optimally. Return default for now. */
30482 /* Adjust the cost for the case where the value written
30483 by a fixed point operation is used as the address
30484 gen value on a store. */
30485 switch (get_attr_type (dep_insn
))
30490 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
30491 return get_attr_sign_extend (dep_insn
)
30492 == SIGN_EXTEND_YES
? 6 : 4;
30497 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
30498 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
30508 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
30516 if (get_attr_update (dep_insn
) == UPDATE_YES
30517 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
30523 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
30529 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
30530 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
30540 if ((rs6000_cpu
== PROCESSOR_POWER6
)
30541 && recog_memoized (dep_insn
)
30542 && (INSN_CODE (dep_insn
) >= 0))
30545 /* Adjust the cost for the case where the value written
30546 by a fixed point instruction is used within the address
30547 gen portion of a subsequent load(u)(x) */
30548 switch (get_attr_type (dep_insn
))
30553 if (set_to_load_agen (dep_insn
, insn
))
30554 return get_attr_sign_extend (dep_insn
)
30555 == SIGN_EXTEND_YES
? 6 : 4;
30560 if (set_to_load_agen (dep_insn
, insn
))
30561 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
30571 if (set_to_load_agen (dep_insn
, insn
))
30579 if (get_attr_update (dep_insn
) == UPDATE_YES
30580 && set_to_load_agen (dep_insn
, insn
))
30586 if (set_to_load_agen (dep_insn
, insn
))
30592 if (set_to_load_agen (dep_insn
, insn
))
30593 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
30603 if ((rs6000_cpu
== PROCESSOR_POWER6
)
30604 && get_attr_update (insn
) == UPDATE_NO
30605 && recog_memoized (dep_insn
)
30606 && (INSN_CODE (dep_insn
) >= 0)
30607 && (get_attr_type (dep_insn
) == TYPE_MFFGPR
))
30614 /* Fall out to return default cost. */
30618 case REG_DEP_OUTPUT
:
30619 /* Output dependency; DEP_INSN writes a register that INSN writes some
30621 if ((rs6000_cpu
== PROCESSOR_POWER6
)
30622 && recog_memoized (dep_insn
)
30623 && (INSN_CODE (dep_insn
) >= 0))
30625 attr_type
= get_attr_type (insn
);
30630 case TYPE_FPSIMPLE
:
30631 if (get_attr_type (dep_insn
) == TYPE_FP
30632 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
30636 if (get_attr_update (insn
) == UPDATE_NO
30637 && get_attr_type (dep_insn
) == TYPE_MFFGPR
)
30644 /* Fall through, no cost for output dependency. */
30648 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30653 gcc_unreachable ();
30659 /* Debug version of rs6000_adjust_cost. */
30662 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
30663 int cost
, unsigned int dw
)
30665 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
30673 default: dep
= "unknown depencency"; break;
30674 case REG_DEP_TRUE
: dep
= "data dependency"; break;
30675 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
30676 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
30680 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30681 "%s, insn:\n", ret
, cost
, dep
);
30689 /* The function returns a true if INSN is microcoded.
30690 Return false otherwise. */
30693 is_microcoded_insn (rtx_insn
*insn
)
30695 if (!insn
|| !NONDEBUG_INSN_P (insn
)
30696 || GET_CODE (PATTERN (insn
)) == USE
30697 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
30700 if (rs6000_cpu_attr
== CPU_CELL
)
30701 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
30703 if (rs6000_sched_groups
30704 && (rs6000_cpu
== PROCESSOR_POWER4
|| rs6000_cpu
== PROCESSOR_POWER5
))
30706 enum attr_type type
= get_attr_type (insn
);
30707 if ((type
== TYPE_LOAD
30708 && get_attr_update (insn
) == UPDATE_YES
30709 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
30710 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
30711 && get_attr_update (insn
) == UPDATE_YES
30712 && get_attr_indexed (insn
) == INDEXED_YES
)
30713 || type
== TYPE_MFCR
)
30720 /* The function returns true if INSN is cracked into 2 instructions
30721 by the processor (and therefore occupies 2 issue slots). */
30724 is_cracked_insn (rtx_insn
*insn
)
30726 if (!insn
|| !NONDEBUG_INSN_P (insn
)
30727 || GET_CODE (PATTERN (insn
)) == USE
30728 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
30731 if (rs6000_sched_groups
30732 && (rs6000_cpu
== PROCESSOR_POWER4
|| rs6000_cpu
== PROCESSOR_POWER5
))
30734 enum attr_type type
= get_attr_type (insn
);
30735 if ((type
== TYPE_LOAD
30736 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
30737 && get_attr_update (insn
) == UPDATE_NO
)
30738 || (type
== TYPE_LOAD
30739 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
30740 && get_attr_update (insn
) == UPDATE_YES
30741 && get_attr_indexed (insn
) == INDEXED_NO
)
30742 || (type
== TYPE_STORE
30743 && get_attr_update (insn
) == UPDATE_YES
30744 && get_attr_indexed (insn
) == INDEXED_NO
)
30745 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
30746 && get_attr_update (insn
) == UPDATE_YES
)
30747 || type
== TYPE_DELAYED_CR
30748 || (type
== TYPE_EXTS
30749 && get_attr_dot (insn
) == DOT_YES
)
30750 || (type
== TYPE_SHIFT
30751 && get_attr_dot (insn
) == DOT_YES
30752 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
30753 || (type
== TYPE_MUL
30754 && get_attr_dot (insn
) == DOT_YES
)
30755 || type
== TYPE_DIV
30756 || (type
== TYPE_INSERT
30757 && get_attr_size (insn
) == SIZE_32
))
30764 /* The function returns true if INSN can be issued only from
30765 the branch slot. */
30768 is_branch_slot_insn (rtx_insn
*insn
)
30770 if (!insn
|| !NONDEBUG_INSN_P (insn
)
30771 || GET_CODE (PATTERN (insn
)) == USE
30772 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
30775 if (rs6000_sched_groups
)
30777 enum attr_type type
= get_attr_type (insn
);
30778 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
30786 /* The function returns true if out_inst sets a value that is
30787 used in the address generation computation of in_insn */
30789 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
30791 rtx out_set
, in_set
;
30793 /* For performance reasons, only handle the simple case where
30794 both loads are a single_set. */
30795 out_set
= single_set (out_insn
);
30798 in_set
= single_set (in_insn
);
30800 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
30806 /* Try to determine base/offset/size parts of the given MEM.
30807 Return true if successful, false if all the values couldn't
30810 This function only looks for REG or REG+CONST address forms.
30811 REG+REG address form will return false. */
30814 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
30815 HOST_WIDE_INT
*size
)
30818 if MEM_SIZE_KNOWN_P (mem
)
30819 *size
= MEM_SIZE (mem
);
30823 addr_rtx
= (XEXP (mem
, 0));
30824 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
30825 addr_rtx
= XEXP (addr_rtx
, 1);
30828 while (GET_CODE (addr_rtx
) == PLUS
30829 && CONST_INT_P (XEXP (addr_rtx
, 1)))
30831 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
30832 addr_rtx
= XEXP (addr_rtx
, 0);
30834 if (!REG_P (addr_rtx
))
30841 /* The function returns true if the target storage location of
30842 mem1 is adjacent to the target storage location of mem2 */
30843 /* Return 1 if memory locations are adjacent. */
30846 adjacent_mem_locations (rtx mem1
, rtx mem2
)
30849 HOST_WIDE_INT off1
, size1
, off2
, size2
;
30851 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
30852 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
30853 return ((REGNO (reg1
) == REGNO (reg2
))
30854 && ((off1
+ size1
== off2
)
30855 || (off2
+ size2
== off1
)));
30860 /* This function returns true if it can be determined that the two MEM
30861 locations overlap by at least 1 byte based on base reg/offset/size. */
30864 mem_locations_overlap (rtx mem1
, rtx mem2
)
30867 HOST_WIDE_INT off1
, size1
, off2
, size2
;
30869 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
30870 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
30871 return ((REGNO (reg1
) == REGNO (reg2
))
30872 && (((off1
<= off2
) && (off1
+ size1
> off2
))
30873 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
30878 /* A C statement (sans semicolon) to update the integer scheduling
30879 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30880 INSN earlier, reduce the priority to execute INSN later. Do not
30881 define this macro if you do not need to adjust the scheduling
30882 priorities of insns. */
30885 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
30887 rtx load_mem
, str_mem
;
30888 /* On machines (like the 750) which have asymmetric integer units,
30889 where one integer unit can do multiply and divides and the other
30890 can't, reduce the priority of multiply/divide so it is scheduled
30891 before other integer operations. */
30894 if (! INSN_P (insn
))
30897 if (GET_CODE (PATTERN (insn
)) == USE
)
30900 switch (rs6000_cpu_attr
) {
30902 switch (get_attr_type (insn
))
30909 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
30910 priority
, priority
);
30911 if (priority
>= 0 && priority
< 0x01000000)
30918 if (insn_must_be_first_in_group (insn
)
30919 && reload_completed
30920 && current_sched_info
->sched_max_insns_priority
30921 && rs6000_sched_restricted_insns_priority
)
30924 /* Prioritize insns that can be dispatched only in the first
30926 if (rs6000_sched_restricted_insns_priority
== 1)
30927 /* Attach highest priority to insn. This means that in
30928 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30929 precede 'priority' (critical path) considerations. */
30930 return current_sched_info
->sched_max_insns_priority
;
30931 else if (rs6000_sched_restricted_insns_priority
== 2)
30932 /* Increase priority of insn by a minimal amount. This means that in
30933 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30934 considerations precede dispatch-slot restriction considerations. */
30935 return (priority
+ 1);
30938 if (rs6000_cpu
== PROCESSOR_POWER6
30939 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
30940 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
30941 /* Attach highest priority to insn if the scheduler has just issued two
30942 stores and this instruction is a load, or two loads and this instruction
30943 is a store. Power6 wants loads and stores scheduled alternately
30945 return current_sched_info
->sched_max_insns_priority
;
30950 /* Return true if the instruction is nonpipelined on the Cell. */
30952 is_nonpipeline_insn (rtx_insn
*insn
)
30954 enum attr_type type
;
30955 if (!insn
|| !NONDEBUG_INSN_P (insn
)
30956 || GET_CODE (PATTERN (insn
)) == USE
30957 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
30960 type
= get_attr_type (insn
);
30961 if (type
== TYPE_MUL
30962 || type
== TYPE_DIV
30963 || type
== TYPE_SDIV
30964 || type
== TYPE_DDIV
30965 || type
== TYPE_SSQRT
30966 || type
== TYPE_DSQRT
30967 || type
== TYPE_MFCR
30968 || type
== TYPE_MFCRF
30969 || type
== TYPE_MFJMPR
)
30977 /* Return how many instructions the machine can issue per cycle. */
30980 rs6000_issue_rate (void)
30982 /* Unless scheduling for register pressure, use issue rate of 1 for
30983 first scheduling pass to decrease degradation. */
30984 if (!reload_completed
&& !flag_sched_pressure
)
30987 switch (rs6000_cpu_attr
) {
30989 case CPU_PPC601
: /* ? */
30999 case CPU_PPCE300C2
:
31000 case CPU_PPCE300C3
:
31001 case CPU_PPCE500MC
:
31002 case CPU_PPCE500MC64
:
31027 /* Return how many instructions to look ahead for better insn
31031 rs6000_use_sched_lookahead (void)
31033 switch (rs6000_cpu_attr
)
31040 return (reload_completed
? 8 : 0);
31047 /* We are choosing insn from the ready queue. Return zero if INSN can be
31050 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
31052 if (ready_index
== 0)
31055 if (rs6000_cpu_attr
!= CPU_CELL
)
31058 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
31060 if (!reload_completed
31061 || is_nonpipeline_insn (insn
)
31062 || is_microcoded_insn (insn
))
31068 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31069 and return true. */
31072 find_mem_ref (rtx pat
, rtx
*mem_ref
)
31077 /* stack_tie does not produce any real memory traffic. */
31078 if (tie_operand (pat
, VOIDmode
))
31081 if (GET_CODE (pat
) == MEM
)
31087 /* Recursively process the pattern. */
31088 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
31090 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
31094 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
31097 else if (fmt
[i
] == 'E')
31098 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
31100 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
31108 /* Determine if PAT is a PATTERN of a load insn. */
31111 is_load_insn1 (rtx pat
, rtx
*load_mem
)
31113 if (!pat
|| pat
== NULL_RTX
)
31116 if (GET_CODE (pat
) == SET
)
31117 return find_mem_ref (SET_SRC (pat
), load_mem
);
31119 if (GET_CODE (pat
) == PARALLEL
)
31123 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
31124 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
31131 /* Determine if INSN loads from memory. */
31134 is_load_insn (rtx insn
, rtx
*load_mem
)
31136 if (!insn
|| !INSN_P (insn
))
31142 return is_load_insn1 (PATTERN (insn
), load_mem
);
31145 /* Determine if PAT is a PATTERN of a store insn. */
31148 is_store_insn1 (rtx pat
, rtx
*str_mem
)
31150 if (!pat
|| pat
== NULL_RTX
)
31153 if (GET_CODE (pat
) == SET
)
31154 return find_mem_ref (SET_DEST (pat
), str_mem
);
31156 if (GET_CODE (pat
) == PARALLEL
)
31160 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
31161 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
31168 /* Determine if INSN stores to memory. */
31171 is_store_insn (rtx insn
, rtx
*str_mem
)
31173 if (!insn
|| !INSN_P (insn
))
31176 return is_store_insn1 (PATTERN (insn
), str_mem
);
31179 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31182 is_power9_pairable_vec_type (enum attr_type type
)
31186 case TYPE_VECSIMPLE
:
31187 case TYPE_VECCOMPLEX
:
31191 case TYPE_VECFLOAT
:
31193 case TYPE_VECDOUBLE
:
31201 /* Returns whether the dependence between INSN and NEXT is considered
31202 costly by the given target. */
31205 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
31209 rtx load_mem
, str_mem
;
31211 /* If the flag is not enabled - no dependence is considered costly;
31212 allow all dependent insns in the same group.
31213 This is the most aggressive option. */
31214 if (rs6000_sched_costly_dep
== no_dep_costly
)
31217 /* If the flag is set to 1 - a dependence is always considered costly;
31218 do not allow dependent instructions in the same group.
31219 This is the most conservative option. */
31220 if (rs6000_sched_costly_dep
== all_deps_costly
)
31223 insn
= DEP_PRO (dep
);
31224 next
= DEP_CON (dep
);
31226 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
31227 && is_load_insn (next
, &load_mem
)
31228 && is_store_insn (insn
, &str_mem
))
31229 /* Prevent load after store in the same group. */
31232 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
31233 && is_load_insn (next
, &load_mem
)
31234 && is_store_insn (insn
, &str_mem
)
31235 && DEP_TYPE (dep
) == REG_DEP_TRUE
31236 && mem_locations_overlap(str_mem
, load_mem
))
31237 /* Prevent load after store in the same group if it is a true
31241 /* The flag is set to X; dependences with latency >= X are considered costly,
31242 and will not be scheduled in the same group. */
31243 if (rs6000_sched_costly_dep
<= max_dep_latency
31244 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
31250 /* Return the next insn after INSN that is found before TAIL is reached,
31251 skipping any "non-active" insns - insns that will not actually occupy
31252 an issue slot. Return NULL_RTX if such an insn is not found. */
31255 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
31257 if (insn
== NULL_RTX
|| insn
== tail
)
31262 insn
= NEXT_INSN (insn
);
31263 if (insn
== NULL_RTX
|| insn
== tail
)
31267 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
31268 || (NONJUMP_INSN_P (insn
)
31269 && GET_CODE (PATTERN (insn
)) != USE
31270 && GET_CODE (PATTERN (insn
)) != CLOBBER
31271 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
31277 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31280 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
31285 enum attr_type type
, type2
;
31287 type
= get_attr_type (last_scheduled_insn
);
31289 /* Try to issue fixed point divides back-to-back in pairs so they will be
31290 routed to separate execution units and execute in parallel. */
31291 if (type
== TYPE_DIV
&& divide_cnt
== 0)
31293 /* First divide has been scheduled. */
31296 /* Scan the ready list looking for another divide, if found move it
31297 to the end of the list so it is chosen next. */
31301 if (recog_memoized (ready
[pos
]) >= 0
31302 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
31305 for (i
= pos
; i
< lastpos
; i
++)
31306 ready
[i
] = ready
[i
+ 1];
31307 ready
[lastpos
] = tmp
;
31315 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31318 /* The best dispatch throughput for vector and vector load insns can be
31319 achieved by interleaving a vector and vector load such that they'll
31320 dispatch to the same superslice. If this pairing cannot be achieved
31321 then it is best to pair vector insns together and vector load insns
31324 To aid in this pairing, vec_pairing maintains the current state with
31325 the following values:
31327 0 : Initial state, no vecload/vector pairing has been started.
31329 1 : A vecload or vector insn has been issued and a candidate for
31330 pairing has been found and moved to the end of the ready
31332 if (type
== TYPE_VECLOAD
)
31334 /* Issued a vecload. */
31335 if (vec_pairing
== 0)
31337 int vecload_pos
= -1;
31338 /* We issued a single vecload, look for a vector insn to pair it
31339 with. If one isn't found, try to pair another vecload. */
31343 if (recog_memoized (ready
[pos
]) >= 0)
31345 type2
= get_attr_type (ready
[pos
]);
31346 if (is_power9_pairable_vec_type (type2
))
31348 /* Found a vector insn to pair with, move it to the
31349 end of the ready list so it is scheduled next. */
31351 for (i
= pos
; i
< lastpos
; i
++)
31352 ready
[i
] = ready
[i
+ 1];
31353 ready
[lastpos
] = tmp
;
31355 return cached_can_issue_more
;
31357 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
31358 /* Remember position of first vecload seen. */
31363 if (vecload_pos
>= 0)
31365 /* Didn't find a vector to pair with but did find a vecload,
31366 move it to the end of the ready list. */
31367 tmp
= ready
[vecload_pos
];
31368 for (i
= vecload_pos
; i
< lastpos
; i
++)
31369 ready
[i
] = ready
[i
+ 1];
31370 ready
[lastpos
] = tmp
;
31372 return cached_can_issue_more
;
31376 else if (is_power9_pairable_vec_type (type
))
31378 /* Issued a vector operation. */
31379 if (vec_pairing
== 0)
31382 /* We issued a single vector insn, look for a vecload to pair it
31383 with. If one isn't found, try to pair another vector. */
31387 if (recog_memoized (ready
[pos
]) >= 0)
31389 type2
= get_attr_type (ready
[pos
]);
31390 if (type2
== TYPE_VECLOAD
)
31392 /* Found a vecload insn to pair with, move it to the
31393 end of the ready list so it is scheduled next. */
31395 for (i
= pos
; i
< lastpos
; i
++)
31396 ready
[i
] = ready
[i
+ 1];
31397 ready
[lastpos
] = tmp
;
31399 return cached_can_issue_more
;
31401 else if (is_power9_pairable_vec_type (type2
)
31403 /* Remember position of first vector insn seen. */
31410 /* Didn't find a vecload to pair with but did find a vector
31411 insn, move it to the end of the ready list. */
31412 tmp
= ready
[vec_pos
];
31413 for (i
= vec_pos
; i
< lastpos
; i
++)
31414 ready
[i
] = ready
[i
+ 1];
31415 ready
[lastpos
] = tmp
;
31417 return cached_can_issue_more
;
31422 /* We've either finished a vec/vecload pair, couldn't find an insn to
31423 continue the current pair, or the last insn had nothing to do with
31424 with pairing. In any case, reset the state. */
31428 return cached_can_issue_more
;
31431 /* We are about to begin issuing insns for this clock cycle. */
31434 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
31435 rtx_insn
**ready ATTRIBUTE_UNUSED
,
31436 int *pn_ready ATTRIBUTE_UNUSED
,
31437 int clock_var ATTRIBUTE_UNUSED
)
31439 int n_ready
= *pn_ready
;
31442 fprintf (dump
, "// rs6000_sched_reorder :\n");
31444 /* Reorder the ready list, if the second to last ready insn
31445 is a nonepipeline insn. */
31446 if (rs6000_cpu_attr
== CPU_CELL
&& n_ready
> 1)
31448 if (is_nonpipeline_insn (ready
[n_ready
- 1])
31449 && (recog_memoized (ready
[n_ready
- 2]) > 0))
31450 /* Simply swap first two insns. */
31451 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
31454 if (rs6000_cpu
== PROCESSOR_POWER6
)
31455 load_store_pendulum
= 0;
31457 return rs6000_issue_rate ();
31460 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31463 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
31464 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
31467 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
31469 /* For Power6, we need to handle some special cases to try and keep the
31470 store queue from overflowing and triggering expensive flushes.
31472 This code monitors how load and store instructions are being issued
31473 and skews the ready list one way or the other to increase the likelihood
31474 that a desired instruction is issued at the proper time.
31476 A couple of things are done. First, we maintain a "load_store_pendulum"
31477 to track the current state of load/store issue.
31479 - If the pendulum is at zero, then no loads or stores have been
31480 issued in the current cycle so we do nothing.
31482 - If the pendulum is 1, then a single load has been issued in this
31483 cycle and we attempt to locate another load in the ready list to
31486 - If the pendulum is -2, then two stores have already been
31487 issued in this cycle, so we increase the priority of the first load
31488 in the ready list to increase it's likelihood of being chosen first
31491 - If the pendulum is -1, then a single store has been issued in this
31492 cycle and we attempt to locate another store in the ready list to
31493 issue with it, preferring a store to an adjacent memory location to
31494 facilitate store pairing in the store queue.
31496 - If the pendulum is 2, then two loads have already been
31497 issued in this cycle, so we increase the priority of the first store
31498 in the ready list to increase it's likelihood of being chosen first
31501 - If the pendulum < -2 or > 2, then do nothing.
31503 Note: This code covers the most common scenarios. There exist non
31504 load/store instructions which make use of the LSU and which
31505 would need to be accounted for to strictly model the behavior
31506 of the machine. Those instructions are currently unaccounted
31507 for to help minimize compile time overhead of this code.
31509 if (rs6000_cpu
== PROCESSOR_POWER6
&& last_scheduled_insn
)
31514 rtx load_mem
, str_mem
;
31516 if (is_store_insn (last_scheduled_insn
, &str_mem
))
31517 /* Issuing a store, swing the load_store_pendulum to the left */
31518 load_store_pendulum
--;
31519 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
31520 /* Issuing a load, swing the load_store_pendulum to the right */
31521 load_store_pendulum
++;
31523 return cached_can_issue_more
;
31525 /* If the pendulum is balanced, or there is only one instruction on
31526 the ready list, then all is well, so return. */
31527 if ((load_store_pendulum
== 0) || (*pn_ready
<= 1))
31528 return cached_can_issue_more
;
31530 if (load_store_pendulum
== 1)
31532 /* A load has been issued in this cycle. Scan the ready list
31533 for another load to issue with it */
31538 if (is_load_insn (ready
[pos
], &load_mem
))
31540 /* Found a load. Move it to the head of the ready list,
31541 and adjust it's priority so that it is more likely to
31544 for (i
=pos
; i
<*pn_ready
-1; i
++)
31545 ready
[i
] = ready
[i
+ 1];
31546 ready
[*pn_ready
-1] = tmp
;
31548 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
31549 INSN_PRIORITY (tmp
)++;
31555 else if (load_store_pendulum
== -2)
31557 /* Two stores have been issued in this cycle. Increase the
31558 priority of the first load in the ready list to favor it for
31559 issuing in the next cycle. */
31564 if (is_load_insn (ready
[pos
], &load_mem
)
31566 && INSN_PRIORITY_KNOWN (ready
[pos
]))
31568 INSN_PRIORITY (ready
[pos
])++;
31570 /* Adjust the pendulum to account for the fact that a load
31571 was found and increased in priority. This is to prevent
31572 increasing the priority of multiple loads */
31573 load_store_pendulum
--;
31580 else if (load_store_pendulum
== -1)
31582 /* A store has been issued in this cycle. Scan the ready list for
31583 another store to issue with it, preferring a store to an adjacent
31585 int first_store_pos
= -1;
31591 if (is_store_insn (ready
[pos
], &str_mem
))
31594 /* Maintain the index of the first store found on the
31596 if (first_store_pos
== -1)
31597 first_store_pos
= pos
;
31599 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
31600 && adjacent_mem_locations (str_mem
, str_mem2
))
31602 /* Found an adjacent store. Move it to the head of the
31603 ready list, and adjust it's priority so that it is
31604 more likely to stay there */
31606 for (i
=pos
; i
<*pn_ready
-1; i
++)
31607 ready
[i
] = ready
[i
+ 1];
31608 ready
[*pn_ready
-1] = tmp
;
31610 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
31611 INSN_PRIORITY (tmp
)++;
31613 first_store_pos
= -1;
31621 if (first_store_pos
>= 0)
31623 /* An adjacent store wasn't found, but a non-adjacent store was,
31624 so move the non-adjacent store to the front of the ready
31625 list, and adjust its priority so that it is more likely to
31627 tmp
= ready
[first_store_pos
];
31628 for (i
=first_store_pos
; i
<*pn_ready
-1; i
++)
31629 ready
[i
] = ready
[i
+ 1];
31630 ready
[*pn_ready
-1] = tmp
;
31631 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp
))
31632 INSN_PRIORITY (tmp
)++;
31635 else if (load_store_pendulum
== 2)
31637 /* Two loads have been issued in this cycle. Increase the priority
31638 of the first store in the ready list to favor it for issuing in
31644 if (is_store_insn (ready
[pos
], &str_mem
)
31646 && INSN_PRIORITY_KNOWN (ready
[pos
]))
31648 INSN_PRIORITY (ready
[pos
])++;
31650 /* Adjust the pendulum to account for the fact that a store
31651 was found and increased in priority. This is to prevent
31652 increasing the priority of multiple stores */
31653 load_store_pendulum
++;
31662 /* Do Power9 dependent reordering if necessary. */
31663 if (rs6000_cpu
== PROCESSOR_POWER9
&& last_scheduled_insn
31664 && recog_memoized (last_scheduled_insn
) >= 0)
31665 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
31667 return cached_can_issue_more
;
31670 /* Return whether the presence of INSN causes a dispatch group termination
31671 of group WHICH_GROUP.
31673 If WHICH_GROUP == current_group, this function will return true if INSN
31674 causes the termination of the current group (i.e, the dispatch group to
31675 which INSN belongs). This means that INSN will be the last insn in the
31676 group it belongs to.
31678 If WHICH_GROUP == previous_group, this function will return true if INSN
31679 causes the termination of the previous group (i.e, the dispatch group that
31680 precedes the group to which INSN belongs). This means that INSN will be
31681 the first insn in the group it belongs to). */
31684 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
31691 first
= insn_must_be_first_in_group (insn
);
31692 last
= insn_must_be_last_in_group (insn
);
31697 if (which_group
== current_group
)
31699 else if (which_group
== previous_group
)
31707 insn_must_be_first_in_group (rtx_insn
*insn
)
31709 enum attr_type type
;
31713 || DEBUG_INSN_P (insn
)
31714 || GET_CODE (PATTERN (insn
)) == USE
31715 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
31718 switch (rs6000_cpu
)
31720 case PROCESSOR_POWER5
:
31721 if (is_cracked_insn (insn
))
31724 case PROCESSOR_POWER4
:
31725 if (is_microcoded_insn (insn
))
31728 if (!rs6000_sched_groups
)
31731 type
= get_attr_type (insn
);
31738 case TYPE_DELAYED_CR
:
31739 case TYPE_CR_LOGICAL
:
31752 case PROCESSOR_POWER6
:
31753 type
= get_attr_type (insn
);
31762 case TYPE_FPCOMPARE
:
31773 if (get_attr_dot (insn
) == DOT_NO
31774 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
31779 if (get_attr_size (insn
) == SIZE_32
)
31787 if (get_attr_update (insn
) == UPDATE_YES
)
31795 case PROCESSOR_POWER7
:
31796 type
= get_attr_type (insn
);
31800 case TYPE_CR_LOGICAL
:
31814 if (get_attr_dot (insn
) == DOT_YES
)
31819 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
31820 || get_attr_update (insn
) == UPDATE_YES
)
31827 if (get_attr_update (insn
) == UPDATE_YES
)
31835 case PROCESSOR_POWER8
:
31836 type
= get_attr_type (insn
);
31840 case TYPE_CR_LOGICAL
:
31841 case TYPE_DELAYED_CR
:
31849 case TYPE_VECSTORE
:
31856 if (get_attr_dot (insn
) == DOT_YES
)
31861 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
31862 || get_attr_update (insn
) == UPDATE_YES
)
31867 if (get_attr_update (insn
) == UPDATE_YES
31868 && get_attr_indexed (insn
) == INDEXED_YES
)
31884 insn_must_be_last_in_group (rtx_insn
*insn
)
31886 enum attr_type type
;
31890 || DEBUG_INSN_P (insn
)
31891 || GET_CODE (PATTERN (insn
)) == USE
31892 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
31895 switch (rs6000_cpu
) {
31896 case PROCESSOR_POWER4
:
31897 case PROCESSOR_POWER5
:
31898 if (is_microcoded_insn (insn
))
31901 if (is_branch_slot_insn (insn
))
31905 case PROCESSOR_POWER6
:
31906 type
= get_attr_type (insn
);
31914 case TYPE_FPCOMPARE
:
31925 if (get_attr_dot (insn
) == DOT_NO
31926 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
31931 if (get_attr_size (insn
) == SIZE_32
)
31939 case PROCESSOR_POWER7
:
31940 type
= get_attr_type (insn
);
31950 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
31951 && get_attr_update (insn
) == UPDATE_YES
)
31956 if (get_attr_update (insn
) == UPDATE_YES
31957 && get_attr_indexed (insn
) == INDEXED_YES
)
31965 case PROCESSOR_POWER8
:
31966 type
= get_attr_type (insn
);
31978 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
31979 && get_attr_update (insn
) == UPDATE_YES
)
31984 if (get_attr_update (insn
) == UPDATE_YES
31985 && get_attr_indexed (insn
) == INDEXED_YES
)
32000 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32001 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32004 is_costly_group (rtx
*group_insns
, rtx next_insn
)
32007 int issue_rate
= rs6000_issue_rate ();
32009 for (i
= 0; i
< issue_rate
; i
++)
32011 sd_iterator_def sd_it
;
32013 rtx insn
= group_insns
[i
];
32018 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
32020 rtx next
= DEP_CON (dep
);
32022 if (next
== next_insn
32023 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
32031 /* Utility of the function redefine_groups.
32032 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32033 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32034 to keep it "far" (in a separate group) from GROUP_INSNS, following
32035 one of the following schemes, depending on the value of the flag
32036 -minsert_sched_nops = X:
32037 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32038 in order to force NEXT_INSN into a separate group.
32039 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32040 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32041 insertion (has a group just ended, how many vacant issue slots remain in the
32042 last group, and how many dispatch groups were encountered so far). */
32045 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
32046 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
32051 int issue_rate
= rs6000_issue_rate ();
32052 bool end
= *group_end
;
32055 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
32056 return can_issue_more
;
32058 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
32059 return can_issue_more
;
32061 force
= is_costly_group (group_insns
, next_insn
);
32063 return can_issue_more
;
32065 if (sched_verbose
> 6)
32066 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
32067 *group_count
,can_issue_more
);
32069 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
32072 can_issue_more
= 0;
32074 /* Since only a branch can be issued in the last issue_slot, it is
32075 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32076 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32077 in this case the last nop will start a new group and the branch
32078 will be forced to the new group. */
32079 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
32082 /* Do we have a special group ending nop? */
32083 if (rs6000_cpu_attr
== CPU_POWER6
|| rs6000_cpu_attr
== CPU_POWER7
32084 || rs6000_cpu_attr
== CPU_POWER8
)
32086 nop
= gen_group_ending_nop ();
32087 emit_insn_before (nop
, next_insn
);
32088 can_issue_more
= 0;
32091 while (can_issue_more
> 0)
32094 emit_insn_before (nop
, next_insn
);
32102 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
32104 int n_nops
= rs6000_sched_insert_nops
;
32106 /* Nops can't be issued from the branch slot, so the effective
32107 issue_rate for nops is 'issue_rate - 1'. */
32108 if (can_issue_more
== 0)
32109 can_issue_more
= issue_rate
;
32111 if (can_issue_more
== 0)
32113 can_issue_more
= issue_rate
- 1;
32116 for (i
= 0; i
< issue_rate
; i
++)
32118 group_insns
[i
] = 0;
32125 emit_insn_before (nop
, next_insn
);
32126 if (can_issue_more
== issue_rate
- 1) /* new group begins */
32129 if (can_issue_more
== 0)
32131 can_issue_more
= issue_rate
- 1;
32134 for (i
= 0; i
< issue_rate
; i
++)
32136 group_insns
[i
] = 0;
32142 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32145 /* Is next_insn going to start a new group? */
32148 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
32149 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
32150 || (can_issue_more
< issue_rate
&&
32151 insn_terminates_group_p (next_insn
, previous_group
)));
32152 if (*group_end
&& end
)
32155 if (sched_verbose
> 6)
32156 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
32157 *group_count
, can_issue_more
);
32158 return can_issue_more
;
32161 return can_issue_more
;
32164 /* This function tries to synch the dispatch groups that the compiler "sees"
32165 with the dispatch groups that the processor dispatcher is expected to
32166 form in practice. It tries to achieve this synchronization by forcing the
32167 estimated processor grouping on the compiler (as opposed to the function
32168 'pad_goups' which tries to force the scheduler's grouping on the processor).
32170 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32171 examines the (estimated) dispatch groups that will be formed by the processor
32172 dispatcher. It marks these group boundaries to reflect the estimated
32173 processor grouping, overriding the grouping that the scheduler had marked.
32174 Depending on the value of the flag '-minsert-sched-nops' this function can
32175 force certain insns into separate groups or force a certain distance between
32176 them by inserting nops, for example, if there exists a "costly dependence"
32179 The function estimates the group boundaries that the processor will form as
32180 follows: It keeps track of how many vacant issue slots are available after
32181 each insn. A subsequent insn will start a new group if one of the following
32183 - no more vacant issue slots remain in the current dispatch group.
32184 - only the last issue slot, which is the branch slot, is vacant, but the next
32185 insn is not a branch.
32186 - only the last 2 or less issue slots, including the branch slot, are vacant,
32187 which means that a cracked insn (which occupies two issue slots) can't be
32188 issued in this group.
32189 - less than 'issue_rate' slots are vacant, and the next insn always needs to
32190 start a new group. */
32193 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
32196 rtx_insn
*insn
, *next_insn
;
32198 int can_issue_more
;
32201 int group_count
= 0;
32205 issue_rate
= rs6000_issue_rate ();
32206 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
32207 for (i
= 0; i
< issue_rate
; i
++)
32209 group_insns
[i
] = 0;
32211 can_issue_more
= issue_rate
;
32213 insn
= get_next_active_insn (prev_head_insn
, tail
);
32216 while (insn
!= NULL_RTX
)
32218 slot
= (issue_rate
- can_issue_more
);
32219 group_insns
[slot
] = insn
;
32221 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
32222 if (insn_terminates_group_p (insn
, current_group
))
32223 can_issue_more
= 0;
32225 next_insn
= get_next_active_insn (insn
, tail
);
32226 if (next_insn
== NULL_RTX
)
32227 return group_count
+ 1;
32229 /* Is next_insn going to start a new group? */
32231 = (can_issue_more
== 0
32232 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
32233 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
32234 || (can_issue_more
< issue_rate
&&
32235 insn_terminates_group_p (next_insn
, previous_group
)));
32237 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
32238 next_insn
, &group_end
, can_issue_more
,
32244 can_issue_more
= 0;
32245 for (i
= 0; i
< issue_rate
; i
++)
32247 group_insns
[i
] = 0;
32251 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
32252 PUT_MODE (next_insn
, VOIDmode
);
32253 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
32254 PUT_MODE (next_insn
, TImode
);
32257 if (can_issue_more
== 0)
32258 can_issue_more
= issue_rate
;
32261 return group_count
;
32264 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32265 dispatch group boundaries that the scheduler had marked. Pad with nops
32266 any dispatch groups which have vacant issue slots, in order to force the
32267 scheduler's grouping on the processor dispatcher. The function
32268 returns the number of dispatch groups found. */
32271 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
32274 rtx_insn
*insn
, *next_insn
;
32277 int can_issue_more
;
32279 int group_count
= 0;
32281 /* Initialize issue_rate. */
32282 issue_rate
= rs6000_issue_rate ();
32283 can_issue_more
= issue_rate
;
32285 insn
= get_next_active_insn (prev_head_insn
, tail
);
32286 next_insn
= get_next_active_insn (insn
, tail
);
32288 while (insn
!= NULL_RTX
)
32291 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
32293 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
32295 if (next_insn
== NULL_RTX
)
32300 /* If the scheduler had marked group termination at this location
32301 (between insn and next_insn), and neither insn nor next_insn will
32302 force group termination, pad the group with nops to force group
32305 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
32306 && !insn_terminates_group_p (insn
, current_group
)
32307 && !insn_terminates_group_p (next_insn
, previous_group
))
32309 if (!is_branch_slot_insn (next_insn
))
32312 while (can_issue_more
)
32315 emit_insn_before (nop
, next_insn
);
32320 can_issue_more
= issue_rate
;
32325 next_insn
= get_next_active_insn (insn
, tail
);
32328 return group_count
;
32331 /* We're beginning a new block. Initialize data structures as necessary. */
32334 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
32335 int sched_verbose ATTRIBUTE_UNUSED
,
32336 int max_ready ATTRIBUTE_UNUSED
)
32338 last_scheduled_insn
= NULL
;
32339 load_store_pendulum
= 0;
32344 /* The following function is called at the end of scheduling BB.
32345 After reload, it inserts nops at insn group bundling. */
32348 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
32353 fprintf (dump
, "=== Finishing schedule.\n");
32355 if (reload_completed
&& rs6000_sched_groups
)
32357 /* Do not run sched_finish hook when selective scheduling enabled. */
32358 if (sel_sched_p ())
32361 if (rs6000_sched_insert_nops
== sched_finish_none
)
32364 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
32365 n_groups
= pad_groups (dump
, sched_verbose
,
32366 current_sched_info
->prev_head
,
32367 current_sched_info
->next_tail
);
32369 n_groups
= redefine_groups (dump
, sched_verbose
,
32370 current_sched_info
->prev_head
,
32371 current_sched_info
->next_tail
);
32373 if (sched_verbose
>= 6)
32375 fprintf (dump
, "ngroups = %d\n", n_groups
);
32376 print_rtl (dump
, current_sched_info
->prev_head
);
32377 fprintf (dump
, "Done finish_sched\n");
32382 struct rs6000_sched_context
32384 short cached_can_issue_more
;
32385 rtx_insn
*last_scheduled_insn
;
32386 int load_store_pendulum
;
32391 typedef struct rs6000_sched_context rs6000_sched_context_def
;
32392 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
32394 /* Allocate store for new scheduling context. */
32396 rs6000_alloc_sched_context (void)
32398 return xmalloc (sizeof (rs6000_sched_context_def
));
32401 /* If CLEAN_P is true then initializes _SC with clean data,
32402 and from the global context otherwise. */
32404 rs6000_init_sched_context (void *_sc
, bool clean_p
)
32406 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
32410 sc
->cached_can_issue_more
= 0;
32411 sc
->last_scheduled_insn
= NULL
;
32412 sc
->load_store_pendulum
= 0;
32413 sc
->divide_cnt
= 0;
32414 sc
->vec_pairing
= 0;
32418 sc
->cached_can_issue_more
= cached_can_issue_more
;
32419 sc
->last_scheduled_insn
= last_scheduled_insn
;
32420 sc
->load_store_pendulum
= load_store_pendulum
;
32421 sc
->divide_cnt
= divide_cnt
;
32422 sc
->vec_pairing
= vec_pairing
;
32426 /* Sets the global scheduling context to the one pointed to by _SC. */
32428 rs6000_set_sched_context (void *_sc
)
32430 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
32432 gcc_assert (sc
!= NULL
);
32434 cached_can_issue_more
= sc
->cached_can_issue_more
;
32435 last_scheduled_insn
= sc
->last_scheduled_insn
;
32436 load_store_pendulum
= sc
->load_store_pendulum
;
32437 divide_cnt
= sc
->divide_cnt
;
32438 vec_pairing
= sc
->vec_pairing
;
32443 rs6000_free_sched_context (void *_sc
)
32445 gcc_assert (_sc
!= NULL
);
32451 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
32453 switch (get_attr_type (insn
))
32468 /* Length in units of the trampoline for entering a nested function. */
32471 rs6000_trampoline_size (void)
32475 switch (DEFAULT_ABI
)
32478 gcc_unreachable ();
32481 ret
= (TARGET_32BIT
) ? 12 : 24;
32485 gcc_assert (!TARGET_32BIT
);
32491 ret
= (TARGET_32BIT
) ? 40 : 48;
32498 /* Emit RTL insns to initialize the variable parts of a trampoline.
32499 FNADDR is an RTX for the address of the function's pure code.
32500 CXT is an RTX for the static chain value for the function. */
32503 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
32505 int regsize
= (TARGET_32BIT
) ? 4 : 8;
32506 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
32507 rtx ctx_reg
= force_reg (Pmode
, cxt
);
32508 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
32510 switch (DEFAULT_ABI
)
32513 gcc_unreachable ();
32515 /* Under AIX, just build the 3 word function descriptor */
32518 rtx fnmem
, fn_reg
, toc_reg
;
32520 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
32521 error ("You cannot take the address of a nested function if you use "
32522 "the -mno-pointers-to-nested-functions option.");
32524 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
32525 fn_reg
= gen_reg_rtx (Pmode
);
32526 toc_reg
= gen_reg_rtx (Pmode
);
32528 /* Macro to shorten the code expansions below. */
32529 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32531 m_tramp
= replace_equiv_address (m_tramp
, addr
);
32533 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
32534 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
32535 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
32536 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
32537 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
32543 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32547 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
32548 LCT_NORMAL
, VOIDmode
, 4,
32550 GEN_INT (rs6000_trampoline_size ()), SImode
,
32558 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32559 identifier as an argument, so the front end shouldn't look it up. */
32562 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
32564 return is_attribute_p ("altivec", attr_id
);
32567 /* Handle the "altivec" attribute. The attribute may have
32568 arguments as follows:
32570 __attribute__((altivec(vector__)))
32571 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32572 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32574 and may appear more than once (e.g., 'vector bool char') in a
32575 given declaration. */
32578 rs6000_handle_altivec_attribute (tree
*node
,
32579 tree name ATTRIBUTE_UNUSED
,
32581 int flags ATTRIBUTE_UNUSED
,
32582 bool *no_add_attrs
)
32584 tree type
= *node
, result
= NULL_TREE
;
32588 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
32589 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
32590 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
32593 while (POINTER_TYPE_P (type
)
32594 || TREE_CODE (type
) == FUNCTION_TYPE
32595 || TREE_CODE (type
) == METHOD_TYPE
32596 || TREE_CODE (type
) == ARRAY_TYPE
)
32597 type
= TREE_TYPE (type
);
32599 mode
= TYPE_MODE (type
);
32601 /* Check for invalid AltiVec type qualifiers. */
32602 if (type
== long_double_type_node
)
32603 error ("use of %<long double%> in AltiVec types is invalid");
32604 else if (type
== boolean_type_node
)
32605 error ("use of boolean types in AltiVec types is invalid");
32606 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
32607 error ("use of %<complex%> in AltiVec types is invalid");
32608 else if (DECIMAL_FLOAT_MODE_P (mode
))
32609 error ("use of decimal floating point types in AltiVec types is invalid");
32610 else if (!TARGET_VSX
)
32612 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
32615 error ("use of %<long%> in AltiVec types is invalid for "
32616 "64-bit code without -mvsx");
32617 else if (rs6000_warn_altivec_long
)
32618 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32621 else if (type
== long_long_unsigned_type_node
32622 || type
== long_long_integer_type_node
)
32623 error ("use of %<long long%> in AltiVec types is invalid without "
32625 else if (type
== double_type_node
)
32626 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
32629 switch (altivec_type
)
32632 unsigned_p
= TYPE_UNSIGNED (type
);
32636 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
32639 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
32642 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
32645 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
32648 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
32650 case SFmode
: result
= V4SF_type_node
; break;
32651 case DFmode
: result
= V2DF_type_node
; break;
32652 /* If the user says 'vector int bool', we may be handed the 'bool'
32653 attribute _before_ the 'vector' attribute, and so select the
32654 proper type in the 'b' case below. */
32655 case V4SImode
: case V8HImode
: case V16QImode
: case V4SFmode
:
32656 case V2DImode
: case V2DFmode
:
32664 case DImode
: case V2DImode
: result
= bool_V2DI_type_node
; break;
32665 case SImode
: case V4SImode
: result
= bool_V4SI_type_node
; break;
32666 case HImode
: case V8HImode
: result
= bool_V8HI_type_node
; break;
32667 case QImode
: case V16QImode
: result
= bool_V16QI_type_node
;
32674 case V8HImode
: result
= pixel_V8HI_type_node
;
32680 /* Propagate qualifiers attached to the element type
32681 onto the vector type. */
32682 if (result
&& result
!= type
&& TYPE_QUALS (type
))
32683 result
= build_qualified_type (result
, TYPE_QUALS (type
));
32685 *no_add_attrs
= true; /* No need to hang on to the attribute. */
32688 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
32693 /* AltiVec defines four built-in scalar types that serve as vector
32694 elements; we must teach the compiler how to mangle them. */
32696 static const char *
32697 rs6000_mangle_type (const_tree type
)
32699 type
= TYPE_MAIN_VARIANT (type
);
32701 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
32702 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
32705 if (type
== bool_char_type_node
) return "U6__boolc";
32706 if (type
== bool_short_type_node
) return "U6__bools";
32707 if (type
== pixel_type_node
) return "u7__pixel";
32708 if (type
== bool_int_type_node
) return "U6__booli";
32709 if (type
== bool_long_type_node
) return "U6__booll";
32711 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32712 "g" for IBM extended double, no matter whether it is long double (using
32713 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32714 if (TARGET_FLOAT128_TYPE
)
32716 if (type
== ieee128_float_type_node
)
32717 return "U10__float128";
32719 if (type
== ibm128_float_type_node
)
32722 if (type
== long_double_type_node
&& TARGET_LONG_DOUBLE_128
)
32723 return (TARGET_IEEEQUAD
) ? "U10__float128" : "g";
32726 /* Mangle IBM extended float long double as `g' (__float128) on
32727 powerpc*-linux where long-double-64 previously was the default. */
32728 if (TYPE_MAIN_VARIANT (type
) == long_double_type_node
32730 && TARGET_LONG_DOUBLE_128
32731 && !TARGET_IEEEQUAD
)
32734 /* For all other types, use normal C++ mangling. */
32738 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32739 struct attribute_spec.handler. */
32742 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
32743 tree args ATTRIBUTE_UNUSED
,
32744 int flags ATTRIBUTE_UNUSED
,
32745 bool *no_add_attrs
)
32747 if (TREE_CODE (*node
) != FUNCTION_TYPE
32748 && TREE_CODE (*node
) != FIELD_DECL
32749 && TREE_CODE (*node
) != TYPE_DECL
)
32751 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32753 *no_add_attrs
= true;
32759 /* Set longcall attributes on all functions declared when
32760 rs6000_default_long_calls is true. */
32762 rs6000_set_default_type_attributes (tree type
)
32764 if (rs6000_default_long_calls
32765 && (TREE_CODE (type
) == FUNCTION_TYPE
32766 || TREE_CODE (type
) == METHOD_TYPE
))
32767 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
32769 TYPE_ATTRIBUTES (type
));
32772 darwin_set_default_type_attributes (type
);
32776 /* Return a reference suitable for calling a function with the
32777 longcall attribute. */
32780 rs6000_longcall_ref (rtx call_ref
)
32782 const char *call_name
;
32785 if (GET_CODE (call_ref
) != SYMBOL_REF
)
32788 /* System V adds '.' to the internal name, so skip them. */
32789 call_name
= XSTR (call_ref
, 0);
32790 if (*call_name
== '.')
32792 while (*call_name
== '.')
32795 node
= get_identifier (call_name
);
32796 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
32799 return force_reg (Pmode
, call_ref
);
32802 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32803 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32806 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32807 struct attribute_spec.handler. */
32809 rs6000_handle_struct_attribute (tree
*node
, tree name
,
32810 tree args ATTRIBUTE_UNUSED
,
32811 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32814 if (DECL_P (*node
))
32816 if (TREE_CODE (*node
) == TYPE_DECL
)
32817 type
= &TREE_TYPE (*node
);
32822 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
32823 || TREE_CODE (*type
) == UNION_TYPE
)))
32825 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
32826 *no_add_attrs
= true;
32829 else if ((is_attribute_p ("ms_struct", name
)
32830 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
32831 || ((is_attribute_p ("gcc_struct", name
)
32832 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
32834 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
32836 *no_add_attrs
= true;
32843 rs6000_ms_bitfield_layout_p (const_tree record_type
)
32845 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
32846 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
32847 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
32850 #ifdef USING_ELFOS_H
32852 /* A get_unnamed_section callback, used for switching to toc_section. */
32855 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
32857 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
32858 && TARGET_MINIMAL_TOC
)
32860 if (!toc_initialized
)
32862 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
32863 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
32864 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
32865 fprintf (asm_out_file
, "\t.tc ");
32866 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
32867 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
32868 fprintf (asm_out_file
, "\n");
32870 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
32871 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
32872 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
32873 fprintf (asm_out_file
, " = .+32768\n");
32874 toc_initialized
= 1;
32877 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
32879 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
32881 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
32882 if (!toc_initialized
)
32884 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
32885 toc_initialized
= 1;
32890 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
32891 if (!toc_initialized
)
32893 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
32894 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
32895 fprintf (asm_out_file
, " = .+32768\n");
32896 toc_initialized
= 1;
32901 /* Implement TARGET_ASM_INIT_SECTIONS. */
32904 rs6000_elf_asm_init_sections (void)
32907 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
32910 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
32911 SDATA2_SECTION_ASM_OP
);
32914 /* Implement TARGET_SELECT_RTX_SECTION. */
32917 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
32918 unsigned HOST_WIDE_INT align
)
32920 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
32921 return toc_section
;
32923 return default_elf_select_rtx_section (mode
, x
, align
);
32926 /* For a SYMBOL_REF, set generic flags and then perform some
32927 target-specific processing.
32929 When the AIX ABI is requested on a non-AIX system, replace the
32930 function name with the real name (with a leading .) rather than the
32931 function descriptor name. This saves a lot of overriding code to
32932 read the prefixes. */
32934 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
32936 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
32938 default_encode_section_info (decl
, rtl
, first
);
32941 && TREE_CODE (decl
) == FUNCTION_DECL
32943 && DEFAULT_ABI
== ABI_AIX
)
32945 rtx sym_ref
= XEXP (rtl
, 0);
32946 size_t len
= strlen (XSTR (sym_ref
, 0));
32947 char *str
= XALLOCAVEC (char, len
+ 2);
32949 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
32950 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
/* Return true if SECTION names exactly TEMPL or a subsection of it
   (TEMPL immediately followed by '.').  Used to classify small-data
   sections such as ".sdata" and ".sdata.foo".  */

static bool
compare_section_name (const char *section, const char *templ)
{
  /* strlen returns size_t; use it directly instead of narrowing to int.  */
  size_t len;

  len = strlen (templ);
  return (strncmp (section, templ, len) == 0
	  && (section[len] == 0 || section[len] == '.'));
}
32965 rs6000_elf_in_small_data_p (const_tree decl
)
32967 if (rs6000_sdata
== SDATA_NONE
)
32970 /* We want to merge strings, so we never consider them small data. */
32971 if (TREE_CODE (decl
) == STRING_CST
)
32974 /* Functions are never in the small data area. */
32975 if (TREE_CODE (decl
) == FUNCTION_DECL
)
32978 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
32980 const char *section
= DECL_SECTION_NAME (decl
);
32981 if (compare_section_name (section
, ".sdata")
32982 || compare_section_name (section
, ".sdata2")
32983 || compare_section_name (section
, ".gnu.linkonce.s")
32984 || compare_section_name (section
, ".sbss")
32985 || compare_section_name (section
, ".sbss2")
32986 || compare_section_name (section
, ".gnu.linkonce.sb")
32987 || strcmp (section
, ".PPC.EMB.sdata0") == 0
32988 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
32993 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
32996 && size
<= g_switch_value
32997 /* If it's not public, and we're not going to reference it there,
32998 there's no need to put it in the small data section. */
32999 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
33006 #endif /* USING_ELFOS_H */
33008 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33011 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
33013 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
33016 /* Do not place thread-local symbols refs in the object blocks. */
33019 rs6000_use_blocks_for_decl_p (const_tree decl
)
33021 return !DECL_THREAD_LOCAL_P (decl
);
33024 /* Return a REG that occurs in ADDR with coefficient 1.
33025 ADDR can be effectively incremented by incrementing REG.
33027 r0 is special and we must not select it as an address
33028 register by this routine since our caller will try to
33029 increment the returned register via an "la" instruction. */
33032 find_addr_reg (rtx addr
)
33034 while (GET_CODE (addr
) == PLUS
)
33036 if (GET_CODE (XEXP (addr
, 0)) == REG
33037 && REGNO (XEXP (addr
, 0)) != 0)
33038 addr
= XEXP (addr
, 0);
33039 else if (GET_CODE (XEXP (addr
, 1)) == REG
33040 && REGNO (XEXP (addr
, 1)) != 0)
33041 addr
= XEXP (addr
, 1);
33042 else if (CONSTANT_P (XEXP (addr
, 0)))
33043 addr
= XEXP (addr
, 1);
33044 else if (CONSTANT_P (XEXP (addr
, 1)))
33045 addr
= XEXP (addr
, 0);
33047 gcc_unreachable ();
33049 gcc_assert (GET_CODE (addr
) == REG
&& REGNO (addr
) != 0);
33054 rs6000_fatal_bad_address (rtx op
)
33056 fatal_insn ("bad address", op
);
33061 typedef struct branch_island_d
{
33062 tree function_name
;
33068 static vec
<branch_island
, va_gc
> *branch_islands
;
33070 /* Remember to generate a branch island for far calls to the given
33074 add_compiler_branch_island (tree label_name
, tree function_name
,
33077 branch_island bi
= {function_name
, label_name
, line_number
};
33078 vec_safe_push (branch_islands
, bi
);
33081 /* Generate far-jump branch islands for everything recorded in
33082 branch_islands. Invoked immediately after the last instruction of
33083 the epilogue has been emitted; the branch islands must be appended
33084 to, and contiguous with, the function body. Mach-O stubs are
33085 generated in machopic_output_stub(). */
33088 macho_branch_islands (void)
33092 while (!vec_safe_is_empty (branch_islands
))
33094 branch_island
*bi
= &branch_islands
->last ();
33095 const char *label
= IDENTIFIER_POINTER (bi
->label_name
);
33096 const char *name
= IDENTIFIER_POINTER (bi
->function_name
);
33097 char name_buf
[512];
33098 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
33099 if (name
[0] == '*' || name
[0] == '&')
33100 strcpy (name_buf
, name
+1);
33104 strcpy (name_buf
+1, name
);
33106 strcpy (tmp_buf
, "\n");
33107 strcat (tmp_buf
, label
);
33108 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33109 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
33110 dbxout_stabd (N_SLINE
, bi
->line_number
);
33111 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33114 if (TARGET_LINK_STACK
)
33117 get_ppc476_thunk_name (name
);
33118 strcat (tmp_buf
, ":\n\tmflr r0\n\tbl ");
33119 strcat (tmp_buf
, name
);
33120 strcat (tmp_buf
, "\n");
33121 strcat (tmp_buf
, label
);
33122 strcat (tmp_buf
, "_pic:\n\tmflr r11\n");
33126 strcat (tmp_buf
, ":\n\tmflr r0\n\tbcl 20,31,");
33127 strcat (tmp_buf
, label
);
33128 strcat (tmp_buf
, "_pic\n");
33129 strcat (tmp_buf
, label
);
33130 strcat (tmp_buf
, "_pic:\n\tmflr r11\n");
33133 strcat (tmp_buf
, "\taddis r11,r11,ha16(");
33134 strcat (tmp_buf
, name_buf
);
33135 strcat (tmp_buf
, " - ");
33136 strcat (tmp_buf
, label
);
33137 strcat (tmp_buf
, "_pic)\n");
33139 strcat (tmp_buf
, "\tmtlr r0\n");
33141 strcat (tmp_buf
, "\taddi r12,r11,lo16(");
33142 strcat (tmp_buf
, name_buf
);
33143 strcat (tmp_buf
, " - ");
33144 strcat (tmp_buf
, label
);
33145 strcat (tmp_buf
, "_pic)\n");
33147 strcat (tmp_buf
, "\tmtctr r12\n\tbctr\n");
33151 strcat (tmp_buf
, ":\nlis r12,hi16(");
33152 strcat (tmp_buf
, name_buf
);
33153 strcat (tmp_buf
, ")\n\tori r12,r12,lo16(");
33154 strcat (tmp_buf
, name_buf
);
33155 strcat (tmp_buf
, ")\n\tmtctr r12\n\tbctr");
33157 output_asm_insn (tmp_buf
, 0);
33158 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33159 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
33160 dbxout_stabd (N_SLINE
, bi
->line_number
);
33161 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33162 branch_islands
->pop ();
33166 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
33167 already there or not. */
33170 no_previous_def (tree function_name
)
33175 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
33176 if (function_name
== bi
->function_name
)
33181 /* GET_PREV_LABEL gets the label name from the previous definition of
33185 get_prev_label (tree function_name
)
33190 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
33191 if (function_name
== bi
->function_name
)
33192 return bi
->label_name
;
33196 /* INSN is either a function call or a millicode call. It may have an
33197 unconditional jump in its delay slot.
33199 CALL_DEST is the routine we are calling. */
33202 output_call (rtx_insn
*insn
, rtx
*operands
, int dest_operand_number
,
33203 int cookie_operand_number
)
33205 static char buf
[256];
33206 if (darwin_emit_branch_islands
33207 && GET_CODE (operands
[dest_operand_number
]) == SYMBOL_REF
33208 && (INTVAL (operands
[cookie_operand_number
]) & CALL_LONG
))
33211 tree funname
= get_identifier (XSTR (operands
[dest_operand_number
], 0));
33213 if (no_previous_def (funname
))
33215 rtx label_rtx
= gen_label_rtx ();
33216 char *label_buf
, temp_buf
[256];
33217 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
33218 CODE_LABEL_NUMBER (label_rtx
));
33219 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
33220 labelname
= get_identifier (label_buf
);
33221 add_compiler_branch_island (labelname
, funname
, insn_line (insn
));
33224 labelname
= get_prev_label (funname
);
33226 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33227 instruction will reach 'foo', otherwise link as 'bl L42'".
33228 "L42" should be a 'branch island', that will do a far jump to
33229 'foo'. Branch islands are generated in
33230 macho_branch_islands(). */
33231 sprintf (buf
, "jbsr %%z%d,%.246s",
33232 dest_operand_number
, IDENTIFIER_POINTER (labelname
));
33235 sprintf (buf
, "bl %%z%d", dest_operand_number
);
33239 /* Generate PIC and indirect symbol stubs. */
33242 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
33244 unsigned int length
;
33245 char *symbol_name
, *lazy_ptr_name
;
33246 char *local_label_0
;
33247 static int label
= 0;
33249 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33250 symb
= (*targetm
.strip_name_encoding
) (symb
);
33253 length
= strlen (symb
);
33254 symbol_name
= XALLOCAVEC (char, length
+ 32);
33255 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
33257 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
33258 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
33261 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
33263 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
33267 fprintf (file
, "\t.align 5\n");
33269 fprintf (file
, "%s:\n", stub
);
33270 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
33273 local_label_0
= XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33274 sprintf (local_label_0
, "\"L%011d$spb\"", label
);
33276 fprintf (file
, "\tmflr r0\n");
33277 if (TARGET_LINK_STACK
)
33280 get_ppc476_thunk_name (name
);
33281 fprintf (file
, "\tbl %s\n", name
);
33282 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
33286 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
33287 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
33289 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
33290 lazy_ptr_name
, local_label_0
);
33291 fprintf (file
, "\tmtlr r0\n");
33292 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
33293 (TARGET_64BIT
? "ldu" : "lwzu"),
33294 lazy_ptr_name
, local_label_0
);
33295 fprintf (file
, "\tmtctr r12\n");
33296 fprintf (file
, "\tbctr\n");
33300 fprintf (file
, "\t.align 4\n");
33302 fprintf (file
, "%s:\n", stub
);
33303 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
33305 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
33306 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
33307 (TARGET_64BIT
? "ldu" : "lwzu"),
33309 fprintf (file
, "\tmtctr r12\n");
33310 fprintf (file
, "\tbctr\n");
33313 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
33314 fprintf (file
, "%s:\n", lazy_ptr_name
);
33315 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
33316 fprintf (file
, "%sdyld_stub_binding_helper\n",
33317 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
33320 /* Legitimize PIC addresses. If the address is already
33321 position-independent, we return ORIG. Newly generated
33322 position-independent addresses go into a reg. This is REG if non
33323 zero, otherwise we allocate register(s) as necessary. */
33325 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
33328 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
33333 if (reg
== NULL
&& ! reload_in_progress
&& ! reload_completed
)
33334 reg
= gen_reg_rtx (Pmode
);
33336 if (GET_CODE (orig
) == CONST
)
33340 if (GET_CODE (XEXP (orig
, 0)) == PLUS
33341 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
33344 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
33346 /* Use a different reg for the intermediate value, as
33347 it will be marked UNCHANGING. */
33348 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
33349 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
33352 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
33355 if (GET_CODE (offset
) == CONST_INT
)
33357 if (SMALL_INT (offset
))
33358 return plus_constant (Pmode
, base
, INTVAL (offset
));
33359 else if (! reload_in_progress
&& ! reload_completed
)
33360 offset
= force_reg (Pmode
, offset
);
33363 rtx mem
= force_const_mem (Pmode
, orig
);
33364 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
33367 return gen_rtx_PLUS (Pmode
, base
, offset
);
33370 /* Fall back on generic machopic code. */
33371 return machopic_legitimize_pic_address (orig
, mode
, reg
);
33374 /* Output a .machine directive for the Darwin assembler, and call
33375 the generic start_file routine. */
33378 rs6000_darwin_file_start (void)
33380 static const struct
33384 HOST_WIDE_INT if_set
;
33386 { "ppc64", "ppc64", MASK_64BIT
},
33387 { "970", "ppc970", MASK_PPC_GPOPT
| MASK_MFCRF
| MASK_POWERPC64
},
33388 { "power4", "ppc970", 0 },
33389 { "G5", "ppc970", 0 },
33390 { "7450", "ppc7450", 0 },
33391 { "7400", "ppc7400", MASK_ALTIVEC
},
33392 { "G4", "ppc7400", 0 },
33393 { "750", "ppc750", 0 },
33394 { "740", "ppc750", 0 },
33395 { "G3", "ppc750", 0 },
33396 { "604e", "ppc604e", 0 },
33397 { "604", "ppc604", 0 },
33398 { "603e", "ppc603", 0 },
33399 { "603", "ppc603", 0 },
33400 { "601", "ppc601", 0 },
33401 { NULL
, "ppc", 0 } };
33402 const char *cpu_id
= "";
33405 rs6000_file_start ();
33406 darwin_file_start ();
33408 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33410 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
33411 cpu_id
= rs6000_default_cpu
;
33413 if (global_options_set
.x_rs6000_cpu_index
)
33414 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
33416 /* Look through the mapping array. Pick the first name that either
33417 matches the argument, has a bit set in IF_SET that is also set
33418 in the target flags, or has a NULL name. */
33421 while (mapping
[i
].arg
!= NULL
33422 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
33423 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
33426 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
33429 #endif /* TARGET_MACHO */
33433 rs6000_elf_reloc_rw_mask (void)
33437 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
33443 /* Record an element in the table of global constructors. SYMBOL is
33444 a SYMBOL_REF of the function to be called; PRIORITY is a number
33445 between 0 and MAX_INIT_PRIORITY.
33447 This differs from default_named_section_asm_out_constructor in
33448 that we have special handling for -mrelocatable. */
33450 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
33452 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
33454 const char *section
= ".ctors";
33457 if (priority
!= DEFAULT_INIT_PRIORITY
)
33459 sprintf (buf
, ".ctors.%.5u",
33460 /* Invert the numbering so the linker puts us in the proper
33461 order; constructors are run from right to left, and the
33462 linker sorts in increasing order. */
33463 MAX_INIT_PRIORITY
- priority
);
33467 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
33468 assemble_align (POINTER_SIZE
);
33470 if (DEFAULT_ABI
== ABI_V4
33471 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
33473 fputs ("\t.long (", asm_out_file
);
33474 output_addr_const (asm_out_file
, symbol
);
33475 fputs (")@fixup\n", asm_out_file
);
33478 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
33481 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
33483 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
33485 const char *section
= ".dtors";
33488 if (priority
!= DEFAULT_INIT_PRIORITY
)
33490 sprintf (buf
, ".dtors.%.5u",
33491 /* Invert the numbering so the linker puts us in the proper
33492 order; constructors are run from right to left, and the
33493 linker sorts in increasing order. */
33494 MAX_INIT_PRIORITY
- priority
);
33498 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
33499 assemble_align (POINTER_SIZE
);
33501 if (DEFAULT_ABI
== ABI_V4
33502 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
33504 fputs ("\t.long (", asm_out_file
);
33505 output_addr_const (asm_out_file
, symbol
);
33506 fputs (")@fixup\n", asm_out_file
);
33509 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
33513 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
33515 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
33517 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
33518 ASM_OUTPUT_LABEL (file
, name
);
33519 fputs (DOUBLE_INT_ASM_OP
, file
);
33520 rs6000_output_function_entry (file
, name
);
33521 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
33524 fputs ("\t.size\t", file
);
33525 assemble_name (file
, name
);
33526 fputs (",24\n\t.type\t.", file
);
33527 assemble_name (file
, name
);
33528 fputs (",@function\n", file
);
33529 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
33531 fputs ("\t.globl\t.", file
);
33532 assemble_name (file
, name
);
33537 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
33538 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
33539 rs6000_output_function_entry (file
, name
);
33540 fputs (":\n", file
);
33544 if (DEFAULT_ABI
== ABI_V4
33545 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
33546 && !TARGET_SECURE_PLT
33547 && (!constant_pool_empty_p () || crtl
->profile
)
33552 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
33554 fprintf (file
, "\t.long ");
33555 assemble_name (file
, toc_label_name
);
33558 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
33559 assemble_name (file
, buf
);
33563 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
33564 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
33566 if (TARGET_CMODEL
== CMODEL_LARGE
&& rs6000_global_entry_point_needed_p ())
33570 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
33572 fprintf (file
, "\t.quad .TOC.-");
33573 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
33574 assemble_name (file
, buf
);
33578 if (DEFAULT_ABI
== ABI_AIX
)
33580 const char *desc_name
, *orig_name
;
33582 orig_name
= (*targetm
.strip_name_encoding
) (name
);
33583 desc_name
= orig_name
;
33584 while (*desc_name
== '.')
33587 if (TREE_PUBLIC (decl
))
33588 fprintf (file
, "\t.globl %s\n", desc_name
);
33590 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
33591 fprintf (file
, "%s:\n", desc_name
);
33592 fprintf (file
, "\t.long %s\n", orig_name
);
33593 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
33594 fputs ("\t.long 0\n", file
);
33595 fprintf (file
, "\t.previous\n");
33597 ASM_OUTPUT_LABEL (file
, name
);
33600 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
33602 rs6000_elf_file_end (void)
33604 #ifdef HAVE_AS_GNU_ATTRIBUTE
33605 /* ??? The value emitted depends on options active at file end.
33606 Assume anyone using #pragma or attributes that might change
33607 options knows what they are doing. */
33608 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
33609 && rs6000_passes_float
)
33615 else if (TARGET_SF_FPR
)
33619 if (rs6000_passes_long_double
)
33621 if (!TARGET_LONG_DOUBLE_128
)
33623 else if (TARGET_IEEEQUAD
)
33628 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
33630 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
33632 if (rs6000_passes_vector
)
33633 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
33634 (TARGET_ALTIVEC_ABI
? 2 : 1));
33635 if (rs6000_returns_struct
)
33636 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
33637 aix_struct_return
? 2 : 1);
33640 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33641 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
33642 file_end_indicate_exec_stack ();
33645 if (flag_split_stack
)
33646 file_end_indicate_split_stack ();
33650 /* We have expanded a CPU builtin, so we need to emit a reference to
33651 the special symbol that LIBC uses to declare it supports the
33652 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
33653 switch_to_section (data_section
);
33654 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
33655 fprintf (asm_out_file
, "\t%s %s\n",
33656 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
33663 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33664 #define HAVE_XCOFF_DWARF_EXTRAS 0
33667 static enum unwind_info_type
33668 rs6000_xcoff_debug_unwind_info (void)
33674 rs6000_xcoff_asm_output_anchor (rtx symbol
)
33678 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
33679 SYMBOL_REF_BLOCK_OFFSET (symbol
));
33680 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
33681 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
33682 fprintf (asm_out_file
, ",");
33683 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
33684 fprintf (asm_out_file
, "\n");
33688 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
33690 fputs (GLOBAL_ASM_OP
, stream
);
33691 RS6000_OUTPUT_BASENAME (stream
, name
);
33692 putc ('\n', stream
);
33695 /* A get_unnamed_decl callback, used for read-only sections. PTR
33696 points to the section string variable. */
33699 rs6000_xcoff_output_readonly_section_asm_op (const void *directive
)
33701 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
33702 *(const char *const *) directive
,
33703 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
33706 /* Likewise for read-write sections. */
33709 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive
)
33711 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
33712 *(const char *const *) directive
,
33713 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
33717 rs6000_xcoff_output_tls_section_asm_op (const void *directive
)
33719 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
33720 *(const char *const *) directive
,
33721 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
33724 /* A get_unnamed_section callback, used for switching to toc_section. */
33727 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
33729 if (TARGET_MINIMAL_TOC
)
33731 /* toc_section is always selected at least once from
33732 rs6000_xcoff_file_start, so this is guaranteed to
33733 always be defined once and only once in each file. */
33734 if (!toc_initialized
)
33736 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
33737 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
33738 toc_initialized
= 1;
33740 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
33741 (TARGET_32BIT
? "" : ",3"));
33744 fputs ("\t.toc\n", asm_out_file
);
33747 /* Implement TARGET_ASM_INIT_SECTIONS. */
33750 rs6000_xcoff_asm_init_sections (void)
33752 read_only_data_section
33753 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
33754 &xcoff_read_only_section_name
);
33756 private_data_section
33757 = get_unnamed_section (SECTION_WRITE
,
33758 rs6000_xcoff_output_readwrite_section_asm_op
,
33759 &xcoff_private_data_section_name
);
33762 = get_unnamed_section (SECTION_TLS
,
33763 rs6000_xcoff_output_tls_section_asm_op
,
33764 &xcoff_tls_data_section_name
);
33766 tls_private_data_section
33767 = get_unnamed_section (SECTION_TLS
,
33768 rs6000_xcoff_output_tls_section_asm_op
,
33769 &xcoff_private_data_section_name
);
33771 read_only_private_data_section
33772 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
33773 &xcoff_private_data_section_name
);
33776 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
33778 readonly_data_section
= read_only_data_section
;
/* AIX is always PIC, so relocations are always allowed in read-write
   sections.  (Body line lost in this copy; upstream returns 3.)  */
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
33788 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
33789 tree decl ATTRIBUTE_UNUSED
)
33792 static const char * const suffix
[5] = { "PR", "RO", "RW", "TL", "XO" };
33794 if (flags
& SECTION_EXCLUDE
)
33796 else if (flags
& SECTION_DEBUG
)
33798 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
33801 else if (flags
& SECTION_CODE
)
33803 else if (flags
& SECTION_TLS
)
33805 else if (flags
& SECTION_WRITE
)
33810 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
33811 (flags
& SECTION_CODE
) ? "." : "",
33812 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
33815 #define IN_NAMED_SECTION(DECL) \
33816 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33817 && DECL_SECTION_NAME (DECL) != NULL)
33820 rs6000_xcoff_select_section (tree decl
, int reloc
,
33821 unsigned HOST_WIDE_INT align
)
33823 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33825 if (align
> BIGGEST_ALIGNMENT
)
33827 resolve_unique_section (decl
, reloc
, true);
33828 if (IN_NAMED_SECTION (decl
))
33829 return get_named_section (decl
, NULL
, reloc
);
33832 if (decl_readonly_section (decl
, reloc
))
33834 if (TREE_PUBLIC (decl
))
33835 return read_only_data_section
;
33837 return read_only_private_data_section
;
33842 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
33844 if (TREE_PUBLIC (decl
))
33845 return tls_data_section
;
33846 else if (bss_initializer_p (decl
))
33848 /* Convert to COMMON to emit in BSS. */
33849 DECL_COMMON (decl
) = 1;
33850 return tls_comm_section
;
33853 return tls_private_data_section
;
33857 if (TREE_PUBLIC (decl
))
33858 return data_section
;
33860 return private_data_section
;
33865 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
33869 /* Use select_section for private data and uninitialized data with
33870 alignment <= BIGGEST_ALIGNMENT. */
33871 if (!TREE_PUBLIC (decl
)
33872 || DECL_COMMON (decl
)
33873 || (DECL_INITIAL (decl
) == NULL_TREE
33874 && DECL_ALIGN (decl
) <= BIGGEST_ALIGNMENT
)
33875 || DECL_INITIAL (decl
) == error_mark_node
33876 || (flag_zero_initialized_in_bss
33877 && initializer_zerop (DECL_INITIAL (decl
))))
33880 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
33881 name
= (*targetm
.strip_name_encoding
) (name
);
33882 set_decl_section_name (decl
, name
);
33885 /* Select section for constant in constant pool.
33887 On RS/6000, all constants are in the private read-only data area.
33888 However, if this is being placed in the TOC it must be output as a
33892 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
33893 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
33895 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
33896 return toc_section
;
33898 return read_only_private_data_section
;
33901 /* Remove any trailing [DS] or the like from the symbol name. */
33903 static const char *
33904 rs6000_xcoff_strip_name_encoding (const char *name
)
33909 len
= strlen (name
);
33910 if (name
[len
- 1] == ']')
33911 return ggc_alloc_string (name
, len
- 4);
33916 /* Section attributes. AIX is always PIC. */
33918 static unsigned int
33919 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
33921 unsigned int align
;
33922 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
33924 /* Align to at least UNIT size. */
33925 if ((flags
& SECTION_CODE
) != 0 || !decl
|| !DECL_P (decl
))
33926 align
= MIN_UNITS_PER_WORD
;
33928 /* Increase alignment of large objects if not already stricter. */
33929 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
33930 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
33931 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
33933 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
33936 /* Output at beginning of assembler file.
33938 Initialize the section names for the RS/6000 at this point.
33940 Specify filename, including full path, to assembler.
33942 We want to go into the TOC section so at least one .toc will be emitted.
33943 Also, in order to output proper .bs/.es pairs, we need at least one static
33944 [RW] section emitted.
33946 Finally, declare mcount when profiling to make the assembler happy. */
33949 rs6000_xcoff_file_start (void)
33951 rs6000_gen_section_name (&xcoff_bss_section_name
,
33952 main_input_filename
, ".bss_");
33953 rs6000_gen_section_name (&xcoff_private_data_section_name
,
33954 main_input_filename
, ".rw_");
33955 rs6000_gen_section_name (&xcoff_read_only_section_name
,
33956 main_input_filename
, ".ro_");
33957 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
33958 main_input_filename
, ".tls_");
33959 rs6000_gen_section_name (&xcoff_tbss_section_name
,
33960 main_input_filename
, ".tbss_[UL]");
33962 fputs ("\t.file\t", asm_out_file
);
33963 output_quoted_string (asm_out_file
, main_input_filename
);
33964 fputc ('\n', asm_out_file
);
33965 if (write_symbols
!= NO_DEBUG
)
33966 switch_to_section (private_data_section
);
33967 switch_to_section (toc_section
);
33968 switch_to_section (text_section
);
33970 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
33971 rs6000_file_start ();
33974 /* Output at end of assembler file.
33975 On the RS/6000, referencing data should automatically pull in text. */
33978 rs6000_xcoff_file_end (void)
33980 switch_to_section (text_section
);
33981 fputs ("_section_.text:\n", asm_out_file
);
33982 switch_to_section (data_section
);
33983 fputs (TARGET_32BIT
33984 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33988 struct declare_alias_data
33991 bool function_descriptor
;
33994 /* Declare alias N. A helper function for for_node_and_aliases. */
33997 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
33999 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
34000 /* Main symbol is output specially, because varasm machinery does part of
34001 the job for us - we do not need to declare .globl/lglobs and such. */
34002 if (!n
->alias
|| n
->weakref
)
34005 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
34008 /* Prevent assemble_alias from trying to use .set pseudo operation
34009 that does not behave as expected by the middle-end. */
34010 TREE_ASM_WRITTEN (n
->decl
) = true;
34012 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
34013 char *buffer
= (char *) alloca (strlen (name
) + 2);
34015 int dollar_inside
= 0;
34017 strcpy (buffer
, name
);
34018 p
= strchr (buffer
, '$');
34022 p
= strchr (p
+ 1, '$');
34024 if (TREE_PUBLIC (n
->decl
))
34026 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
34028 if (dollar_inside
) {
34029 if (data
->function_descriptor
)
34030 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
34031 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
34033 if (data
->function_descriptor
)
34035 fputs ("\t.globl .", data
->file
);
34036 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
34037 putc ('\n', data
->file
);
34039 fputs ("\t.globl ", data
->file
);
34040 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
34041 putc ('\n', data
->file
);
34043 #ifdef ASM_WEAKEN_DECL
34044 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
34045 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
34052 if (data
->function_descriptor
)
34053 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
34054 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
34056 if (data
->function_descriptor
)
34058 fputs ("\t.lglobl .", data
->file
);
34059 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
34060 putc ('\n', data
->file
);
34062 fputs ("\t.lglobl ", data
->file
);
34063 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
34064 putc ('\n', data
->file
);
34066 if (data
->function_descriptor
)
34067 fputs (".", data
->file
);
34068 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
34069 fputs (":\n", data
->file
);
34074 #ifdef HAVE_GAS_HIDDEN
34075 /* Helper function to calculate visibility of a DECL
34076 and return the value as a const string. */
34078 static const char *
34079 rs6000_xcoff_visibility (tree decl
)
34081 static const char * const visibility_types
[] = {
34082 "", ",protected", ",hidden", ",internal"
34085 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
34087 if (TREE_CODE (decl
) == FUNCTION_DECL
34088 && cgraph_node::get (decl
)
34089 && cgraph_node::get (decl
)->instrumentation_clone
34090 && cgraph_node::get (decl
)->instrumented_version
)
34091 vis
= DECL_VISIBILITY (cgraph_node::get (decl
)->instrumented_version
->decl
);
34093 return visibility_types
[vis
];
34098 /* This macro produces the initial definition of a function name.
34099 On the RS/6000, we need to place an extra '.' in the function name and
34100 output the function descriptor.
34101 Dollar signs are converted to underscores.
34103 The csect for the function will have already been created when
34104 text_section was selected. We do have to go back to that csect, however.
34106 The third and fourth parameters to the .function pseudo-op (16 and 044)
34107 are placeholders which no longer have any use.
34109 Because AIX assembler's .set command has unexpected semantics, we output
34110 all aliases as alternative labels in front of the definition. */
34113 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
34115 char *buffer
= (char *) alloca (strlen (name
) + 1);
34117 int dollar_inside
= 0;
34118 struct declare_alias_data data
= {file
, false};
34120 strcpy (buffer
, name
);
34121 p
= strchr (buffer
, '$');
34125 p
= strchr (p
+ 1, '$');
34127 if (TREE_PUBLIC (decl
))
34129 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
34131 if (dollar_inside
) {
34132 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
34133 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
34135 fputs ("\t.globl .", file
);
34136 RS6000_OUTPUT_BASENAME (file
, buffer
);
34137 #ifdef HAVE_GAS_HIDDEN
34138 fputs (rs6000_xcoff_visibility (decl
), file
);
34145 if (dollar_inside
) {
34146 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
34147 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
34149 fputs ("\t.lglobl .", file
);
34150 RS6000_OUTPUT_BASENAME (file
, buffer
);
34153 fputs ("\t.csect ", file
);
34154 RS6000_OUTPUT_BASENAME (file
, buffer
);
34155 fputs (TARGET_32BIT
? "[DS]\n" : "[DS],3\n", file
);
34156 RS6000_OUTPUT_BASENAME (file
, buffer
);
34157 fputs (":\n", file
);
34158 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
34160 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
34161 RS6000_OUTPUT_BASENAME (file
, buffer
);
34162 fputs (", TOC[tc0], 0\n", file
);
34164 switch_to_section (function_section (decl
));
34166 RS6000_OUTPUT_BASENAME (file
, buffer
);
34167 fputs (":\n", file
);
34168 data
.function_descriptor
= true;
34169 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
34171 if (!DECL_IGNORED_P (decl
))
34173 if (write_symbols
== DBX_DEBUG
|| write_symbols
== XCOFF_DEBUG
)
34174 xcoffout_declare_function (file
, decl
, buffer
);
34175 else if (write_symbols
== DWARF2_DEBUG
)
34177 name
= (*targetm
.strip_name_encoding
) (name
);
34178 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
34185 /* Output assembly language to globalize a symbol from a DECL,
34186 possibly with visibility. */
34189 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
34191 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
34192 fputs (GLOBAL_ASM_OP
, stream
);
34193 RS6000_OUTPUT_BASENAME (stream
, name
);
34194 #ifdef HAVE_GAS_HIDDEN
34195 fputs (rs6000_xcoff_visibility (decl
), stream
);
34197 putc ('\n', stream
);
34200 /* Output assembly language to define a symbol as COMMON from a DECL,
34201 possibly with visibility. */
34204 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
34205 tree decl ATTRIBUTE_UNUSED
,
34207 unsigned HOST_WIDE_INT size
,
34208 unsigned HOST_WIDE_INT align
)
34210 unsigned HOST_WIDE_INT align2
= 2;
34213 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
34217 fputs (COMMON_ASM_OP
, stream
);
34218 RS6000_OUTPUT_BASENAME (stream
, name
);
34221 "," HOST_WIDE_INT_PRINT_UNSIGNED
"," HOST_WIDE_INT_PRINT_UNSIGNED
,
34224 #ifdef HAVE_GAS_HIDDEN
34225 fputs (rs6000_xcoff_visibility (decl
), stream
);
34227 putc ('\n', stream
);
34230 /* This macro produces the initial definition of a object (variable) name.
34231 Because AIX assembler's .set command has unexpected semantics, we output
34232 all aliases as alternative labels in front of the definition. */
34235 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
34237 struct declare_alias_data data
= {file
, false};
34238 RS6000_OUTPUT_BASENAME (file
, name
);
34239 fputs (":\n", file
);
34240 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
34244 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
34247 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
34249 fputs (integer_asm_op (size
, FALSE
), file
);
34250 assemble_name (file
, label
);
34251 fputs ("-$", file
);
34254 /* Output a symbol offset relative to the dbase for the current object.
34255 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34258 __gcc_unwind_dbase is embedded in all executables/libraries through
34259 libgcc/config/rs6000/crtdbase.S. */
34262 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
34264 fputs (integer_asm_op (size
, FALSE
), file
);
34265 assemble_name (file
, label
);
34266 fputs("-__gcc_unwind_dbase", file
);
34271 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
34275 const char *symname
;
34277 default_encode_section_info (decl
, rtl
, first
);
34279 /* Careful not to prod global register variables. */
34282 symbol
= XEXP (rtl
, 0);
34283 if (GET_CODE (symbol
) != SYMBOL_REF
)
34286 flags
= SYMBOL_REF_FLAGS (symbol
);
34288 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
34289 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
34291 SYMBOL_REF_FLAGS (symbol
) = flags
;
34293 /* Append mapping class to extern decls. */
34294 symname
= XSTR (symbol
, 0);
34295 if (decl
/* sync condition with assemble_external () */
34296 && DECL_P (decl
) && DECL_EXTERNAL (decl
) && TREE_PUBLIC (decl
)
34297 && ((TREE_CODE (decl
) == VAR_DECL
&& !DECL_THREAD_LOCAL_P (decl
))
34298 || TREE_CODE (decl
) == FUNCTION_DECL
)
34299 && symname
[strlen (symname
) - 1] != ']')
34301 char *newname
= (char *) alloca (strlen (symname
) + 5);
34302 strcpy (newname
, symname
);
34303 strcat (newname
, (TREE_CODE (decl
) == FUNCTION_DECL
34304 ? "[DS]" : "[UA]"));
34305 XSTR (symbol
, 0) = ggc_strdup (newname
);
34308 #endif /* HAVE_AS_TLS */
34309 #endif /* TARGET_XCOFF */
34312 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
34313 const char *name
, const char *val
)
34315 fputs ("\t.weak\t", stream
);
34316 RS6000_OUTPUT_BASENAME (stream
, name
);
34317 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
34318 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
34321 fputs ("[DS]", stream
);
34322 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34324 fputs (rs6000_xcoff_visibility (decl
), stream
);
34326 fputs ("\n\t.weak\t.", stream
);
34327 RS6000_OUTPUT_BASENAME (stream
, name
);
34329 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34331 fputs (rs6000_xcoff_visibility (decl
), stream
);
34333 fputc ('\n', stream
);
34336 #ifdef ASM_OUTPUT_DEF
34337 ASM_OUTPUT_DEF (stream
, name
, val
);
34339 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
34340 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
34342 fputs ("\t.set\t.", stream
);
34343 RS6000_OUTPUT_BASENAME (stream
, name
);
34344 fputs (",.", stream
);
34345 RS6000_OUTPUT_BASENAME (stream
, val
);
34346 fputc ('\n', stream
);
34352 /* Return true if INSN should not be copied. */
34355 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
34357 return recog_memoized (insn
) >= 0
34358 && get_attr_cannot_copy (insn
);
34361 /* Compute a (partial) cost for rtx X. Return true if the complete
34362 cost has been computed, and false if subexpressions should be
34363 scanned. In either case, *TOTAL contains the cost result. */
34366 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
34367 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
34369 int code
= GET_CODE (x
);
34373 /* On the RS/6000, if it is valid in the insn, it is free. */
34375 if (((outer_code
== SET
34376 || outer_code
== PLUS
34377 || outer_code
== MINUS
)
34378 && (satisfies_constraint_I (x
)
34379 || satisfies_constraint_L (x
)))
34380 || (outer_code
== AND
34381 && (satisfies_constraint_K (x
)
34383 ? satisfies_constraint_L (x
)
34384 : satisfies_constraint_J (x
))))
34385 || ((outer_code
== IOR
|| outer_code
== XOR
)
34386 && (satisfies_constraint_K (x
)
34388 ? satisfies_constraint_L (x
)
34389 : satisfies_constraint_J (x
))))
34390 || outer_code
== ASHIFT
34391 || outer_code
== ASHIFTRT
34392 || outer_code
== LSHIFTRT
34393 || outer_code
== ROTATE
34394 || outer_code
== ROTATERT
34395 || outer_code
== ZERO_EXTRACT
34396 || (outer_code
== MULT
34397 && satisfies_constraint_I (x
))
34398 || ((outer_code
== DIV
|| outer_code
== UDIV
34399 || outer_code
== MOD
|| outer_code
== UMOD
)
34400 && exact_log2 (INTVAL (x
)) >= 0)
34401 || (outer_code
== COMPARE
34402 && (satisfies_constraint_I (x
)
34403 || satisfies_constraint_K (x
)))
34404 || ((outer_code
== EQ
|| outer_code
== NE
)
34405 && (satisfies_constraint_I (x
)
34406 || satisfies_constraint_K (x
)
34408 ? satisfies_constraint_L (x
)
34409 : satisfies_constraint_J (x
))))
34410 || (outer_code
== GTU
34411 && satisfies_constraint_I (x
))
34412 || (outer_code
== LTU
34413 && satisfies_constraint_P (x
)))
34418 else if ((outer_code
== PLUS
34419 && reg_or_add_cint_operand (x
, VOIDmode
))
34420 || (outer_code
== MINUS
34421 && reg_or_sub_cint_operand (x
, VOIDmode
))
34422 || ((outer_code
== SET
34423 || outer_code
== IOR
34424 || outer_code
== XOR
)
34426 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
34428 *total
= COSTS_N_INSNS (1);
34434 case CONST_WIDE_INT
:
34438 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34442 /* When optimizing for size, MEM should be slightly more expensive
34443 than generating address, e.g., (plus (reg) (const)).
34444 L1 cache latency is about two instructions. */
34445 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34446 if (SLOW_UNALIGNED_ACCESS (mode
, MEM_ALIGN (x
)))
34447 *total
+= COSTS_N_INSNS (100);
34456 if (FLOAT_MODE_P (mode
))
34457 *total
= rs6000_cost
->fp
;
34459 *total
= COSTS_N_INSNS (1);
34463 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
34464 && satisfies_constraint_I (XEXP (x
, 1)))
34466 if (INTVAL (XEXP (x
, 1)) >= -256
34467 && INTVAL (XEXP (x
, 1)) <= 255)
34468 *total
= rs6000_cost
->mulsi_const9
;
34470 *total
= rs6000_cost
->mulsi_const
;
34472 else if (mode
== SFmode
)
34473 *total
= rs6000_cost
->fp
;
34474 else if (FLOAT_MODE_P (mode
))
34475 *total
= rs6000_cost
->dmul
;
34476 else if (mode
== DImode
)
34477 *total
= rs6000_cost
->muldi
;
34479 *total
= rs6000_cost
->mulsi
;
34483 if (mode
== SFmode
)
34484 *total
= rs6000_cost
->fp
;
34486 *total
= rs6000_cost
->dmul
;
34491 if (FLOAT_MODE_P (mode
))
34493 *total
= mode
== DFmode
? rs6000_cost
->ddiv
34494 : rs6000_cost
->sdiv
;
34501 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
34502 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
34504 if (code
== DIV
|| code
== MOD
)
34506 *total
= COSTS_N_INSNS (2);
34509 *total
= COSTS_N_INSNS (1);
34513 if (GET_MODE (XEXP (x
, 1)) == DImode
)
34514 *total
= rs6000_cost
->divdi
;
34516 *total
= rs6000_cost
->divsi
;
34518 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34519 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
34520 *total
+= COSTS_N_INSNS (2);
34524 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
34528 *total
= COSTS_N_INSNS (4);
34532 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
34536 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
34540 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
34543 *total
= COSTS_N_INSNS (1);
34547 if (CONST_INT_P (XEXP (x
, 1)))
34549 rtx left
= XEXP (x
, 0);
34550 rtx_code left_code
= GET_CODE (left
);
34552 /* rotate-and-mask: 1 insn. */
34553 if ((left_code
== ROTATE
34554 || left_code
== ASHIFT
34555 || left_code
== LSHIFTRT
)
34556 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
34558 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
34559 if (!CONST_INT_P (XEXP (left
, 1)))
34560 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
34561 *total
+= COSTS_N_INSNS (1);
34565 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34566 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
34567 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
34568 || (val
& 0xffff) == val
34569 || (val
& 0xffff0000) == val
34570 || ((val
& 0xffff) == 0 && mode
== SImode
))
34572 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
34573 *total
+= COSTS_N_INSNS (1);
34578 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
34580 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
34581 *total
+= COSTS_N_INSNS (2);
34586 *total
= COSTS_N_INSNS (1);
34591 *total
= COSTS_N_INSNS (1);
34597 *total
= COSTS_N_INSNS (1);
34601 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34602 the sign extend and shift separately within the insn. */
34603 if (TARGET_EXTSWSLI
&& mode
== DImode
34604 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
34605 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
34616 /* Handle mul_highpart. */
34617 if (outer_code
== TRUNCATE
34618 && GET_CODE (XEXP (x
, 0)) == MULT
)
34620 if (mode
== DImode
)
34621 *total
= rs6000_cost
->muldi
;
34623 *total
= rs6000_cost
->mulsi
;
34626 else if (outer_code
== AND
)
34629 *total
= COSTS_N_INSNS (1);
34634 if (GET_CODE (XEXP (x
, 0)) == MEM
)
34637 *total
= COSTS_N_INSNS (1);
34643 if (!FLOAT_MODE_P (mode
))
34645 *total
= COSTS_N_INSNS (1);
34651 case UNSIGNED_FLOAT
:
34654 case FLOAT_TRUNCATE
:
34655 *total
= rs6000_cost
->fp
;
34659 if (mode
== DFmode
)
34660 *total
= rs6000_cost
->sfdf_convert
;
34662 *total
= rs6000_cost
->fp
;
34666 switch (XINT (x
, 1))
34669 *total
= rs6000_cost
->fp
;
34681 *total
= COSTS_N_INSNS (1);
34684 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
34686 *total
= rs6000_cost
->fp
;
34695 /* Carry bit requires mode == Pmode.
34696 NEG or PLUS already counted so only add one. */
34698 && (outer_code
== NEG
|| outer_code
== PLUS
))
34700 *total
= COSTS_N_INSNS (1);
34703 if (outer_code
== SET
)
34705 if (XEXP (x
, 1) == const0_rtx
)
34707 if (TARGET_ISEL
&& !TARGET_MFCRF
)
34708 *total
= COSTS_N_INSNS (8);
34710 *total
= COSTS_N_INSNS (2);
34715 *total
= COSTS_N_INSNS (3);
34724 if (outer_code
== SET
&& (XEXP (x
, 1) == const0_rtx
))
34726 if (TARGET_ISEL
&& !TARGET_MFCRF
)
34727 *total
= COSTS_N_INSNS (8);
34729 *total
= COSTS_N_INSNS (2);
34733 if (outer_code
== COMPARE
)
34747 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
34750 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
34751 int opno
, int *total
, bool speed
)
34753 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
34756 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34757 "opno = %d, total = %d, speed = %s, x:\n",
34758 ret
? "complete" : "scan inner",
34759 GET_MODE_NAME (mode
),
34760 GET_RTX_NAME (outer_code
),
34763 speed
? "true" : "false");
34770 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34773 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
34774 addr_space_t as
, bool speed
)
34776 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
34778 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34779 ret
, speed
? "true" : "false");
34786 /* A C expression returning the cost of moving data from a register of class
34787 CLASS1 to one of CLASS2. */
34790 rs6000_register_move_cost (machine_mode mode
,
34791 reg_class_t from
, reg_class_t to
)
34795 if (TARGET_DEBUG_COST
)
34798 /* Moves from/to GENERAL_REGS. */
34799 if (reg_classes_intersect_p (to
, GENERAL_REGS
)
34800 || reg_classes_intersect_p (from
, GENERAL_REGS
))
34802 reg_class_t rclass
= from
;
34804 if (! reg_classes_intersect_p (to
, GENERAL_REGS
))
34807 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
34808 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
34809 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
34811 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34813 else if (rclass
== CR_REGS
)
34816 /* For those processors that have slow LR/CTR moves, make them more
34817 expensive than memory in order to bias spills to memory .*/
34818 else if ((rs6000_cpu
== PROCESSOR_POWER6
34819 || rs6000_cpu
== PROCESSOR_POWER7
34820 || rs6000_cpu
== PROCESSOR_POWER8
34821 || rs6000_cpu
== PROCESSOR_POWER9
)
34822 && reg_classes_intersect_p (rclass
, LINK_OR_CTR_REGS
))
34823 ret
= 6 * hard_regno_nregs
[0][mode
];
34826 /* A move will cost one instruction per GPR moved. */
34827 ret
= 2 * hard_regno_nregs
[0][mode
];
34830 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34831 else if (VECTOR_MEM_VSX_P (mode
)
34832 && reg_classes_intersect_p (to
, VSX_REGS
)
34833 && reg_classes_intersect_p (from
, VSX_REGS
))
34834 ret
= 2 * hard_regno_nregs
[FIRST_FPR_REGNO
][mode
];
34836 /* Moving between two similar registers is just one instruction. */
34837 else if (reg_classes_intersect_p (to
, from
))
34838 ret
= (FLOAT128_2REG_P (mode
)) ? 4 : 2;
34840 /* Everything else has to go through GENERAL_REGS. */
34842 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
34843 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
34845 if (TARGET_DEBUG_COST
)
34847 if (dbg_cost_ctrl
== 1)
34849 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34850 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
34851 reg_class_names
[to
]);
34858 /* A C expressions returning the cost of moving data of MODE from a register to
34862 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
34863 bool in ATTRIBUTE_UNUSED
)
34867 if (TARGET_DEBUG_COST
)
34870 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
34871 ret
= 4 * hard_regno_nregs
[0][mode
];
34872 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
34873 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
34874 ret
= 4 * hard_regno_nregs
[32][mode
];
34875 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
34876 ret
= 4 * hard_regno_nregs
[FIRST_ALTIVEC_REGNO
][mode
];
34878 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
34880 if (TARGET_DEBUG_COST
)
34882 if (dbg_cost_ctrl
== 1)
34884 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34885 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
34892 /* Returns a code for a target-specific builtin that implements
34893 reciprocal of the function, or NULL_TREE if not available. */
34896 rs6000_builtin_reciprocal (tree fndecl
)
34898 switch (DECL_FUNCTION_CODE (fndecl
))
34900 case VSX_BUILTIN_XVSQRTDP
:
34901 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode
))
34904 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_2DF
];
34906 case VSX_BUILTIN_XVSQRTSP
:
34907 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode
))
34910 return rs6000_builtin_decls
[VSX_BUILTIN_RSQRT_4SF
];
34917 /* Load up a constant. If the mode is a vector mode, splat the value across
34918 all of the vector elements. */
34921 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
34925 if (mode
== SFmode
|| mode
== DFmode
)
34927 rtx d
= const_double_from_real_value (dconst
, mode
);
34928 reg
= force_reg (mode
, d
);
34930 else if (mode
== V4SFmode
)
34932 rtx d
= const_double_from_real_value (dconst
, SFmode
);
34933 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
34934 reg
= gen_reg_rtx (mode
);
34935 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
34937 else if (mode
== V2DFmode
)
34939 rtx d
= const_double_from_real_value (dconst
, DFmode
);
34940 rtvec v
= gen_rtvec (2, d
, d
);
34941 reg
= gen_reg_rtx (mode
);
34942 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
34945 gcc_unreachable ();
34950 /* Generate an FMA instruction. */
34953 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
34955 machine_mode mode
= GET_MODE (target
);
34958 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
34959 gcc_assert (dst
!= NULL
);
34962 emit_move_insn (target
, dst
);
34965 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34968 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
34970 machine_mode mode
= GET_MODE (dst
);
34973 /* This is a tad more complicated, since the fnma_optab is for
34974 a different expression: fma(-m1, m2, a), which is the same
34975 thing except in the case of signed zeros.
34977 Fortunately we know that if FMA is supported that FNMSUB is
34978 also supported in the ISA. Just expand it directly. */
34980 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
34982 r
= gen_rtx_NEG (mode
, a
);
34983 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
34984 r
= gen_rtx_NEG (mode
, r
);
34985 emit_insn (gen_rtx_SET (dst
, r
));
34988 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34989 add a reg_note saying that this was a division. Support both scalar and
34990 vector divide. Assumes no trapping math and finite arguments. */
34993 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
34995 machine_mode mode
= GET_MODE (dst
);
34996 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
34999 /* Low precision estimates guarantee 5 bits of accuracy. High
35000 precision estimates guarantee 14 bits of accuracy. SFmode
35001 requires 23 bits of accuracy. DFmode requires 52 bits of
35002 accuracy. Each pass at least doubles the accuracy, leading
35003 to the following. */
35004 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
35005 if (mode
== DFmode
|| mode
== V2DFmode
)
35008 enum insn_code code
= optab_handler (smul_optab
, mode
);
35009 insn_gen_fn gen_mul
= GEN_FCN (code
);
35011 gcc_assert (code
!= CODE_FOR_nothing
);
35013 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
35015 /* x0 = 1./d estimate */
35016 x0
= gen_reg_rtx (mode
);
35017 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
35020 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35023 /* e0 = 1. - d * x0 */
35024 e0
= gen_reg_rtx (mode
);
35025 rs6000_emit_nmsub (e0
, d
, x0
, one
);
35027 /* x1 = x0 + e0 * x0 */
35028 x1
= gen_reg_rtx (mode
);
35029 rs6000_emit_madd (x1
, e0
, x0
, x0
);
35031 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
35032 ++i
, xprev
= xnext
, eprev
= enext
) {
35034 /* enext = eprev * eprev */
35035 enext
= gen_reg_rtx (mode
);
35036 emit_insn (gen_mul (enext
, eprev
, eprev
));
35038 /* xnext = xprev + enext * xprev */
35039 xnext
= gen_reg_rtx (mode
);
35040 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
35046 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35048 /* u = n * xprev */
35049 u
= gen_reg_rtx (mode
);
35050 emit_insn (gen_mul (u
, n
, xprev
));
35052 /* v = n - (d * u) */
35053 v
= gen_reg_rtx (mode
);
35054 rs6000_emit_nmsub (v
, d
, u
, n
);
35056 /* dst = (v * xprev) + u */
35057 rs6000_emit_madd (dst
, v
, xprev
, u
);
35060 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
35063 /* Goldschmidt's Algorithm for single/double-precision floating point
35064 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35067 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
35069 machine_mode mode
= GET_MODE (src
);
35070 rtx e
= gen_reg_rtx (mode
);
35071 rtx g
= gen_reg_rtx (mode
);
35072 rtx h
= gen_reg_rtx (mode
);
35074 /* Low precision estimates guarantee 5 bits of accuracy. High
35075 precision estimates guarantee 14 bits of accuracy. SFmode
35076 requires 23 bits of accuracy. DFmode requires 52 bits of
35077 accuracy. Each pass at least doubles the accuracy, leading
35078 to the following. */
35079 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
35080 if (mode
== DFmode
|| mode
== V2DFmode
)
35085 enum insn_code code
= optab_handler (smul_optab
, mode
);
35086 insn_gen_fn gen_mul
= GEN_FCN (code
);
35088 gcc_assert (code
!= CODE_FOR_nothing
);
35090 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
35092 /* e = rsqrt estimate */
35093 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
35096 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
35099 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
35101 if (mode
== SFmode
)
35103 rtx target
= emit_conditional_move (e
, GT
, src
, zero
, mode
,
35106 emit_move_insn (e
, target
);
35110 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
35111 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
35115 /* g = sqrt estimate. */
35116 emit_insn (gen_mul (g
, e
, src
));
35117 /* h = 1/(2*sqrt) estimate. */
35118 emit_insn (gen_mul (h
, e
, mhalf
));
35124 rtx t
= gen_reg_rtx (mode
);
35125 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
35126 /* Apply correction directly to 1/rsqrt estimate. */
35127 rs6000_emit_madd (dst
, e
, t
, e
);
35131 for (i
= 0; i
< passes
; i
++)
35133 rtx t1
= gen_reg_rtx (mode
);
35134 rtx g1
= gen_reg_rtx (mode
);
35135 rtx h1
= gen_reg_rtx (mode
);
35137 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
35138 rs6000_emit_madd (g1
, g
, t1
, g
);
35139 rs6000_emit_madd (h1
, h
, t1
, h
);
35144 /* Multiply by 2 for 1/rsqrt. */
35145 emit_insn (gen_add3_insn (dst
, h
, h
));
35150 rtx t
= gen_reg_rtx (mode
);
35151 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
35152 rs6000_emit_madd (dst
, g
, t
, g
);
35158 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35159 (Power7) targets. DST is the target, and SRC is the argument operand. */
35162 rs6000_emit_popcount (rtx dst
, rtx src
)
35164 machine_mode mode
= GET_MODE (dst
);
35167 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35168 if (TARGET_POPCNTD
)
35170 if (mode
== SImode
)
35171 emit_insn (gen_popcntdsi2 (dst
, src
));
35173 emit_insn (gen_popcntddi2 (dst
, src
));
35177 tmp1
= gen_reg_rtx (mode
);
35179 if (mode
== SImode
)
35181 emit_insn (gen_popcntbsi2 (tmp1
, src
));
35182 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
35184 tmp2
= force_reg (SImode
, tmp2
);
35185 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
35189 emit_insn (gen_popcntbdi2 (tmp1
, src
));
35190 tmp2
= expand_mult (DImode
, tmp1
,
35191 GEN_INT ((HOST_WIDE_INT
)
35192 0x01010101 << 32 | 0x01010101),
35194 tmp2
= force_reg (DImode
, tmp2
);
35195 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
35200 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35201 target, and SRC is the argument operand. */
35204 rs6000_emit_parity (rtx dst
, rtx src
)
35206 machine_mode mode
= GET_MODE (dst
);
35209 tmp
= gen_reg_rtx (mode
);
35211 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35214 if (mode
== SImode
)
35216 emit_insn (gen_popcntbsi2 (tmp
, src
));
35217 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
35221 emit_insn (gen_popcntbdi2 (tmp
, src
));
35222 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
35227 if (mode
== SImode
)
35229 /* Is mult+shift >= shift+xor+shift+xor? */
35230 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
35232 rtx tmp1
, tmp2
, tmp3
, tmp4
;
35234 tmp1
= gen_reg_rtx (SImode
);
35235 emit_insn (gen_popcntbsi2 (tmp1
, src
));
35237 tmp2
= gen_reg_rtx (SImode
);
35238 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
35239 tmp3
= gen_reg_rtx (SImode
);
35240 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
35242 tmp4
= gen_reg_rtx (SImode
);
35243 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
35244 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
35247 rs6000_emit_popcount (tmp
, src
);
35248 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
35252 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35253 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
35255 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
35257 tmp1
= gen_reg_rtx (DImode
);
35258 emit_insn (gen_popcntbdi2 (tmp1
, src
));
35260 tmp2
= gen_reg_rtx (DImode
);
35261 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
35262 tmp3
= gen_reg_rtx (DImode
);
35263 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
35265 tmp4
= gen_reg_rtx (DImode
);
35266 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
35267 tmp5
= gen_reg_rtx (DImode
);
35268 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
35270 tmp6
= gen_reg_rtx (DImode
);
35271 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
35272 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
35275 rs6000_emit_popcount (tmp
, src
);
35276 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
35280 /* Expand an Altivec constant permutation for little endian mode.
35281 There are two issues: First, the two input operands must be
35282 swapped so that together they form a double-wide array in LE
35283 order. Second, the vperm instruction has surprising behavior
35284 in LE mode: it interprets the elements of the source vectors
35285 in BE mode ("left to right") and interprets the elements of
35286 the destination vector in LE mode ("right to left"). To
35287 correct for this, we must subtract each element of the permute
35288 control vector from 31.
35290 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35291 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35292 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35293 serve as the permute control vector. Then, in BE mode,
35297 places the desired result in vr9. However, in LE mode the
35298 vector contents will be
35300 vr10 = 00000003 00000002 00000001 00000000
35301 vr11 = 00000007 00000006 00000005 00000004
35303 The result of the vperm using the same permute control vector is
35305 vr9 = 05000000 07000000 01000000 03000000
35307 That is, the leftmost 4 bytes of vr10 are interpreted as the
35308 source for the rightmost 4 bytes of vr9, and so on.
35310 If we change the permute control vector to
35312 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35320 vr9 = 00000006 00000004 00000002 00000000. */
35323 altivec_expand_vec_perm_const_le (rtx operands
[4])
35327 rtx constv
, unspec
;
35328 rtx target
= operands
[0];
35329 rtx op0
= operands
[1];
35330 rtx op1
= operands
[2];
35331 rtx sel
= operands
[3];
35333 /* Unpack and adjust the constant selector. */
35334 for (i
= 0; i
< 16; ++i
)
35336 rtx e
= XVECEXP (sel
, 0, i
);
35337 unsigned int elt
= 31 - (INTVAL (e
) & 31);
35338 perm
[i
] = GEN_INT (elt
);
35341 /* Expand to a permute, swapping the inputs and using the
35342 adjusted selector. */
35344 op0
= force_reg (V16QImode
, op0
);
35346 op1
= force_reg (V16QImode
, op1
);
35348 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
35349 constv
= force_reg (V16QImode
, constv
);
35350 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
35352 if (!REG_P (target
))
35354 rtx tmp
= gen_reg_rtx (V16QImode
);
35355 emit_move_insn (tmp
, unspec
);
35359 emit_move_insn (target
, unspec
);
35362 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35363 permute control vector. But here it's not a constant, so we must
35364 generate a vector NAND or NOR to do the adjustment. */
35367 altivec_expand_vec_perm_le (rtx operands
[4])
35369 rtx notx
, iorx
, unspec
;
35370 rtx target
= operands
[0];
35371 rtx op0
= operands
[1];
35372 rtx op1
= operands
[2];
35373 rtx sel
= operands
[3];
35375 rtx norreg
= gen_reg_rtx (V16QImode
);
35376 machine_mode mode
= GET_MODE (target
);
35378 /* Get everything in regs so the pattern matches. */
35380 op0
= force_reg (mode
, op0
);
35382 op1
= force_reg (mode
, op1
);
35384 sel
= force_reg (V16QImode
, sel
);
35385 if (!REG_P (target
))
35386 tmp
= gen_reg_rtx (mode
);
35388 if (TARGET_P9_VECTOR
)
35390 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op0
, op1
, sel
),
35395 /* Invert the selector with a VNAND if available, else a VNOR.
35396 The VNAND is preferred for future fusion opportunities. */
35397 notx
= gen_rtx_NOT (V16QImode
, sel
);
35398 iorx
= (TARGET_P8_VECTOR
35399 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
35400 : gen_rtx_AND (V16QImode
, notx
, notx
));
35401 emit_insn (gen_rtx_SET (norreg
, iorx
));
35403 /* Permute with operands reversed and adjusted selector. */
35404 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
35408 /* Copy into target, possibly by way of a register. */
35409 if (!REG_P (target
))
35411 emit_move_insn (tmp
, unspec
);
35415 emit_move_insn (target
, unspec
);
35418 /* Expand an Altivec constant permutation. Return true if we match
35419 an efficient implementation; false to fall back to VPERM. */
35422 altivec_expand_vec_perm_const (rtx operands
[4])
35424 struct altivec_perm_insn
{
35425 HOST_WIDE_INT mask
;
35426 enum insn_code impl
;
35427 unsigned char perm
[16];
35429 static const struct altivec_perm_insn patterns
[] = {
35430 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuhum_direct
,
35431 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35432 { OPTION_MASK_ALTIVEC
, CODE_FOR_altivec_vpkuwum_direct
,
35433 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35434 { OPTION_MASK_ALTIVEC
,
35435 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
35436 : CODE_FOR_altivec_vmrglb_direct
),
35437 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35438 { OPTION_MASK_ALTIVEC
,
35439 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
35440 : CODE_FOR_altivec_vmrglh_direct
),
35441 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35442 { OPTION_MASK_ALTIVEC
,
35443 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct
35444 : CODE_FOR_altivec_vmrglw_direct
),
35445 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35446 { OPTION_MASK_ALTIVEC
,
35447 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
35448 : CODE_FOR_altivec_vmrghb_direct
),
35449 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35450 { OPTION_MASK_ALTIVEC
,
35451 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
35452 : CODE_FOR_altivec_vmrghh_direct
),
35453 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35454 { OPTION_MASK_ALTIVEC
,
35455 (BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct
35456 : CODE_FOR_altivec_vmrghw_direct
),
35457 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35458 { OPTION_MASK_P8_VECTOR
, CODE_FOR_p8_vmrgew_v4si
,
35459 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35460 { OPTION_MASK_P8_VECTOR
, CODE_FOR_p8_vmrgow
,
35461 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35464 unsigned int i
, j
, elt
, which
;
35465 unsigned char perm
[16];
35466 rtx target
, op0
, op1
, sel
, x
;
35469 target
= operands
[0];
35474 /* Unpack the constant selector. */
35475 for (i
= which
= 0; i
< 16; ++i
)
35477 rtx e
= XVECEXP (sel
, 0, i
);
35478 elt
= INTVAL (e
) & 31;
35479 which
|= (elt
< 16 ? 1 : 2);
35483 /* Simplify the constant selector based on operands. */
35487 gcc_unreachable ();
35491 if (!rtx_equal_p (op0
, op1
))
35496 for (i
= 0; i
< 16; ++i
)
35508 /* Look for splat patterns. */
35513 for (i
= 0; i
< 16; ++i
)
35514 if (perm
[i
] != elt
)
35518 if (!BYTES_BIG_ENDIAN
)
35520 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
35526 for (i
= 0; i
< 16; i
+= 2)
35527 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
35531 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
35532 x
= gen_reg_rtx (V8HImode
);
35533 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
35535 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
35542 for (i
= 0; i
< 16; i
+= 4)
35544 || perm
[i
+ 1] != elt
+ 1
35545 || perm
[i
+ 2] != elt
+ 2
35546 || perm
[i
+ 3] != elt
+ 3)
35550 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
35551 x
= gen_reg_rtx (V4SImode
);
35552 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
35554 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
35560 /* Look for merge and pack patterns. */
35561 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
35565 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
35568 elt
= patterns
[j
].perm
[0];
35569 if (perm
[0] == elt
)
35571 else if (perm
[0] == elt
+ 16)
35575 for (i
= 1; i
< 16; ++i
)
35577 elt
= patterns
[j
].perm
[i
];
35579 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
35580 else if (one_vec
&& elt
>= 16)
35582 if (perm
[i
] != elt
)
35587 enum insn_code icode
= patterns
[j
].impl
;
35588 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
35589 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
35591 /* For little-endian, don't use vpkuwum and vpkuhum if the
35592 underlying vector type is not V4SI and V8HI, respectively.
35593 For example, using vpkuwum with a V8HI picks up the even
35594 halfwords (BE numbering) when the even halfwords (LE
35595 numbering) are what we need. */
35596 if (!BYTES_BIG_ENDIAN
35597 && icode
== CODE_FOR_altivec_vpkuwum_direct
35598 && ((GET_CODE (op0
) == REG
35599 && GET_MODE (op0
) != V4SImode
)
35600 || (GET_CODE (op0
) == SUBREG
35601 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
35603 if (!BYTES_BIG_ENDIAN
35604 && icode
== CODE_FOR_altivec_vpkuhum_direct
35605 && ((GET_CODE (op0
) == REG
35606 && GET_MODE (op0
) != V8HImode
)
35607 || (GET_CODE (op0
) == SUBREG
35608 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
35611 /* For little-endian, the two input operands must be swapped
35612 (or swapped back) to ensure proper right-to-left numbering
35614 if (swapped
^ !BYTES_BIG_ENDIAN
)
35615 std::swap (op0
, op1
);
35616 if (imode
!= V16QImode
)
35618 op0
= gen_lowpart (imode
, op0
);
35619 op1
= gen_lowpart (imode
, op1
);
35621 if (omode
== V16QImode
)
35624 x
= gen_reg_rtx (omode
);
35625 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
35626 if (omode
!= V16QImode
)
35627 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
35632 if (!BYTES_BIG_ENDIAN
)
35634 altivec_expand_vec_perm_const_le (operands
);
35641 /* Expand a Paired Single or VSX Permute Doubleword constant permutation.
35642 Return true if we match an efficient implementation. */
35645 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
35646 unsigned char perm0
, unsigned char perm1
)
35650 /* If both selectors come from the same operand, fold to single op. */
35651 if ((perm0
& 2) == (perm1
& 2))
35658 /* If both operands are equal, fold to simpler permutation. */
35659 if (rtx_equal_p (op0
, op1
))
35662 perm1
= (perm1
& 1) + 2;
35664 /* If the first selector comes from the second operand, swap. */
35665 else if (perm0
& 2)
35671 std::swap (op0
, op1
);
35673 /* If the second selector does not come from the second operand, fail. */
35674 else if ((perm1
& 2) == 0)
35678 if (target
!= NULL
)
35680 machine_mode vmode
, dmode
;
35683 vmode
= GET_MODE (target
);
35684 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
35685 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4);
35686 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
35687 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
35688 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
35689 emit_insn (gen_rtx_SET (target
, x
));
35695 rs6000_expand_vec_perm_const (rtx operands
[4])
35697 rtx target
, op0
, op1
, sel
;
35698 unsigned char perm0
, perm1
;
35700 target
= operands
[0];
35705 /* Unpack the constant selector. */
35706 perm0
= INTVAL (XVECEXP (sel
, 0, 0)) & 3;
35707 perm1
= INTVAL (XVECEXP (sel
, 0, 1)) & 3;
35709 return rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, perm0
, perm1
);
35712 /* Test whether a constant permutation is supported. */
35715 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode
,
35716 const unsigned char *sel
)
35718 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35719 if (TARGET_ALTIVEC
)
35722 /* Check for ps_merge* or evmerge* insns. */
35723 if (TARGET_PAIRED_FLOAT
&& vmode
== V2SFmode
)
35725 rtx op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
35726 rtx op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
35727 return rs6000_expand_vec_perm_const_1 (NULL
, op0
, op1
, sel
[0], sel
[1]);
35733 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
35736 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
35737 machine_mode vmode
, unsigned nelt
, rtx perm
[])
35739 machine_mode imode
;
35743 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
)
35745 imode
= mode_for_size (GET_MODE_UNIT_BITSIZE (vmode
), MODE_INT
, 0);
35746 imode
= mode_for_vector (imode
, nelt
);
35749 x
= gen_rtx_CONST_VECTOR (imode
, gen_rtvec_v (nelt
, perm
));
35750 x
= expand_vec_perm (vmode
, op0
, op1
, x
, target
);
35752 emit_move_insn (target
, x
);
35755 /* Expand an extract even operation. */
35758 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
35760 machine_mode vmode
= GET_MODE (target
);
35761 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
35764 for (i
= 0; i
< nelt
; i
++)
35765 perm
[i
] = GEN_INT (i
* 2);
35767 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
35770 /* Expand a vector interleave operation. */
35773 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
35775 machine_mode vmode
= GET_MODE (target
);
35776 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
35779 high
= (highp
? 0 : nelt
/ 2);
35780 for (i
= 0; i
< nelt
/ 2; i
++)
35782 perm
[i
* 2] = GEN_INT (i
+ high
);
35783 perm
[i
* 2 + 1] = GEN_INT (i
+ nelt
+ high
);
35786 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, nelt
, perm
);
35789 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
35791 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
35793 HOST_WIDE_INT
hwi_scale (scale
);
35794 REAL_VALUE_TYPE r_pow
;
35795 rtvec v
= rtvec_alloc (2);
35797 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
35798 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
35799 elt
= const_double_from_real_value (r_pow
, DFmode
);
35800 RTVEC_ELT (v
, 0) = elt
;
35801 RTVEC_ELT (v
, 1) = elt
;
35802 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
35803 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
35806 /* Return an RTX representing where to find the function value of a
35807 function returning MODE. */
35809 rs6000_complex_function_value (machine_mode mode
)
35811 unsigned int regno
;
35813 machine_mode inner
= GET_MODE_INNER (mode
);
35814 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
35816 if (TARGET_FLOAT128_TYPE
35818 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
35819 regno
= ALTIVEC_ARG_RETURN
;
35821 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
35822 regno
= FP_ARG_RETURN
;
35826 regno
= GP_ARG_RETURN
;
35828 /* 32-bit is OK since it'll go in r3/r4. */
35829 if (TARGET_32BIT
&& inner_bytes
>= 4)
35830 return gen_rtx_REG (mode
, regno
);
35833 if (inner_bytes
>= 8)
35834 return gen_rtx_REG (mode
, regno
);
35836 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
35838 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
35839 GEN_INT (inner_bytes
));
35840 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
35843 /* Return an rtx describing a return value of MODE as a PARALLEL
35844 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35845 stride REG_STRIDE. */
35848 rs6000_parallel_return (machine_mode mode
,
35849 int n_elts
, machine_mode elt_mode
,
35850 unsigned int regno
, unsigned int reg_stride
)
35852 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
35855 for (i
= 0; i
< n_elts
; i
++)
35857 rtx r
= gen_rtx_REG (elt_mode
, regno
);
35858 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
35859 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
35860 regno
+= reg_stride
;
35866 /* Target hook for TARGET_FUNCTION_VALUE.
35868 An integer value is in r3 and a floating-point value is in fp1,
35869 unless -msoft-float. */
35872 rs6000_function_value (const_tree valtype
,
35873 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
35874 bool outgoing ATTRIBUTE_UNUSED
)
35877 unsigned int regno
;
35878 machine_mode elt_mode
;
35881 /* Special handling for structs in darwin64. */
35883 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
35885 CUMULATIVE_ARGS valcum
;
35889 valcum
.fregno
= FP_ARG_MIN_REG
;
35890 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
35891 /* Do a trial code generation as if this were going to be passed as
35892 an argument; if any part goes in memory, we return NULL. */
35893 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
35896 /* Otherwise fall through to standard ABI rules. */
35899 mode
= TYPE_MODE (valtype
);
35901 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35902 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
35904 int first_reg
, n_regs
;
35906 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
35908 /* _Decimal128 must use even/odd register pairs. */
35909 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
35910 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
35914 first_reg
= ALTIVEC_ARG_RETURN
;
35918 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
35921 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
35922 if (TARGET_32BIT
&& TARGET_POWERPC64
)
35931 int count
= GET_MODE_SIZE (mode
) / 4;
35932 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
35935 if ((INTEGRAL_TYPE_P (valtype
)
35936 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
35937 || POINTER_TYPE_P (valtype
))
35938 mode
= TARGET_32BIT
? SImode
: DImode
;
35940 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
35941 /* _Decimal128 must use an even/odd register pair. */
35942 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
35943 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
35944 && !FLOAT128_VECTOR_P (mode
)
35945 && ((TARGET_SINGLE_FLOAT
&& (mode
== SFmode
)) || TARGET_DOUBLE_FLOAT
))
35946 regno
= FP_ARG_RETURN
;
35947 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
35948 && targetm
.calls
.split_complex_arg
)
35949 return rs6000_complex_function_value (mode
);
35950 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35951 return register is used in both cases, and we won't see V2DImode/V2DFmode
35952 for pure altivec, combine the two cases. */
35953 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| FLOAT128_VECTOR_P (mode
))
35954 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
35955 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
35956 regno
= ALTIVEC_ARG_RETURN
;
35958 regno
= GP_ARG_RETURN
;
35960 return gen_rtx_REG (mode
, regno
);
35963 /* Define how to find the value returned by a library function
35964 assuming the value has mode MODE. */
35966 rs6000_libcall_value (machine_mode mode
)
35968 unsigned int regno
;
35970 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
35971 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
35972 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
35974 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
35975 /* _Decimal128 must use an even/odd register pair. */
35976 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
35977 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
)
35978 && TARGET_HARD_FLOAT
35979 && ((TARGET_SINGLE_FLOAT
&& mode
== SFmode
) || TARGET_DOUBLE_FLOAT
))
35980 regno
= FP_ARG_RETURN
;
35981 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35982 return register is used in both cases, and we won't see V2DImode/V2DFmode
35983 for pure altivec, combine the two cases. */
35984 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
35985 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
35986 regno
= ALTIVEC_ARG_RETURN
;
35987 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
35988 return rs6000_complex_function_value (mode
);
35990 regno
= GP_ARG_RETURN
;
35992 return gen_rtx_REG (mode
, regno
);
35996 /* Return true if we use LRA instead of reload pass. */
35998 rs6000_lra_p (void)
36003 /* Compute register pressure classes. We implement the target hook to avoid
36004 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
36005 lead to incorrect estimates of number of available registers and therefor
36006 increased register pressure/spill. */
36008 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
36013 pressure_classes
[n
++] = GENERAL_REGS
;
36015 pressure_classes
[n
++] = VSX_REGS
;
36018 if (TARGET_ALTIVEC
)
36019 pressure_classes
[n
++] = ALTIVEC_REGS
;
36020 if (TARGET_HARD_FLOAT
)
36021 pressure_classes
[n
++] = FLOAT_REGS
;
36023 pressure_classes
[n
++] = CR_REGS
;
36024 pressure_classes
[n
++] = SPECIAL_REGS
;
36029 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36030 Frame pointer elimination is automatically handled.
36032 For the RS/6000, if frame pointer elimination is being done, we would like
36033 to convert ap into fp, not sp.
36035 We need r30 if -mminimal-toc was specified, and there are constant pool
36039 rs6000_can_eliminate (const int from
, const int to
)
36041 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
36042 ? ! frame_pointer_needed
36043 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
36044 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC
36045 || constant_pool_empty_p ()
36049 /* Define the offset between two registers, FROM to be eliminated and its
36050 replacement TO, at the start of a routine. */
36052 rs6000_initial_elimination_offset (int from
, int to
)
36054 rs6000_stack_t
*info
= rs6000_stack_info ();
36055 HOST_WIDE_INT offset
;
36057 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
36058 offset
= info
->push_p
? 0 : -info
->total_size
;
36059 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
36061 offset
= info
->push_p
? 0 : -info
->total_size
;
36062 if (FRAME_GROWS_DOWNWARD
)
36063 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
36065 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
36066 offset
= FRAME_GROWS_DOWNWARD
36067 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
36069 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
36070 offset
= info
->total_size
;
36071 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
36072 offset
= info
->push_p
? info
->total_size
: 0;
36073 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
36076 gcc_unreachable ();
36081 /* Fill in sizes of registers used by unwinder. */
36084 rs6000_init_dwarf_reg_sizes_extra (tree address
)
36086 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
36089 machine_mode mode
= TYPE_MODE (char_type_node
);
36090 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
36091 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
36092 rtx value
= gen_int_mode (16, mode
);
36094 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36095 The unwinder still needs to know the size of Altivec registers. */
36097 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
36099 int column
= DWARF_REG_TO_UNWIND_COLUMN
36100 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
36101 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
36103 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
36108 /* Map internal gcc register numbers to debug format register numbers.
36109 FORMAT specifies the type of debug register number to use:
36110 0 -- debug information, except for frame-related sections
36111 1 -- DWARF .debug_frame section
36112 2 -- DWARF .eh_frame section */
36115 rs6000_dbx_register_number (unsigned int regno
, unsigned int format
)
36117 /* Except for the above, we use the internal number for non-DWARF
36118 debug information, and also for .eh_frame. */
36119 if ((format
== 0 && write_symbols
!= DWARF2_DEBUG
) || format
== 2)
36122 /* On some platforms, we use the standard DWARF register
36123 numbering for .debug_info and .debug_frame. */
36124 #ifdef RS6000_USE_DWARF_NUMBERING
36127 if (regno
== LR_REGNO
)
36129 if (regno
== CTR_REGNO
)
36131 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36132 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36133 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36134 to the DWARF reg for CR. */
36135 if (format
== 1 && regno
== CR2_REGNO
)
36137 if (CR_REGNO_P (regno
))
36138 return regno
- CR0_REGNO
+ 86;
36139 if (regno
== CA_REGNO
)
36140 return 101; /* XER */
36141 if (ALTIVEC_REGNO_P (regno
))
36142 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
36143 if (regno
== VRSAVE_REGNO
)
36145 if (regno
== VSCR_REGNO
)
36151 /* target hook eh_return_filter_mode */
36152 static machine_mode
36153 rs6000_eh_return_filter_mode (void)
36155 return TARGET_32BIT
? SImode
: word_mode
;
36158 /* Target hook for scalar_mode_supported_p. */
36160 rs6000_scalar_mode_supported_p (machine_mode mode
)
36162 /* -m32 does not support TImode. This is the default, from
36163 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36164 same ABI as for -m32. But default_scalar_mode_supported_p allows
36165 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36166 for -mpowerpc64. */
36167 if (TARGET_32BIT
&& mode
== TImode
)
36170 if (DECIMAL_FLOAT_MODE_P (mode
))
36171 return default_decimal_float_supported_p ();
36172 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
36175 return default_scalar_mode_supported_p (mode
);
36178 /* Target hook for vector_mode_supported_p. */
36180 rs6000_vector_mode_supported_p (machine_mode mode
)
36183 if (TARGET_PAIRED_FLOAT
&& PAIRED_VECTOR_MODE (mode
))
36186 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36187 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36189 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
36196 /* Target hook for floatn_mode. */
36197 static machine_mode
36198 rs6000_floatn_mode (int n
, bool extended
)
36208 if (TARGET_FLOAT128_KEYWORD
)
36209 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
36217 /* Those are the only valid _FloatNx types. */
36218 gcc_unreachable ();
36232 if (TARGET_FLOAT128_KEYWORD
)
36233 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
36244 /* Target hook for c_mode_for_suffix. */
36245 static machine_mode
36246 rs6000_c_mode_for_suffix (char suffix
)
36248 if (TARGET_FLOAT128_TYPE
)
36250 if (suffix
== 'q' || suffix
== 'Q')
36251 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
36253 /* At the moment, we are not defining a suffix for IBM extended double.
36254 If/when the default for -mabi=ieeelongdouble is changed, and we want
36255 to support __ibm128 constants in legacy library code, we may need to
36256 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
36257 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36258 __float80 constants. */
36264 /* Target hook for invalid_arg_for_unprototyped_fn. */
36265 static const char *
36266 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
36268 return (!rs6000_darwin64_abi
36270 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
36271 && (funcdecl
== NULL_TREE
36272 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
36273 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
36274 ? N_("AltiVec argument passed to unprototyped function")
36278 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
36279 setup by using __stack_chk_fail_local hidden function instead of
36280 calling __stack_chk_fail directly. Otherwise it is better to call
36281 __stack_chk_fail directly. */
36283 static tree ATTRIBUTE_UNUSED
36284 rs6000_stack_protect_fail (void)
36286 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
36287 ? default_hidden_stack_protect_fail ()
36288 : default_external_stack_protect_fail ();
36291 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36294 static unsigned HOST_WIDE_INT
36295 rs6000_asan_shadow_offset (void)
36297 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
36301 /* Mask options that we want to support inside of attribute((target)) and
36302 #pragma GCC target operations. Note, we do not include things like
36303 64/32-bit, endianness, hard/soft floating point, etc. that would have
36304 different calling sequences. */
36306 struct rs6000_opt_mask
{
36307 const char *name
; /* option name */
36308 HOST_WIDE_INT mask
; /* mask to set */
36309 bool invert
; /* invert sense of mask */
36310 bool valid_target
; /* option is a target option */
36313 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
36315 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
36316 { "cmpb", OPTION_MASK_CMPB
, false, true },
36317 { "crypto", OPTION_MASK_CRYPTO
, false, true },
36318 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
36319 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
36320 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
36322 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, false },
36323 { "float128-type", OPTION_MASK_FLOAT128_TYPE
, false, false },
36324 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, false },
36325 { "fprnd", OPTION_MASK_FPRND
, false, true },
36326 { "hard-dfp", OPTION_MASK_DFP
, false, true },
36327 { "htm", OPTION_MASK_HTM
, false, true },
36328 { "isel", OPTION_MASK_ISEL
, false, true },
36329 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
36330 { "mfpgpr", OPTION_MASK_MFPGPR
, false, true },
36331 { "modulo", OPTION_MASK_MODULO
, false, true },
36332 { "mulhw", OPTION_MASK_MULHW
, false, true },
36333 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
36334 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
36335 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
36336 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
36337 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
36338 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
36339 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR
, false, true },
36340 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR
, false, true },
36341 { "power9-fusion", OPTION_MASK_P9_FUSION
, false, true },
36342 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
36343 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
36344 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
36345 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
36346 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
36347 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
36348 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
36349 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
36350 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
36351 { "string", OPTION_MASK_STRING
, false, true },
36352 { "toc-fusion", OPTION_MASK_TOC_FUSION
, false, true },
36353 { "update", OPTION_MASK_NO_UPDATE
, true , true },
36354 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI
, false, true },
36355 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF
, false, true },
36356 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF
, false, true },
36357 { "vsx", OPTION_MASK_VSX
, false, true },
36358 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER
, false, true },
36359 { "vsx-timode", OPTION_MASK_VSX_TIMODE
, false, true },
36360 #ifdef OPTION_MASK_64BIT
36362 { "aix64", OPTION_MASK_64BIT
, false, false },
36363 { "aix32", OPTION_MASK_64BIT
, true, false },
36365 { "64", OPTION_MASK_64BIT
, false, false },
36366 { "32", OPTION_MASK_64BIT
, true, false },
36369 #ifdef OPTION_MASK_EABI
36370 { "eabi", OPTION_MASK_EABI
, false, false },
36372 #ifdef OPTION_MASK_LITTLE_ENDIAN
36373 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
36374 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
36376 #ifdef OPTION_MASK_RELOCATABLE
36377 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
36379 #ifdef OPTION_MASK_STRICT_ALIGN
36380 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
36382 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
36383 { "string", OPTION_MASK_STRING
, false, false },
36386 /* Builtin mask mapping for printing the flags. */
/* NOTE(review): this view is a garbled extraction — original line numbers are
   fused into the text and the table's closing brace is outside the visible
   span; text kept verbatim.  Each entry maps an RS6000_BTM_* builtin-mask bit
   to a printable name: { name, mask, invert, valid_target }.  Consumed by
   rs6000_print_builtin_options below for -mdebug=target output.  */
36387 static struct rs6000_opt_mask
const rs6000_builtin_mask_names
[] =
36389 { "altivec", RS6000_BTM_ALTIVEC
, false, false },
36390 { "vsx", RS6000_BTM_VSX
, false, false },
36391 { "paired", RS6000_BTM_PAIRED
, false, false },
36392 { "fre", RS6000_BTM_FRE
, false, false },
36393 { "fres", RS6000_BTM_FRES
, false, false },
36394 { "frsqrte", RS6000_BTM_FRSQRTE
, false, false },
36395 { "frsqrtes", RS6000_BTM_FRSQRTES
, false, false },
36396 { "popcntd", RS6000_BTM_POPCNTD
, false, false },
36397 { "cell", RS6000_BTM_CELL
, false, false },
36398 { "power8-vector", RS6000_BTM_P8_VECTOR
, false, false },
36399 { "power9-vector", RS6000_BTM_P9_VECTOR
, false, false },
36400 { "power9-misc", RS6000_BTM_P9_MISC
, false, false },
36401 { "crypto", RS6000_BTM_CRYPTO
, false, false },
36402 { "htm", RS6000_BTM_HTM
, false, false },
36403 { "hard-dfp", RS6000_BTM_DFP
, false, false },
36404 { "hard-float", RS6000_BTM_HARD_FLOAT
, false, false },
36405 { "long-double-128", RS6000_BTM_LDBL128
, false, false },
36406 { "float128", RS6000_BTM_FLOAT128
, false, false },
36409 /* Option variables that we want to support inside attribute((target)) and
36410 #pragma GCC target operations. */
/* Descriptor for a boolean option variable (as opposed to an isa-flag bit):
   the option's name plus its byte offsets inside struct gcc_options and
   struct cl_target_option, so it can be toggled generically by
   rs6000_inner_target_options.  (Closing brace of the struct lies outside
   this garbled view; text kept verbatim.)  */
36412 struct rs6000_opt_var
{
36413 const char *name
; /* option name */
36414 size_t global_offset
; /* offset of the option in global_options. */
36415 size_t target_offset
; /* offset of the option in target options. */
/* Table of attribute((target))/#pragma-settable option variables.  Each row
   pairs the offset of a flag in global_options with its offset in
   cl_target_option.  NOTE(review): the extraction dropped the name string of
   several entries (e.g. the rows before "avoid-indexed-addresses"); text is
   kept verbatim — consult the upstream file for the missing names.  */
36418 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
36421 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
36422 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
36423 { "avoid-indexed-addresses",
36424 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
36425 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
36427 offsetof (struct gcc_options
, x_rs6000_paired_float
),
36428 offsetof (struct cl_target_option
, x_rs6000_paired_float
), },
36430 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
36431 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
36432 { "optimize-swaps",
36433 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
36434 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
36435 { "allow-movmisalign",
36436 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
36437 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
36438 { "allow-df-permute",
36439 offsetof (struct gcc_options
, x_TARGET_ALLOW_DF_PERMUTE
),
36440 offsetof (struct cl_target_option
, x_TARGET_ALLOW_DF_PERMUTE
), },
36442 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
36443 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
36445 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
36446 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
36447 { "align-branch-targets",
36448 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
36449 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
36450 { "vectorize-builtins",
36451 offsetof (struct gcc_options
, x_TARGET_VECTORIZE_BUILTINS
),
36452 offsetof (struct cl_target_option
, x_TARGET_VECTORIZE_BUILTINS
), },
36454 offsetof (struct gcc_options
, x_tls_markers
),
36455 offsetof (struct cl_target_option
, x_tls_markers
), },
/* NOTE(review): the last two rows both reference x_TARGET_SCHED_PROLOG —
   presumably distinct option names ("sched-prolog"/"sched-epilog") whose
   name strings were lost in extraction; verify against upstream.  */
36457 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
36458 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
36460 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
36461 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
36464 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36465 parsing. Return true if there were no errors. */
/* ARGS is either a STRING_CST (comma-separated option list) or a TREE_LIST
   of such strings, which is walked recursively.  ATTR_P selects the error
   message wording (attribute vs. pragma).  Recognized forms per token:
   "cpu=<name>", "tune=<name>", "<opt>" / "no-<opt>" looked up first in
   rs6000_opt_masks (isa-flag bits) then in rs6000_opt_vars (plain variables).
   NOTE(review): garbled extraction — braces, return statements and some
   control lines are missing; text kept verbatim.  */
36468 rs6000_inner_target_options (tree args
, bool attr_p
)
36472 if (args
== NULL_TREE
)
36475 else if (TREE_CODE (args
) == STRING_CST
)
/* Work on a stack copy of the string; strtok mutates its input.  */
36477 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
36480 while ((q
= strtok (p
, ",")) != NULL
)
36482 bool error_p
= false;
36483 bool not_valid_p
= false;
36484 const char *cpu_opt
= NULL
;
36487 if (strncmp (q
, "cpu=", 4) == 0)
36489 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
36490 if (cpu_index
>= 0)
36491 rs6000_cpu_index
= cpu_index
;
36498 else if (strncmp (q
, "tune=", 5) == 0)
36500 int tune_index
= rs6000_cpu_name_lookup (q
+5);
36501 if (tune_index
>= 0)
36502 rs6000_tune_index
= tune_index
;
/* Generic option: strip an optional "no-" prefix, then search the
   isa-flag mask table.  */
36512 bool invert
= false;
36516 if (strncmp (r
, "no-", 3) == 0)
36522 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
36523 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
36525 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
36527 if (!rs6000_opt_masks
[i
].valid_target
)
36528 not_valid_p
= true;
/* Record that the user set this flag explicitly.  */
36532 rs6000_isa_flags_explicit
|= mask
;
36534 /* VSX needs altivec, so -mvsx automagically sets
36535 altivec and disables -mavoid-indexed-addresses. */
36538 if (mask
== OPTION_MASK_VSX
)
36540 mask
|= OPTION_MASK_ALTIVEC
;
36541 TARGET_AVOID_XFORM
= 0;
36545 if (rs6000_opt_masks
[i
].invert
)
36549 rs6000_isa_flags
&= ~mask
;
36551 rs6000_isa_flags
|= mask
;
/* Not an isa-flag name: fall back to the option-variable table and
   poke the int directly into global_options via its recorded offset.  */
36556 if (error_p
&& !not_valid_p
)
36558 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
36559 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
36561 size_t j
= rs6000_opt_vars
[i
].global_offset
;
36562 *((int *) ((char *)&global_options
+ j
)) = !invert
;
36564 not_valid_p
= false;
/* Error reporting: choose wording for attribute vs. pragma form.  */
36572 const char *eprefix
, *esuffix
;
36577 eprefix
= "__attribute__((__target__(";
36582 eprefix
= "#pragma GCC target ";
36587 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt
, eprefix
,
36589 else if (not_valid_p
)
36590 error ("%s\"%s\"%s is not allowed", eprefix
, q
, esuffix
);
36592 error ("%s\"%s\"%s is invalid", eprefix
, q
, esuffix
);
/* A TREE_LIST argument: recurse on each TREE_VALUE in the chain.  */
36597 else if (TREE_CODE (args
) == TREE_LIST
)
36601 tree value
= TREE_VALUE (args
);
36604 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
36608 args
= TREE_CHAIN (args
);
36610 while (args
!= NULL_TREE
);
/* Anything else is malformed input.  */
36615 error ("attribute %<target%> argument not a string");
36622 /* Print out the target options as a list for -mdebug=target. */
/* Debug helper: dump ARGS (STRING_CST or TREE_LIST of strings, mirroring the
   shapes accepted by rs6000_inner_target_options) to stderr, each token
   preceded by PREFIX.  NOTE(review): garbled extraction, text verbatim.  */
36625 rs6000_debug_target_options (tree args
, const char *prefix
)
36627 if (args
== NULL_TREE
)
36628 fprintf (stderr
, "%s<NULL>", prefix
);
36630 else if (TREE_CODE (args
) == STRING_CST
)
/* Tokenize a mutable copy on ',' and print each piece quoted.  */
36632 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
36635 while ((q
= strtok (p
, ",")) != NULL
)
36638 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
/* TREE_LIST: recurse over each element in the chain.  */
36643 else if (TREE_CODE (args
) == TREE_LIST
)
36647 tree value
= TREE_VALUE (args
);
36650 rs6000_debug_target_options (value
, prefix
);
36653 args
= TREE_CHAIN (args
);
36655 while (args
!= NULL_TREE
);
/* Other tree codes are not expected here.  */
36659 gcc_unreachable ();
36665 /* Hook to validate attribute((target("..."))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P implementation: parse ARGS for FNDECL,
   re-run option override, and attach the resulting target/optimization
   nodes to the decl.  Global option state is saved in CUR_TARGET and
   restored before returning.  NOTE(review): garbled extraction — braces and
   some statements are missing; text kept verbatim.  */
36668 rs6000_valid_attribute_p (tree fndecl
,
36669 tree
ARG_UNUSED (name
),
36673 struct cl_target_option cur_target
;
36675 tree old_optimize
= build_optimization_node (&global_options
);
36676 tree new_target
, new_optimize
;
36677 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
36679 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
/* Optional -mdebug=target trace of what we were handed.  */
36681 if (TARGET_DEBUG_TARGET
)
36683 tree tname
= DECL_NAME (fndecl
);
36684 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
36686 fprintf (stderr
, "function: %.*s\n",
36687 (int) IDENTIFIER_LENGTH (tname
),
36688 IDENTIFIER_POINTER (tname
));
36690 fprintf (stderr
, "function: unknown\n");
36692 fprintf (stderr
, "args:");
36693 rs6000_debug_target_options (args
, " ");
36694 fprintf (stderr
, "\n");
36697 fprintf (stderr
, "flags: 0x%x\n", flags
);
36699 fprintf (stderr
, "--------------------\n");
36702 /* attribute((target("default"))) does nothing, beyond
36703 affecting multi-versioning. */
36704 if (TREE_VALUE (args
)
36705 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
36706 && TREE_CHAIN (args
) == NULL_TREE
36707 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
36710 old_optimize
= build_optimization_node (&global_options
);
36711 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
36713 /* If the function changed the optimization levels as well as setting target
36714 options, start with the optimizations specified. */
36715 if (func_optimize
&& func_optimize
!= old_optimize
)
36716 cl_optimization_restore (&global_options
,
36717 TREE_OPTIMIZATION (func_optimize
));
36719 /* The target attributes may also change some optimization flags, so update
36720 the optimization options if necessary. */
36721 cl_target_option_save (&cur_target
, &global_options
);
36722 rs6000_cpu_index
= rs6000_tune_index
= -1;
36723 ret
= rs6000_inner_target_options (args
, true);
36725 /* Set up any additional state. */
36728 ret
= rs6000_option_override_internal (false);
36729 new_target
= build_target_option_node (&global_options
);
36734 new_optimize
= build_optimization_node (&global_options
);
/* Record the resulting nodes on the function decl.  */
36741 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
36743 if (old_optimize
!= new_optimize
)
36744 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
/* Restore the global option state saved above.  */
36747 cl_target_option_restore (&global_options
, &cur_target
);
36749 if (old_optimize
!= new_optimize
)
36750 cl_optimization_restore (&global_options
,
36751 TREE_OPTIMIZATION (old_optimize
));
36757 /* Hook to validate the current #pragma GCC target and set the state, and
36758 update the macros based on what was changed. If ARGS is NULL, then
36759 POP_TARGET is used to reset the options. */
/* NOTE(review): garbled extraction — braces/returns missing, text verbatim.
   On success the new option node becomes target_option_current_node, and
   preprocessor macros are re-synced by diffing old vs. new flag masks.  */
36762 rs6000_pragma_target_parse (tree args
, tree pop_target
)
36764 tree prev_tree
= build_target_option_node (&global_options
);
36766 struct cl_target_option
*prev_opt
, *cur_opt
;
36767 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
36768 HOST_WIDE_INT prev_bumask
, cur_bumask
, diff_bumask
;
/* Optional -mdebug=target trace.  */
36770 if (TARGET_DEBUG_TARGET
)
36772 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
36773 fprintf (stderr
, "args:");
36774 rs6000_debug_target_options (args
, " ");
36775 fprintf (stderr
, "\n");
36779 fprintf (stderr
, "pop_target:\n");
36780 debug_tree (pop_target
);
36783 fprintf (stderr
, "pop_target: <NULL>\n");
36785 fprintf (stderr
, "--------------------\n");
/* Pop case: restore either the given saved node or the default node.  */
36790 cur_tree
= ((pop_target
)
36792 : target_option_default_node
);
36793 cl_target_option_restore (&global_options
,
36794 TREE_TARGET_OPTION (cur_tree
));
/* Parse case: run the shared inner parser, then the full override.  */
36798 rs6000_cpu_index
= rs6000_tune_index
= -1;
36799 if (!rs6000_inner_target_options (args
, false)
36800 || !rs6000_option_override_internal (false)
36801 || (cur_tree
= build_target_option_node (&global_options
))
36804 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
36805 fprintf (stderr
, "invalid pragma\n");
36811 target_option_current_node
= cur_tree
;
36813 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36814 change the macros that are defined. */
36815 if (rs6000_target_modify_macros_ptr
)
36817 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
36818 prev_bumask
= prev_opt
->x_rs6000_builtin_mask
;
36819 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
36821 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
36822 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
36823 cur_bumask
= cur_opt
->x_rs6000_builtin_mask
;
/* XOR gives the set of bits that changed in either direction.  */
36825 diff_bumask
= (prev_bumask
^ cur_bumask
);
36826 diff_flags
= (prev_flags
^ cur_flags
);
36828 if ((diff_flags
!= 0) || (diff_bumask
!= 0))
36830 /* Delete old macros. */
36831 rs6000_target_modify_macros_ptr (false,
36832 prev_flags
& diff_flags
,
36833 prev_bumask
& diff_bumask
);
36835 /* Define new macros. */
36836 rs6000_target_modify_macros_ptr (true,
36837 cur_flags
& diff_flags
,
36838 cur_bumask
& diff_bumask
);
36846 /* Remember the last target of rs6000_set_current_function. */
/* GC-rooted cache used to avoid redundant target re-initialization when the
   set_current_function hook fires repeatedly for the same decl.  */
36847 static GTY(()) tree rs6000_previous_fndecl
;
36849 /* Establish appropriate back-end context for processing the function
36850 FNDECL. The argument might be NULL to indicate processing at top
36851 level, outside of any function scope. */
/* Compares the decl's target-option node with the previous function's and
   restores global options / target globals only when they differ.
   NOTE(review): garbled extraction, text kept verbatim.  */
36853 rs6000_set_current_function (tree fndecl
)
36855 tree old_tree
= (rs6000_previous_fndecl
36856 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
)
36859 tree new_tree
= (fndecl
36860 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
/* Optional -mdebug=target trace of the old/new target nodes.  */
36863 if (TARGET_DEBUG_TARGET
)
36865 bool print_final
= false;
36866 fprintf (stderr
, "\n==================== rs6000_set_current_function");
36869 fprintf (stderr
, ", fndecl %s (%p)",
36870 (DECL_NAME (fndecl
)
36871 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
36872 : "<unknown>"), (void *)fndecl
);
36874 if (rs6000_previous_fndecl
)
36875 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
36877 fprintf (stderr
, "\n");
36880 fprintf (stderr
, "\nnew fndecl target specific options:\n");
36881 debug_tree (new_tree
);
36882 print_final
= true;
36887 fprintf (stderr
, "\nold fndecl target specific options:\n");
36888 debug_tree (old_tree
);
36889 print_final
= true;
36893 fprintf (stderr
, "--------------------\n");
36896 /* Only change the context if the function changes. This hook is called
36897 several times in the course of compiling a function, and we don't want to
36898 slow things down too much or call target_reinit when it isn't safe. */
36899 if (fndecl
&& fndecl
!= rs6000_previous_fndecl
)
36901 rs6000_previous_fndecl
= fndecl
;
36902 if (old_tree
== new_tree
)
/* Switching to a function with its own (non-default) target node:
   restore its options and its saved target globals.  */
36905 else if (new_tree
&& new_tree
!= target_option_default_node
)
36907 cl_target_option_restore (&global_options
,
36908 TREE_TARGET_OPTION (new_tree
));
36909 if (TREE_TARGET_GLOBALS (new_tree
))
36910 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
36912 TREE_TARGET_GLOBALS (new_tree
)
36913 = save_target_globals_default_opts ();
/* Leaving a specialized function: fall back to the current node.  */
36916 else if (old_tree
&& old_tree
!= target_option_default_node
)
36918 new_tree
= target_option_current_node
;
36919 cl_target_option_restore (&global_options
,
36920 TREE_TARGET_OPTION (new_tree
));
36921 if (TREE_TARGET_GLOBALS (new_tree
))
36922 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
36923 else if (new_tree
== target_option_default_node
)
36924 restore_target_globals (&default_target_globals
);
36926 TREE_TARGET_GLOBALS (new_tree
)
36927 = save_target_globals_default_opts ();
36933 /* Save the current options */
/* TARGET_OPTION_SAVE hook: copy the rs6000 isa flags (and their explicit
   mask) from OPTS into the cl_target_option PTR.  NOTE(review): the return
   type line and braces were lost in extraction; text kept verbatim.  */
36936 rs6000_function_specific_save (struct cl_target_option
*ptr
,
36937 struct gcc_options
*opts
)
36939 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
36940 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
36943 /* Restore the current options */
/* TARGET_OPTION_RESTORE hook: inverse of the save hook above, then re-run
   the option override to re-derive dependent state from the flags.  */
36946 rs6000_function_specific_restore (struct gcc_options
*opts
,
36947 struct cl_target_option
*ptr
)
36950 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
36951 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
36952 (void) rs6000_option_override_internal (false);
36955 /* Print the current options */
/* TARGET_OPTION_PRINT hook: dump both the effective and the explicitly-set
   isa flag words from PTR to FILE at the given INDENT.  */
36958 rs6000_function_specific_print (FILE *file
, int indent
,
36959 struct cl_target_option
*ptr
)
36961 rs6000_print_isa_options (file
, indent
, "Isa options set",
36962 ptr
->x_rs6000_isa_flags
);
36964 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
36965 ptr
->x_rs6000_isa_flags_explicit
);
36968 /* Helper function to print the current isa or misc options on a line. */
/* Shared pretty-printer for flag words: walks OPTS[0..NUM_ELEMENTS) and
   prints PREFIX + ("no-") + name for each mask bit, wrapping with a
   backslash-continuation once the line passes max_column.  NOTE(review):
   output is split between FILE and stderr in the original (some fprintf
   calls target stderr even though FILE is passed) — kept verbatim; garbled
   extraction, some lines missing.  */
36971 rs6000_print_options_internal (FILE *file
,
36973 const char *string
,
36974 HOST_WIDE_INT flags
,
36975 const char *prefix
,
36976 const struct rs6000_opt_mask
*opts
,
36977 size_t num_elements
)
36980 size_t start_column
= 0;
36982 size_t max_column
= 120;
36983 size_t prefix_len
= strlen (prefix
);
36984 size_t comma_len
= 0;
36985 const char *comma
= "";
36988 start_column
+= fprintf (file
, "%*s", indent
, "");
36992 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
36996 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
36998 /* Print the various mask options. */
36999 cur_column
= start_column
;
37000 for (i
= 0; i
< num_elements
; i
++)
37002 bool invert
= opts
[i
].invert
;
37003 const char *name
= opts
[i
].name
;
37004 const char *no_str
= "";
37005 HOST_WIDE_INT mask
= opts
[i
].mask
;
37006 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
/* Account for a "no-" prefix when the bit state calls for it
   (sense depends on the entry's invert flag).  */
37010 if ((flags
& mask
) == 0)
37013 len
+= sizeof ("no-") - 1;
37021 if ((flags
& mask
) != 0)
37024 len
+= sizeof ("no-") - 1;
/* Wrap long lines with a continuation aligned to the start column.  */
37031 if (cur_column
> max_column
)
37033 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
37034 cur_column
= start_column
+ len
;
37038 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
37040 comma_len
= sizeof (", ") - 1;
37043 fputs ("\n", file
);
37046 /* Helper function to print the current isa options on a line. */
/* Thin wrapper: prints FLAGS against the rs6000_opt_masks table using the
   "-m" option prefix.  */
37049 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
37050 HOST_WIDE_INT flags
)
37052 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
37053 &rs6000_opt_masks
[0],
37054 ARRAY_SIZE (rs6000_opt_masks
));
/* Thin wrapper: prints FLAGS against the rs6000_builtin_mask_names table
   with no option prefix (builtin masks are not command-line switches).  */
37058 rs6000_print_builtin_options (FILE *file
, int indent
, const char *string
,
37059 HOST_WIDE_INT flags
)
37061 rs6000_print_options_internal (file
, indent
, string
, flags
, "",
37062 &rs6000_builtin_mask_names
[0],
37063 ARRAY_SIZE (rs6000_builtin_mask_names
));
37066 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
37067 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
37068 -mvsx-timode, -mupper-regs-df).
37070 If the user used -mno-power8-vector, we need to turn off all of the implicit
37071 ISA 2.07 and 3.0 options that relate to the vector unit.
37073 If the user used -mno-power9-vector, we need to turn off all of the implicit
37074 ISA 3.0 options that relate to the vector unit.
37076 This function does not handle explicit options such as the user specifying
37077 -mdirect-move. These are handled in rs6000_option_override_internal, and
37078 the appropriate error is given if needed.
37080 We return a mask of all of the implicit options that should not be enabled
/* NOTE(review): garbled extraction — braces and some lines are missing;
   text kept verbatim.  */
37083 static HOST_WIDE_INT
37084 rs6000_disable_incompatible_switches (void)
37086 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
/* Ordered most-specific first: power9-vector, power8-vector, vsx.  */
37089 static const struct {
37090 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
37091 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
37092 const char *const name
; /* name of the switch. */
37094 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
37095 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
37096 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
37099 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
37101 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
/* Only act when the flag is off AND the user turned it off
   explicitly (not merely defaulted off).  */
37103 if ((rs6000_isa_flags
& no_flag
) == 0
37104 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
37106 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
37107 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
/* Report each explicitly-set dependent option that we are
   forced to turn off, naming it via the big mask table.  */
37113 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
37114 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
37116 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
37117 error ("-mno-%s turns off -m%s",
37119 rs6000_opt_masks
[j
].name
);
37122 gcc_assert (!set_flags
);
/* Clear the dependent bits and remember them as not-to-enable.  */
37125 rs6000_isa_flags
&= ~dep_flags
;
37126 ignore_masks
|= no_flag
| dep_flags
;
/* -mpower9-dform also requires the power9 vector support.  */
37130 if (!TARGET_P9_VECTOR
37131 && (rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) != 0
37132 && TARGET_P9_DFORM_BOTH
> 0)
37134 error ("-mno-power9-vector turns off -mpower9-dform");
37135 TARGET_P9_DFORM_BOTH
= 0;
37138 return ignore_masks
;
37142 /* Helper function for printing the function name when debugging. */
/* Returns FN's identifier string, or "<no-name>" when the decl has no
   DECL_NAME.  NOTE(review): the null-check line for FN itself is missing
   from this garbled view; text kept verbatim.  */
37144 static const char *
37145 get_decl_name (tree fn
)
37152 name
= DECL_NAME (fn
);
37154 return "<no-name>";
37156 return IDENTIFIER_POINTER (name
);
37159 /* Return the clone id of the target we are compiling code for in a target
37160 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
37161 the priority list for the target clones (ordered from lowest to
/* Scans rs6000_clone_map from highest priority down and returns the first
   entry whose isa mask intersects the function's isa flags; CLONE_DEFAULT
   for the "default" version.  NOTE(review): garbled extraction.  */
37165 rs6000_clone_priority (tree fndecl
)
37167 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
37168 HOST_WIDE_INT isa_masks
;
37169 int ret
= CLONE_DEFAULT
;
37170 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
37171 const char *attrs_str
= NULL
;
37173 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
37174 attrs_str
= TREE_STRING_POINTER (attrs
);
37176 /* Return priority zero for default function. Return the ISA needed for the
37177 function if it is not the default. */
37178 if (strcmp (attrs_str
, "default") != 0)
37180 if (fn_opts
== NULL_TREE
)
37181 fn_opts
= target_option_default_node
;
37183 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
37184 isa_masks
= rs6000_isa_flags
;
37186 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
/* Highest matching clone wins; index 0 (default) is the fallback.  */
37188 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
37189 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
37193 if (TARGET_DEBUG_TARGET
)
37194 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
37195 get_decl_name (fndecl
), ret
);
37200 /* This compares the priority of target features in function DECL1 and DECL2.
37201 It returns positive value if DECL1 is higher priority, negative value if
37202 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37203 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
/* Simple difference of the two clone priorities; debug-traced when
   -mdebug=target is on.  */
37206 rs6000_compare_version_priority (tree decl1
, tree decl2
)
37208 int priority1
= rs6000_clone_priority (decl1
);
37209 int priority2
= rs6000_clone_priority (decl2
);
37210 int ret
= priority1
- priority2
;
37212 if (TARGET_DEBUG_TARGET
)
37213 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
37214 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
37219 /* Make a dispatcher declaration for the multi-versioned function DECL.
37220 Calls to DECL function will be replaced with calls to the dispatcher
37221 by the front-end. Returns the decl of the dispatcher function. */
/* Finds the default version in the cgraph version chain, moves it to the
   front, and (when ifunc is available) creates the dispatcher decl shared by
   all versions.  NOTE(review): garbled extraction — some lines (including
   the NULL return for the no-default case) are missing; text verbatim.  */
37224 rs6000_get_function_versions_dispatcher (void *decl
)
37226 tree fn
= (tree
) decl
;
37227 struct cgraph_node
*node
= NULL
;
37228 struct cgraph_node
*default_node
= NULL
;
37229 struct cgraph_function_version_info
*node_v
= NULL
;
37230 struct cgraph_function_version_info
*first_v
= NULL
;
37232 tree dispatch_decl
= NULL
;
37234 struct cgraph_function_version_info
*default_version_info
= NULL
;
37235 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
37237 if (TARGET_DEBUG_TARGET
)
37238 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
37239 get_decl_name (fn
));
37241 node
= cgraph_node::get (fn
);
37242 gcc_assert (node
!= NULL
);
37244 node_v
= node
->function_version ();
37245 gcc_assert (node_v
!= NULL
);
/* Already created for another version of this function — reuse it.  */
37247 if (node_v
->dispatcher_resolver
!= NULL
)
37248 return node_v
->dispatcher_resolver
;
37250 /* Find the default version and make it the first node. */
37252 /* Go to the beginning of the chain. */
37253 while (first_v
->prev
!= NULL
)
37254 first_v
= first_v
->prev
;
37256 default_version_info
= first_v
;
37257 while (default_version_info
!= NULL
)
37259 const tree decl2
= default_version_info
->this_node
->decl
;
37260 if (is_function_default_version (decl2
))
37262 default_version_info
= default_version_info
->next
;
37265 /* If there is no default node, just return NULL. */
37266 if (default_version_info
== NULL
)
37269 /* Make default info the first node. */
/* Standard doubly-linked-list unlink + push-front of the default node.  */
37270 if (first_v
!= default_version_info
)
37272 default_version_info
->prev
->next
= default_version_info
->next
;
37273 if (default_version_info
->next
)
37274 default_version_info
->next
->prev
= default_version_info
->prev
;
37275 first_v
->prev
= default_version_info
;
37276 default_version_info
->next
= first_v
;
37277 default_version_info
->prev
= NULL
;
37280 default_node
= default_version_info
->this_node
;
37282 if (targetm
.has_ifunc_p ())
37284 struct cgraph_function_version_info
*it_v
= NULL
;
37285 struct cgraph_node
*dispatcher_node
= NULL
;
37286 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
37288 /* Right now, the dispatching is done via ifunc. */
37289 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
37291 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
37292 gcc_assert (dispatcher_node
!= NULL
);
37293 dispatcher_node
->dispatcher_function
= 1;
37294 dispatcher_version_info
37295 = dispatcher_node
->insert_new_function_version ();
37296 dispatcher_version_info
->next
= default_version_info
;
37297 dispatcher_node
->definition
= 1;
37299 /* Set the dispatcher for all the versions. */
37300 it_v
= default_version_info
;
37301 while (it_v
!= NULL
)
37303 it_v
->dispatcher_resolver
= dispatch_decl
;
/* No ifunc support on this target: multiversioning cannot work.  */
37309 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
37310 "multiversioning needs ifunc which is not supported "
37314 return dispatch_decl
;
37317 /* Make the resolver function decl to dispatch the versions of a multi-
37318 versioned function, DEFAULT_DECL. Create an empty basic block in the
37319 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
/* NOTE(review): garbled extraction — return type, braces and the final
   return statement are missing from view; text kept verbatim.  */
37323 make_resolver_func (const tree default_decl
,
37324 const tree dispatch_decl
,
37325 basic_block
*empty_bb
)
37327 /* IFUNC's have to be globally visible. So, if the default_decl is
37328 not, then the name of the IFUNC should be made unique. */
37329 bool is_uniq
= (TREE_PUBLIC (default_decl
) == 0);
37331 /* Append the filename to the resolver function if the versions are
37332 not externally visible. This is because the resolver function has
37333 to be externally visible for the loader to find it. So, appending
37334 the filename will prevent conflicts with a resolver function from
37335 another module which is based on the same version name. */
37336 char *resolver_name
= make_unique_name (default_decl
, "resolver", is_uniq
);
37338 /* The resolver function should return a (void *). */
37339 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
37340 tree decl
= build_fn_decl (resolver_name
, type
);
37341 tree decl_name
= get_identifier (resolver_name
);
37342 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
37344 DECL_NAME (decl
) = decl_name
;
37345 TREE_USED (decl
) = 1;
37346 DECL_ARTIFICIAL (decl
) = 1;
37347 DECL_IGNORED_P (decl
) = 0;
37348 /* IFUNC resolvers have to be externally visible. */
37349 TREE_PUBLIC (decl
) = 1;
37350 DECL_UNINLINABLE (decl
) = 1;
37352 /* Resolver is not external, body is generated. */
37353 DECL_EXTERNAL (decl
) = 0;
37354 DECL_EXTERNAL (dispatch_decl
) = 0;
37356 DECL_CONTEXT (decl
) = NULL_TREE
;
37357 DECL_INITIAL (decl
) = make_node (BLOCK
);
37358 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
37360 if (DECL_COMDAT_GROUP (default_decl
) || TREE_PUBLIC (default_decl
))
37362 /* In this case, each translation unit with a call to this
37363 versioned function will put out a resolver. Ensure it
37364 is comdat to keep just one copy. */
37365 DECL_COMDAT (decl
) = 1;
37366 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
37369 /* Build result decl and add to function_decl. */
37370 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
37371 DECL_ARTIFICIAL (t
) = 1;
37372 DECL_IGNORED_P (t
) = 1;
37373 DECL_RESULT (decl
) = t
;
/* Gimplify and register the new (still empty) function body.  */
37375 gimplify_function_tree (decl
);
37376 push_cfun (DECL_STRUCT_FUNCTION (decl
));
37377 *empty_bb
= init_lowered_empty_function (decl
, false,
37378 profile_count::uninitialized ());
37380 cgraph_node::add_new_function (decl
, true);
37381 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
37385 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37386 DECL_ATTRIBUTES (dispatch_decl
)
37387 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
37389 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
/* resolver_name was allocated by make_unique_name; release it.  */
37390 XDELETEVEC (resolver_name
);
37394 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37395 return a pointer to VERSION_DECL if we are running on a machine that
37396 supports the index CLONE_ISA hardware architecture bits. This function will
37397 be called during version dispatch to decide which function version to
37398 execute. It returns the basic block at the end, to which more conditions
/* Emits either an unconditional "return &version" (default clone) or a
   __builtin_cpu_supports guard followed by that return, splitting NEW_BB
   into condition / taken / fallthrough blocks.  NOTE(review): garbled
   extraction — some lines (returns, pop_cfun) are missing; text verbatim.  */
37402 add_condition_to_bb (tree function_decl
, tree version_decl
,
37403 int clone_isa
, basic_block new_bb
)
37405 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
37407 gcc_assert (new_bb
!= NULL
);
37408 gimple_seq gseq
= bb_seq (new_bb
);
/* Build "tmp = (void *) &version_decl; return tmp;".  */
37411 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
37412 build_fold_addr_expr (version_decl
));
37413 tree result_var
= create_tmp_var (ptr_type_node
);
37414 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
37415 gimple
*return_stmt
= gimple_build_return (result_var
);
/* Default clone: no guard, always return this version.  */
37417 if (clone_isa
== CLONE_DEFAULT
)
37419 gimple_seq_add_stmt (&gseq
, convert_stmt
);
37420 gimple_seq_add_stmt (&gseq
, return_stmt
);
37421 set_bb_seq (new_bb
, gseq
);
37422 gimple_set_bb (convert_stmt
, new_bb
);
37423 gimple_set_bb (return_stmt
, new_bb
);
/* Non-default clone: guard with __builtin_cpu_supports("<name>").  */
37428 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
37429 tree cond_var
= create_tmp_var (bool_int_type_node
);
37430 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BUILTIN_CPU_SUPPORTS
];
37431 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
37432 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
37433 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
37434 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
37436 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
37437 gimple_set_bb (call_cond_stmt
, new_bb
);
37438 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
37440 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
37441 NULL_TREE
, NULL_TREE
);
37442 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
37443 gimple_set_bb (if_else_stmt
, new_bb
);
37444 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
37446 gimple_seq_add_stmt (&gseq
, convert_stmt
);
37447 gimple_seq_add_stmt (&gseq
, return_stmt
);
37448 set_bb_seq (new_bb
, gseq
);
/* Split: bb1 ends in the cond; bb2 holds the return; bb3 is the
   fall-through where the next condition will be chained.  */
37450 basic_block bb1
= new_bb
;
37451 edge e12
= split_block (bb1
, if_else_stmt
);
37452 basic_block bb2
= e12
->dest
;
37453 e12
->flags
&= ~EDGE_FALLTHRU
;
37454 e12
->flags
|= EDGE_TRUE_VALUE
;
37456 edge e23
= split_block (bb2
, return_stmt
);
37457 gimple_set_bb (convert_stmt
, bb2
);
37458 gimple_set_bb (return_stmt
, bb2
);
37460 basic_block bb3
= e23
->dest
;
37461 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
37464 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
37470 /* This function generates the dispatch function for multi-versioned functions.
37471 DISPATCH_DECL is the function which will contain the dispatch logic.
37472 FNDECLS are the function choices for dispatch, and is a tree chain.
37473 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37474 code is generated. */
/* Sorts the version decls into a priority-indexed CLONES array, then chains
   one condition block per clone (highest priority first) into the resolver.
   NOTE(review): garbled extraction — parameter list and braces are
   incomplete in view; text kept verbatim.  */
37477 dispatch_function_versions (tree dispatch_decl
,
37479 basic_block
*empty_bb
)
37483 vec
<tree
> *fndecls
;
37484 tree clones
[CLONE_MAX
];
37486 if (TARGET_DEBUG_TARGET
)
37487 fputs ("dispatch_function_versions, top\n", stderr
);
37489 gcc_assert (dispatch_decl
!= NULL
37490 && fndecls_p
!= NULL
37491 && empty_bb
!= NULL
);
37493 /* fndecls_p is actually a vector. */
37494 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
37496 /* At least one more version other than the default. */
37497 gcc_assert (fndecls
->length () >= 2);
37499 /* The first version in the vector is the default decl. */
37500 memset ((void *) clones
, '\0', sizeof (clones
));
37501 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
37503 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
37504 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
37505 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
37506 recent glibc. If we ever need to call __builtin_cpu_init, we would need
37507 to insert the code here to do the call. */
/* Bucket remaining versions by clone priority; first decl per slot wins.  */
37509 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
37511 int priority
= rs6000_clone_priority (ele
);
37512 if (!clones
[priority
])
37513 clones
[priority
] = ele
;
/* Emit guards from the highest-priority clone down to the default.  */
37516 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
37519 if (TARGET_DEBUG_TARGET
)
37520 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
37521 ix
, get_decl_name (clones
[ix
]));
37523 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
37530 /* Generate the dispatching code body to dispatch multi-versioned function
37531 DECL. The target hook is called to process the "target" attributes and
37532 provide the code to dispatch the right function at run-time. NODE points
37533 to the dispatcher decl whose body will be created. */
37536 rs6000_generate_version_dispatcher_body (void *node_p
)
37539 basic_block empty_bb
;
37540 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
37541 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
37543 if (ninfo
->dispatcher_resolver
)
37544 return ninfo
->dispatcher_resolver
;
37546 /* node is going to be an alias, so remove the finalized bit. */
37547 node
->definition
= false;
37549 /* The first version in the chain corresponds to the default version. */
37550 ninfo
->dispatcher_resolver
= resolver
37551 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
37553 if (TARGET_DEBUG_TARGET
)
37554 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
37555 get_decl_name (resolver
));
37557 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
37558 auto_vec
<tree
, 2> fn_ver_vec
;
37560 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
37562 vinfo
= vinfo
->next
)
37564 struct cgraph_node
*version
= vinfo
->this_node
;
37565 /* Check for virtual functions here again, as by this time it should
37566 have been determined if this function needs a vtable index or
37567 not. This happens for methods in derived classes that override
37568 virtual methods in base classes but are not explicitly marked as
37570 if (DECL_VINDEX (version
->decl
))
37571 sorry ("Virtual function multiversioning not supported");
37573 fn_ver_vec
.safe_push (version
->decl
);
37576 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
37577 cgraph_edge::rebuild_edges ();
37583 /* Hook to determine if one function can safely inline another. */
37586 rs6000_can_inline_p (tree caller
, tree callee
)
37589 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
37590 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
37592 /* If callee has no option attributes, then it is ok to inline. */
37596 /* If caller has no option attributes, but callee does then it is not ok to
37598 else if (!caller_tree
)
37603 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
37604 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
37606 /* Callee's options should a subset of the caller's, i.e. a vsx function
37607 can inline an altivec function but a non-vsx function can't inline a
37609 if ((caller_opts
->x_rs6000_isa_flags
& callee_opts
->x_rs6000_isa_flags
)
37610 == callee_opts
->x_rs6000_isa_flags
)
37614 if (TARGET_DEBUG_TARGET
)
37615 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
37616 get_decl_name (caller
), get_decl_name (callee
),
37617 (ret
? "can" : "cannot"));
37622 /* Allocate a stack temp and fixup the address so it meets the particular
37623 memory requirements (either offetable or REG+REG addressing). */
37626 rs6000_allocate_stack_temp (machine_mode mode
,
37627 bool offsettable_p
,
37630 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37631 rtx addr
= XEXP (stack
, 0);
37632 int strict_p
= (reload_in_progress
|| reload_completed
);
37634 if (!legitimate_indirect_address_p (addr
, strict_p
))
37637 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
37638 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
37640 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
37641 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
37647 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37648 to such a form to deal with memory reference instructions like STFIWX that
37649 only take reg+reg addressing. */
37652 rs6000_address_for_fpconvert (rtx x
)
37654 int strict_p
= (reload_in_progress
|| reload_completed
);
37657 gcc_assert (MEM_P (x
));
37658 addr
= XEXP (x
, 0);
37659 if (! legitimate_indirect_address_p (addr
, strict_p
)
37660 && ! legitimate_indexed_address_p (addr
, strict_p
))
37662 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
37664 rtx reg
= XEXP (addr
, 0);
37665 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
37666 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
37667 gcc_assert (REG_P (reg
));
37668 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
37671 else if (GET_CODE (addr
) == PRE_MODIFY
)
37673 rtx reg
= XEXP (addr
, 0);
37674 rtx expr
= XEXP (addr
, 1);
37675 gcc_assert (REG_P (reg
));
37676 gcc_assert (GET_CODE (expr
) == PLUS
);
37677 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
37681 x
= replace_equiv_address (x
, copy_addr_to_reg (addr
));
37687 /* Given a memory reference, if it is not in the form for altivec memory
37688 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
37689 convert to the altivec format. */
37692 rs6000_address_for_altivec (rtx x
)
37694 gcc_assert (MEM_P (x
));
37695 if (!altivec_indexed_or_indirect_operand (x
, GET_MODE (x
)))
37697 rtx addr
= XEXP (x
, 0);
37698 int strict_p
= (reload_in_progress
|| reload_completed
);
37700 if (!legitimate_indexed_address_p (addr
, strict_p
)
37701 && !legitimate_indirect_address_p (addr
, strict_p
))
37702 addr
= copy_to_mode_reg (Pmode
, addr
);
37704 addr
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
37705 x
= change_address (x
, GET_MODE (x
), addr
);
37711 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37713 On the RS/6000, all integer constants are acceptable, most won't be valid
37714 for particular insns, though. Only easy FP constants are acceptable. */
37717 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
37719 if (TARGET_ELF
&& tls_referenced_p (x
))
37722 return ((GET_CODE (x
) != CONST_DOUBLE
&& GET_CODE (x
) != CONST_VECTOR
)
37723 || GET_MODE (x
) == VOIDmode
37724 || (TARGET_POWERPC64
&& mode
== DImode
)
37725 || easy_fp_constant (x
, mode
)
37726 || easy_vector_constant (x
, mode
));
37730 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37733 chain_already_loaded (rtx_insn
*last
)
37735 for (; last
!= NULL
; last
= PREV_INSN (last
))
37737 if (NONJUMP_INSN_P (last
))
37739 rtx patt
= PATTERN (last
);
37741 if (GET_CODE (patt
) == SET
)
37743 rtx lhs
= XEXP (patt
, 0);
37745 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
37753 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37756 rs6000_call_aix (rtx value
, rtx func_desc
, rtx flag
, rtx cookie
)
37758 const bool direct_call_p
37759 = GET_CODE (func_desc
) == SYMBOL_REF
&& SYMBOL_REF_FUNCTION_P (func_desc
);
37760 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
37761 rtx toc_load
= NULL_RTX
;
37762 rtx toc_restore
= NULL_RTX
;
37764 rtx abi_reg
= NULL_RTX
;
37769 /* Handle longcall attributes. */
37770 if (INTVAL (cookie
) & CALL_LONG
)
37771 func_desc
= rs6000_longcall_ref (func_desc
);
37773 /* Handle indirect calls. */
37774 if (GET_CODE (func_desc
) != SYMBOL_REF
37775 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func_desc
)))
37777 /* Save the TOC into its reserved slot before the call,
37778 and prepare to restore it after the call. */
37779 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
37780 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
37781 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
37782 gen_rtx_PLUS (Pmode
, stack_ptr
,
37783 stack_toc_offset
));
37784 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
37785 gen_rtvec (1, stack_toc_offset
),
37787 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
37789 /* Can we optimize saving the TOC in the prologue or
37790 do we need to do it at every call? */
37791 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
37792 cfun
->machine
->save_toc_in_prologue
= true;
37795 MEM_VOLATILE_P (stack_toc_mem
) = 1;
37796 emit_move_insn (stack_toc_mem
, toc_reg
);
37799 if (DEFAULT_ABI
== ABI_ELFv2
)
37801 /* A function pointer in the ELFv2 ABI is just a plain address, but
37802 the ABI requires it to be loaded into r12 before the call. */
37803 func_addr
= gen_rtx_REG (Pmode
, 12);
37804 emit_move_insn (func_addr
, func_desc
);
37805 abi_reg
= func_addr
;
37809 /* A function pointer under AIX is a pointer to a data area whose
37810 first word contains the actual address of the function, whose
37811 second word contains a pointer to its TOC, and whose third word
37812 contains a value to place in the static chain register (r11).
37813 Note that if we load the static chain, our "trampoline" need
37814 not have any executable code. */
37816 /* Load up address of the actual function. */
37817 func_desc
= force_reg (Pmode
, func_desc
);
37818 func_addr
= gen_reg_rtx (Pmode
);
37819 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func_desc
));
37821 /* Prepare to load the TOC of the called function. Note that the
37822 TOC load must happen immediately before the actual call so
37823 that unwinding the TOC registers works correctly. See the
37824 comment in frob_update_context. */
37825 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
37826 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
37827 gen_rtx_PLUS (Pmode
, func_desc
,
37829 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
37831 /* If we have a static chain, load it up. But, if the call was
37832 originally direct, the 3rd word has not been written since no
37833 trampoline has been built, so we ought not to load it, lest we
37834 override a static chain value. */
37836 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37837 && !chain_already_loaded (get_current_sequence ()->next
->last
))
37839 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
37840 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
37841 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
37842 gen_rtx_PLUS (Pmode
, func_desc
,
37844 emit_move_insn (sc_reg
, func_sc_mem
);
37851 /* Direct calls use the TOC: for local calls, the callee will
37852 assume the TOC register is set; for non-local calls, the
37853 PLT stub needs the TOC register. */
37855 func_addr
= func_desc
;
37858 /* Create the call. */
37859 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), flag
);
37860 if (value
!= NULL_RTX
)
37861 call
[0] = gen_rtx_SET (value
, call
[0]);
37865 call
[n_call
++] = toc_load
;
37867 call
[n_call
++] = toc_restore
;
37869 call
[n_call
++] = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNO
));
37871 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
37872 insn
= emit_call_insn (insn
);
37874 /* Mention all registers defined by the ABI to hold information
37875 as uses in CALL_INSN_FUNCTION_USAGE. */
37877 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
37880 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37883 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx flag
, rtx cookie
)
37888 gcc_assert (INTVAL (cookie
) == 0);
37890 /* Create the call. */
37891 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_desc
), flag
);
37892 if (value
!= NULL_RTX
)
37893 call
[0] = gen_rtx_SET (value
, call
[0]);
37895 call
[1] = simple_return_rtx
;
37897 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
37898 insn
= emit_call_insn (insn
);
37900 /* Note use of the TOC register. */
37901 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), gen_rtx_REG (Pmode
, TOC_REGNUM
));
37904 /* Return whether we need to always update the saved TOC pointer when we update
37905 the stack pointer. */
37908 rs6000_save_toc_in_prologue_p (void)
37910 return (cfun
&& cfun
->machine
&& cfun
->machine
->save_toc_in_prologue
);
37913 #ifdef HAVE_GAS_HIDDEN
37914 # define USE_HIDDEN_LINKONCE 1
37916 # define USE_HIDDEN_LINKONCE 0
37919 /* Fills in the label name that should be used for a 476 link stack thunk. */
37922 get_ppc476_thunk_name (char name
[32])
37924 gcc_assert (TARGET_LINK_STACK
);
37926 if (USE_HIDDEN_LINKONCE
)
37927 sprintf (name
, "__ppc476.get_thunk");
37929 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
37932 /* This function emits the simple thunk routine that is used to preserve
37933 the link stack on the 476 cpu. */
37935 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
37937 rs6000_code_end (void)
37942 if (!TARGET_LINK_STACK
)
37945 get_ppc476_thunk_name (name
);
37947 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
37948 build_function_type_list (void_type_node
, NULL_TREE
));
37949 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
37950 NULL_TREE
, void_type_node
);
37951 TREE_PUBLIC (decl
) = 1;
37952 TREE_STATIC (decl
) = 1;
37955 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
37957 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
37958 targetm
.asm_out
.unique_section (decl
, 0);
37959 switch_to_section (get_named_section (decl
, NULL
, 0));
37960 DECL_WEAK (decl
) = 1;
37961 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
37962 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
37963 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
37964 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
37969 switch_to_section (text_section
);
37970 ASM_OUTPUT_LABEL (asm_out_file
, name
);
37973 DECL_INITIAL (decl
) = make_node (BLOCK
);
37974 current_function_decl
= decl
;
37975 allocate_struct_function (decl
, false);
37976 init_function_start (decl
);
37977 first_function_block_is_cold
= false;
37978 /* Make sure unwind info is emitted for the thunk if needed. */
37979 final_start_function (emit_barrier (), asm_out_file
, 1);
37981 fputs ("\tblr\n", asm_out_file
);
37983 final_end_function ();
37984 init_insn_lengths ();
37985 free_after_compilation (cfun
);
37987 current_function_decl
= NULL
;
37990 /* Add r30 to hard reg set if the prologue sets it up and it is not
37991 pic_offset_table_rtx. */
37994 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
37996 if (!TARGET_SINGLE_PIC_BASE
37998 && TARGET_MINIMAL_TOC
37999 && !constant_pool_empty_p ())
38000 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
38001 if (cfun
->machine
->split_stack_argp_used
)
38002 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
38006 /* Helper function for rs6000_split_logical to emit a logical instruction after
38007 spliting the operation to single GPR registers.
38009 DEST is the destination register.
38010 OP1 and OP2 are the input source registers.
38011 CODE is the base operation (AND, IOR, XOR, NOT).
38012 MODE is the machine mode.
38013 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38014 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38015 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38018 rs6000_split_logical_inner (rtx dest
,
38021 enum rtx_code code
,
38023 bool complement_final_p
,
38024 bool complement_op1_p
,
38025 bool complement_op2_p
)
38029 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38030 if (op2
&& GET_CODE (op2
) == CONST_INT
38031 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
38032 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
38034 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
38035 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
38037 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38042 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
38046 else if (value
== mask
)
38048 if (!rtx_equal_p (dest
, op1
))
38049 emit_insn (gen_rtx_SET (dest
, op1
));
38054 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38055 into separate ORI/ORIS or XORI/XORIS instrucitons. */
38056 else if (code
== IOR
|| code
== XOR
)
38060 if (!rtx_equal_p (dest
, op1
))
38061 emit_insn (gen_rtx_SET (dest
, op1
));
38067 if (code
== AND
&& mode
== SImode
38068 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
38070 emit_insn (gen_andsi3 (dest
, op1
, op2
));
38074 if (complement_op1_p
)
38075 op1
= gen_rtx_NOT (mode
, op1
);
38077 if (complement_op2_p
)
38078 op2
= gen_rtx_NOT (mode
, op2
);
38080 /* For canonical RTL, if only one arm is inverted it is the first. */
38081 if (!complement_op1_p
&& complement_op2_p
)
38082 std::swap (op1
, op2
);
38084 bool_rtx
= ((code
== NOT
)
38085 ? gen_rtx_NOT (mode
, op1
)
38086 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
38088 if (complement_final_p
)
38089 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
38091 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
38094 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38095 operations are split immediately during RTL generation to allow for more
38096 optimizations of the AND/IOR/XOR.
38098 OPERANDS is an array containing the destination and two input operands.
38099 CODE is the base operation (AND, IOR, XOR, NOT).
38100 MODE is the machine mode.
38101 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38102 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38103 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
38104 CLOBBER_REG is either NULL or a scratch register of type CC to allow
38105 formation of the AND instructions. */
38108 rs6000_split_logical_di (rtx operands
[3],
38109 enum rtx_code code
,
38110 bool complement_final_p
,
38111 bool complement_op1_p
,
38112 bool complement_op2_p
)
38114 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
38115 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
38116 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
38117 enum hi_lo
{ hi
= 0, lo
= 1 };
38118 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
38121 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
38122 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
38123 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
38124 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
38127 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
38130 if (GET_CODE (operands
[2]) != CONST_INT
)
38132 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
38133 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
38137 HOST_WIDE_INT value
= INTVAL (operands
[2]);
38138 HOST_WIDE_INT value_hi_lo
[2];
38140 gcc_assert (!complement_final_p
);
38141 gcc_assert (!complement_op1_p
);
38142 gcc_assert (!complement_op2_p
);
38144 value_hi_lo
[hi
] = value
>> 32;
38145 value_hi_lo
[lo
] = value
& lower_32bits
;
38147 for (i
= 0; i
< 2; i
++)
38149 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
38151 if (sub_value
& sign_bit
)
38152 sub_value
|= upper_32bits
;
38154 op2_hi_lo
[i
] = GEN_INT (sub_value
);
38156 /* If this is an AND instruction, check to see if we need to load
38157 the value in a register. */
38158 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
38159 && !and_operand (op2_hi_lo
[i
], SImode
))
38160 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
38165 for (i
= 0; i
< 2; i
++)
38167 /* Split large IOR/XOR operations. */
38168 if ((code
== IOR
|| code
== XOR
)
38169 && GET_CODE (op2_hi_lo
[i
]) == CONST_INT
38170 && !complement_final_p
38171 && !complement_op1_p
38172 && !complement_op2_p
38173 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
38175 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
38176 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
38177 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
38178 rtx tmp
= gen_reg_rtx (SImode
);
38180 /* Make sure the constant is sign extended. */
38181 if ((hi_16bits
& sign_bit
) != 0)
38182 hi_16bits
|= upper_32bits
;
38184 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
38185 code
, SImode
, false, false, false);
38187 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
38188 code
, SImode
, false, false, false);
38191 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
38192 code
, SImode
, complement_final_p
,
38193 complement_op1_p
, complement_op2_p
);
38199 /* Split the insns that make up boolean operations operating on multiple GPR
38200 registers. The boolean MD patterns ensure that the inputs either are
38201 exactly the same as the output registers, or there is no overlap.
38203 OPERANDS is an array containing the destination and two input operands.
38204 CODE is the base operation (AND, IOR, XOR, NOT).
38205 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38206 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38207 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38210 rs6000_split_logical (rtx operands
[3],
38211 enum rtx_code code
,
38212 bool complement_final_p
,
38213 bool complement_op1_p
,
38214 bool complement_op2_p
)
38216 machine_mode mode
= GET_MODE (operands
[0]);
38217 machine_mode sub_mode
;
38219 int sub_size
, regno0
, regno1
, nregs
, i
;
38221 /* If this is DImode, use the specialized version that can run before
38222 register allocation. */
38223 if (mode
== DImode
&& !TARGET_POWERPC64
)
38225 rs6000_split_logical_di (operands
, code
, complement_final_p
,
38226 complement_op1_p
, complement_op2_p
);
38232 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
38233 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
38234 sub_size
= GET_MODE_SIZE (sub_mode
);
38235 regno0
= REGNO (op0
);
38236 regno1
= REGNO (op1
);
38238 gcc_assert (reload_completed
);
38239 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
38240 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
38242 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
38243 gcc_assert (nregs
> 1);
38245 if (op2
&& REG_P (op2
))
38246 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
38248 for (i
= 0; i
< nregs
; i
++)
38250 int offset
= i
* sub_size
;
38251 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
38252 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
38253 rtx sub_op2
= ((code
== NOT
)
38255 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
38257 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
38258 complement_final_p
, complement_op1_p
,
38266 /* Return true if the peephole2 can combine a load involving a combination of
38267 an addis instruction and a load with an offset that can be fused together on
38271 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
38272 rtx addis_value
, /* addis value. */
38273 rtx target
, /* target register that is loaded. */
38274 rtx mem
) /* bottom part of the memory addr. */
38279 /* Validate arguments. */
38280 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
38283 if (!base_reg_operand (target
, GET_MODE (target
)))
38286 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
38289 /* Allow sign/zero extension. */
38290 if (GET_CODE (mem
) == ZERO_EXTEND
38291 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
38292 mem
= XEXP (mem
, 0);
38297 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
38300 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
38301 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
38304 /* Validate that the register used to load the high value is either the
38305 register being loaded, or we can safely replace its use.
38307 This function is only called from the peephole2 pass and we assume that
38308 there are 2 instructions in the peephole (addis and load), so we want to
38309 check if the target register was not used in the memory address and the
38310 register to hold the addis result is dead after the peephole. */
38311 if (REGNO (addis_reg
) != REGNO (target
))
38313 if (reg_mentioned_p (target
, mem
))
38316 if (!peep2_reg_dead_p (2, addis_reg
))
38319 /* If the target register being loaded is the stack pointer, we must
38320 avoid loading any other value into it, even temporarily. */
38321 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
38325 base_reg
= XEXP (addr
, 0);
38326 return REGNO (addis_reg
) == REGNO (base_reg
);
38329 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38330 sequence. We adjust the addis register to use the target register. If the
38331 load sign extends, we adjust the code to do the zero extending load, and an
38332 explicit sign extension later since the fusion only covers zero extending
38336 operands[0] register set with addis (to be replaced with target)
38337 operands[1] value set via addis
38338 operands[2] target register being loaded
38339 operands[3] D-form memory reference using operands[0]. */
38342 expand_fusion_gpr_load (rtx
*operands
)
38344 rtx addis_value
= operands
[1];
38345 rtx target
= operands
[2];
38346 rtx orig_mem
= operands
[3];
38347 rtx new_addr
, new_mem
, orig_addr
, offset
;
38348 enum rtx_code plus_or_lo_sum
;
38349 machine_mode target_mode
= GET_MODE (target
);
38350 machine_mode extend_mode
= target_mode
;
38351 machine_mode ptr_mode
= Pmode
;
38352 enum rtx_code extend
= UNKNOWN
;
38354 if (GET_CODE (orig_mem
) == ZERO_EXTEND
38355 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
38357 extend
= GET_CODE (orig_mem
);
38358 orig_mem
= XEXP (orig_mem
, 0);
38359 target_mode
= GET_MODE (orig_mem
);
38362 gcc_assert (MEM_P (orig_mem
));
38364 orig_addr
= XEXP (orig_mem
, 0);
38365 plus_or_lo_sum
= GET_CODE (orig_addr
);
38366 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
38368 offset
= XEXP (orig_addr
, 1);
38369 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
38370 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
38372 if (extend
!= UNKNOWN
)
38373 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
38375 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
38376 UNSPEC_FUSION_GPR
);
38377 emit_insn (gen_rtx_SET (target
, new_mem
));
38379 if (extend
== SIGN_EXTEND
)
38381 int sub_off
= ((BYTES_BIG_ENDIAN
)
38382 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
38385 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
38387 emit_insn (gen_rtx_SET (target
,
38388 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
38394 /* Emit the addis instruction that will be part of a fused instruction
38398 emit_fusion_addis (rtx target
, rtx addis_value
, const char *comment
,
38399 const char *mode_name
)
38402 char insn_template
[80];
38403 const char *addis_str
= NULL
;
38404 const char *comment_str
= ASM_COMMENT_START
;
38406 if (*comment_str
== ' ')
38409 /* Emit the addis instruction. */
38410 fuse_ops
[0] = target
;
38411 if (satisfies_constraint_L (addis_value
))
38413 fuse_ops
[1] = addis_value
;
38414 addis_str
= "lis %0,%v1";
38417 else if (GET_CODE (addis_value
) == PLUS
)
38419 rtx op0
= XEXP (addis_value
, 0);
38420 rtx op1
= XEXP (addis_value
, 1);
38422 if (REG_P (op0
) && CONST_INT_P (op1
)
38423 && satisfies_constraint_L (op1
))
38427 addis_str
= "addis %0,%1,%v2";
38431 else if (GET_CODE (addis_value
) == HIGH
)
38433 rtx value
= XEXP (addis_value
, 0);
38434 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
38436 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
38437 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
38439 addis_str
= "addis %0,%2,%1@toc@ha";
38441 else if (TARGET_XCOFF
)
38442 addis_str
= "addis %0,%1@u(%2)";
38445 gcc_unreachable ();
38448 else if (GET_CODE (value
) == PLUS
)
38450 rtx op0
= XEXP (value
, 0);
38451 rtx op1
= XEXP (value
, 1);
38453 if (GET_CODE (op0
) == UNSPEC
38454 && XINT (op0
, 1) == UNSPEC_TOCREL
38455 && CONST_INT_P (op1
))
38457 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
38458 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
38461 addis_str
= "addis %0,%2,%1+%3@toc@ha";
38463 else if (TARGET_XCOFF
)
38464 addis_str
= "addis %0,%1+%3@u(%2)";
38467 gcc_unreachable ();
38471 else if (satisfies_constraint_L (value
))
38473 fuse_ops
[1] = value
;
38474 addis_str
= "lis %0,%v1";
38477 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
38479 fuse_ops
[1] = value
;
38480 addis_str
= "lis %0,%1@ha";
38485 fatal_insn ("Could not generate addis value for fusion", addis_value
);
38487 sprintf (insn_template
, "%s\t\t%s %s, type %s", addis_str
, comment_str
,
38488 comment
, mode_name
);
38489 output_asm_insn (insn_template
, fuse_ops
);
38492 /* Emit a D-form load or store instruction that is the second instruction
38493 of a fusion sequence. */
38496 emit_fusion_load_store (rtx load_store_reg
, rtx addis_reg
, rtx offset
,
38497 const char *insn_str
)
38500 char insn_template
[80];
38502 fuse_ops
[0] = load_store_reg
;
38503 fuse_ops
[1] = addis_reg
;
38505 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
38507 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
38508 fuse_ops
[2] = offset
;
38509 output_asm_insn (insn_template
, fuse_ops
);
38512 else if (GET_CODE (offset
) == UNSPEC
38513 && XINT (offset
, 1) == UNSPEC_TOCREL
)
38516 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
38518 else if (TARGET_XCOFF
)
38519 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
38522 gcc_unreachable ();
38524 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
38525 output_asm_insn (insn_template
, fuse_ops
);
38528 else if (GET_CODE (offset
) == PLUS
38529 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
38530 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
38531 && CONST_INT_P (XEXP (offset
, 1)))
38533 rtx tocrel_unspec
= XEXP (offset
, 0);
38535 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
38537 else if (TARGET_XCOFF
)
38538 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
38541 gcc_unreachable ();
38543 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
38544 fuse_ops
[3] = XEXP (offset
, 1);
38545 output_asm_insn (insn_template
, fuse_ops
);
38548 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
38550 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
38552 fuse_ops
[2] = offset
;
38553 output_asm_insn (insn_template
, fuse_ops
);
38557 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
38562 /* Wrap a TOC address that can be fused to indicate that special fusion
38563 processing is needed. */
38566 fusion_wrap_memory_address (rtx old_mem
)
38568 rtx old_addr
= XEXP (old_mem
, 0);
38569 rtvec v
= gen_rtvec (1, old_addr
);
38570 rtx new_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_FUSION_ADDIS
);
38571 return replace_equiv_address_nv (old_mem
, new_addr
, false);
38574 /* Given an address, convert it into the addis and load offset parts. Addresses
38575 created during the peephole2 process look like:
38576 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38577 (unspec [(...)] UNSPEC_TOCREL))
38579 Addresses created via toc fusion look like:
38580 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38583 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
38587 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_FUSION_ADDIS
)
38589 lo
= XVECEXP (addr
, 0, 0);
38590 hi
= gen_rtx_HIGH (Pmode
, lo
);
38592 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
38594 hi
= XEXP (addr
, 0);
38595 lo
= XEXP (addr
, 1);
38598 gcc_unreachable ();
38604 /* Return a string to fuse an addis instruction with a gpr load to the same
38605 register that we loaded up the addis instruction. The address that is used
38606 is the logical address that was formed during peephole2:
38607 (lo_sum (high) (low-part))
38609 Or the address is the TOC address that is wrapped before register allocation:
38610 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38612 The code is complicated, so we call output_asm_insn directly, and just
38616 emit_fusion_gpr_load (rtx target
, rtx mem
)
38621 const char *load_str
= NULL
;
38622 const char *mode_name
= NULL
;
38625 if (GET_CODE (mem
) == ZERO_EXTEND
)
38626 mem
= XEXP (mem
, 0);
38628 gcc_assert (REG_P (target
) && MEM_P (mem
));
38630 addr
= XEXP (mem
, 0);
38631 fusion_split_address (addr
, &addis_value
, &load_offset
);
38633 /* Now emit the load instruction to the same register. */
38634 mode
= GET_MODE (mem
);
38638 mode_name
= "char";
38643 mode_name
= "short";
38649 mode_name
= (mode
== SFmode
) ? "float" : "int";
38655 gcc_assert (TARGET_POWERPC64
);
38656 mode_name
= (mode
== DFmode
) ? "double" : "long";
38661 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
38664 /* Emit the addis instruction. */
38665 emit_fusion_addis (target
, addis_value
, "gpr load fusion", mode_name
);
38667 /* Emit the D-form load instruction. */
38668 emit_fusion_load_store (target
, target
, load_offset
, load_str
);
38674 /* Return true if the peephole2 can combine a load/store involving a
38675 combination of an addis instruction and the memory operation. This was
38676 added to the ISA 3.0 (power9) hardware. */
38679 fusion_p9_p (rtx addis_reg
, /* register set via addis. */
38680 rtx addis_value
, /* addis value. */
38681 rtx dest
, /* destination (memory or register). */
38682 rtx src
) /* source (register or memory). */
38684 rtx addr
, mem
, offset
;
38685 machine_mode mode
= GET_MODE (src
);
38687 /* Validate arguments. */
38688 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
38691 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
38694 /* Ignore extend operations that are part of the load. */
38695 if (GET_CODE (src
) == FLOAT_EXTEND
|| GET_CODE (src
) == ZERO_EXTEND
)
38696 src
= XEXP (src
, 0);
38698 /* Test for memory<-register or register<-memory. */
38699 if (fpr_reg_operand (src
, mode
) || int_reg_operand (src
, mode
))
38707 else if (MEM_P (src
))
38709 if (!fpr_reg_operand (dest
, mode
) && !int_reg_operand (dest
, mode
))
38718 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
38719 if (GET_CODE (addr
) == PLUS
)
38721 if (!rtx_equal_p (addis_reg
, XEXP (addr
, 0)))
38724 return satisfies_constraint_I (XEXP (addr
, 1));
38727 else if (GET_CODE (addr
) == LO_SUM
)
38729 if (!rtx_equal_p (addis_reg
, XEXP (addr
, 0)))
38732 offset
= XEXP (addr
, 1);
38733 if (TARGET_XCOFF
|| (TARGET_ELF
&& TARGET_POWERPC64
))
38734 return small_toc_ref (offset
, GET_MODE (offset
));
38736 else if (TARGET_ELF
&& !TARGET_POWERPC64
)
38737 return CONSTANT_P (offset
);
38743 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38747 operands[0] register set with addis
38748 operands[1] value set via addis
38749 operands[2] target register being loaded
38750 operands[3] D-form memory reference using operands[0].
38752 This is similar to the fusion introduced with power8, except it scales to
38753 both loads/stores and does not require the result register to be the same as
38754 the base register. At the moment, we only do this if register set with addis
38758 expand_fusion_p9_load (rtx
*operands
)
38760 rtx tmp_reg
= operands
[0];
38761 rtx addis_value
= operands
[1];
38762 rtx target
= operands
[2];
38763 rtx orig_mem
= operands
[3];
38764 rtx new_addr
, new_mem
, orig_addr
, offset
, set
, clobber
, insn
;
38765 enum rtx_code plus_or_lo_sum
;
38766 machine_mode target_mode
= GET_MODE (target
);
38767 machine_mode extend_mode
= target_mode
;
38768 machine_mode ptr_mode
= Pmode
;
38769 enum rtx_code extend
= UNKNOWN
;
38771 if (GET_CODE (orig_mem
) == FLOAT_EXTEND
|| GET_CODE (orig_mem
) == ZERO_EXTEND
)
38773 extend
= GET_CODE (orig_mem
);
38774 orig_mem
= XEXP (orig_mem
, 0);
38775 target_mode
= GET_MODE (orig_mem
);
38778 gcc_assert (MEM_P (orig_mem
));
38780 orig_addr
= XEXP (orig_mem
, 0);
38781 plus_or_lo_sum
= GET_CODE (orig_addr
);
38782 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
38784 offset
= XEXP (orig_addr
, 1);
38785 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
38786 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
38788 if (extend
!= UNKNOWN
)
38789 new_mem
= gen_rtx_fmt_e (extend
, extend_mode
, new_mem
);
38791 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
38794 set
= gen_rtx_SET (target
, new_mem
);
38795 clobber
= gen_rtx_CLOBBER (VOIDmode
, tmp_reg
);
38796 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
));
38802 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38806 operands[0] register set with addis
38807 operands[1] value set via addis
38808 operands[2] target D-form memory being stored to
38809 operands[3] register being stored
38811 This is similar to the fusion introduced with power8, except it scales to
38812 both loads/stores and does not require the result register to be the same as
38813 the base register. At the moment, we only do this if register set with addis
38817 expand_fusion_p9_store (rtx
*operands
)
38819 rtx tmp_reg
= operands
[0];
38820 rtx addis_value
= operands
[1];
38821 rtx orig_mem
= operands
[2];
38822 rtx src
= operands
[3];
38823 rtx new_addr
, new_mem
, orig_addr
, offset
, set
, clobber
, insn
, new_src
;
38824 enum rtx_code plus_or_lo_sum
;
38825 machine_mode target_mode
= GET_MODE (orig_mem
);
38826 machine_mode ptr_mode
= Pmode
;
38828 gcc_assert (MEM_P (orig_mem
));
38830 orig_addr
= XEXP (orig_mem
, 0);
38831 plus_or_lo_sum
= GET_CODE (orig_addr
);
38832 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
38834 offset
= XEXP (orig_addr
, 1);
38835 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
38836 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
38838 new_src
= gen_rtx_UNSPEC (target_mode
, gen_rtvec (1, src
),
38841 set
= gen_rtx_SET (new_mem
, new_src
);
38842 clobber
= gen_rtx_CLOBBER (VOIDmode
, tmp_reg
);
38843 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
));
38849 /* Return a string to fuse an addis instruction with a load using extended
38850 fusion. The address that is used is the logical address that was formed
38851 during peephole2: (lo_sum (high) (low-part))
38853 The code is complicated, so we call output_asm_insn directly, and just
38857 emit_fusion_p9_load (rtx reg
, rtx mem
, rtx tmp_reg
)
38859 machine_mode mode
= GET_MODE (reg
);
38863 const char *load_string
;
38866 if (GET_CODE (mem
) == FLOAT_EXTEND
|| GET_CODE (mem
) == ZERO_EXTEND
)
38868 mem
= XEXP (mem
, 0);
38869 mode
= GET_MODE (mem
);
38872 if (GET_CODE (reg
) == SUBREG
)
38874 gcc_assert (SUBREG_BYTE (reg
) == 0);
38875 reg
= SUBREG_REG (reg
);
38879 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg
);
38882 if (FP_REGNO_P (r
))
38884 if (mode
== SFmode
)
38885 load_string
= "lfs";
38886 else if (mode
== DFmode
|| mode
== DImode
)
38887 load_string
= "lfd";
38889 gcc_unreachable ();
38891 else if (ALTIVEC_REGNO_P (r
) && TARGET_P9_DFORM_SCALAR
)
38893 if (mode
== SFmode
)
38894 load_string
= "lxssp";
38895 else if (mode
== DFmode
|| mode
== DImode
)
38896 load_string
= "lxsd";
38898 gcc_unreachable ();
38900 else if (INT_REGNO_P (r
))
38905 load_string
= "lbz";
38908 load_string
= "lhz";
38912 load_string
= "lwz";
38916 if (!TARGET_POWERPC64
)
38917 gcc_unreachable ();
38918 load_string
= "ld";
38921 gcc_unreachable ();
38925 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg
);
38928 fatal_insn ("emit_fusion_p9_load not MEM", mem
);
38930 addr
= XEXP (mem
, 0);
38931 fusion_split_address (addr
, &hi
, &lo
);
38933 /* Emit the addis instruction. */
38934 emit_fusion_addis (tmp_reg
, hi
, "power9 load fusion", GET_MODE_NAME (mode
));
38936 /* Emit the D-form load instruction. */
38937 emit_fusion_load_store (reg
, tmp_reg
, lo
, load_string
);
38942 /* Return a string to fuse an addis instruction with a store using extended
38943 fusion. The address that is used is the logical address that was formed
38944 during peephole2: (lo_sum (high) (low-part))
38946 The code is complicated, so we call output_asm_insn directly, and just
38950 emit_fusion_p9_store (rtx mem
, rtx reg
, rtx tmp_reg
)
38952 machine_mode mode
= GET_MODE (reg
);
38956 const char *store_string
;
38959 if (GET_CODE (reg
) == SUBREG
)
38961 gcc_assert (SUBREG_BYTE (reg
) == 0);
38962 reg
= SUBREG_REG (reg
);
38966 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg
);
38969 if (FP_REGNO_P (r
))
38971 if (mode
== SFmode
)
38972 store_string
= "stfs";
38973 else if (mode
== DFmode
)
38974 store_string
= "stfd";
38976 gcc_unreachable ();
38978 else if (ALTIVEC_REGNO_P (r
) && TARGET_P9_DFORM_SCALAR
)
38980 if (mode
== SFmode
)
38981 store_string
= "stxssp";
38982 else if (mode
== DFmode
|| mode
== DImode
)
38983 store_string
= "stxsd";
38985 gcc_unreachable ();
38987 else if (INT_REGNO_P (r
))
38992 store_string
= "stb";
38995 store_string
= "sth";
38999 store_string
= "stw";
39003 if (!TARGET_POWERPC64
)
39004 gcc_unreachable ();
39005 store_string
= "std";
39008 gcc_unreachable ();
39012 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg
);
39015 fatal_insn ("emit_fusion_p9_store not MEM", mem
);
39017 addr
= XEXP (mem
, 0);
39018 fusion_split_address (addr
, &hi
, &lo
);
39020 /* Emit the addis instruction. */
39021 emit_fusion_addis (tmp_reg
, hi
, "power9 store fusion", GET_MODE_NAME (mode
));
39023 /* Emit the D-form load instruction. */
39024 emit_fusion_load_store (reg
, tmp_reg
, lo
, store_string
);
39030 /* Analyze vector computations and remove unnecessary doubleword
39031 swaps (xxswapdi instructions). This pass is performed only
39032 for little-endian VSX code generation.
39034 For this specific case, loads and stores of 4x32 and 2x64 vectors
39035 are inefficient. These are implemented using the lvx2dx and
39036 stvx2dx instructions, which invert the order of doublewords in
39037 a vector register. Thus the code generation inserts an xxswapdi
39038 after each such load, and prior to each such store. (For spill
39039 code after register assignment, an additional xxswapdi is inserted
39040 following each store in order to return a hard register to its
39043 The extra xxswapdi instructions reduce performance. This can be
39044 particularly bad for vectorized code. The purpose of this pass
39045 is to reduce the number of xxswapdi instructions required for
39048 The primary insight is that much code that operates on vectors
39049 does not care about the relative order of elements in a register,
39050 so long as the correct memory order is preserved. If we have
39051 a computation where all input values are provided by lvxd2x/xxswapdi
39052 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
39053 and all intermediate computations are pure SIMD (independent of
39054 element order), then all the xxswapdi's associated with the loads
39055 and stores may be removed.
39057 This pass uses some of the infrastructure and logical ideas from
39058 the "web" pass in web.c. We create maximal webs of computations
39059 fitting the description above using union-find. Each such web is
39060 then optimized by removing its unnecessary xxswapdi instructions.
39062 The pass is placed prior to global optimization so that we can
39063 perform the optimization in the safest and simplest way possible;
39064 that is, by replacing each xxswapdi insn with a register copy insn.
39065 Subsequent forward propagation will remove copies where possible.
39067 There are some operations sensitive to element order for which we
39068 can still allow the operation, provided we modify those operations.
39069 These include CONST_VECTORs, for which we must swap the first and
39070 second halves of the constant vector; and SUBREGs, for which we
39071 must adjust the byte offset to account for the swapped doublewords.
39072 A remaining opportunity would be non-immediate-form splats, for
39073 which we should adjust the selected lane of the input. We should
39074 also make code generation adjustments for sum-across operations,
39075 since this is a common vectorizer reduction.
39077 Because we run prior to the first split, we can see loads and stores
39078 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39079 vector loads and stores that have not yet been split into a permuting
39080 load/store and a swap. (One way this can happen is with a builtin
39081 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39082 than deleting a swap, we convert the load/store into a permuting
39083 load/store (which effectively removes the swap). */
39085 /* Notes on Permutes
39087 We do not currently handle computations that contain permutes. There
39088 is a general transformation that can be performed correctly, but it
39089 may introduce more expensive code than it replaces. To handle these
39090 would require a cost model to determine when to perform the optimization.
39091 This commentary records how this could be done if desired.
39093 The most general permute is something like this (example for V16QI):
39095 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39096 (parallel [(const_int a0) (const_int a1)
39098 (const_int a14) (const_int a15)]))
39100 where a0,...,a15 are in [0,31] and select elements from op1 and op2
39101 to produce in the result.
39103 Regardless of mode, we can convert the PARALLEL to a mask of 16
39104 byte-element selectors. Let's call this M, with M[i] representing
39105 the ith byte-element selector value. Then if we swap doublewords
39106 throughout the computation, we can get correct behavior by replacing
39107 M with M' as follows:
39109 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39110 { ((M[i]+8)%16)+16 : M[i] in [16,31]
39112 This seems promising at first, since we are just replacing one mask
39113 with another. But certain masks are preferable to others. If M
39114 is a mask that matches a vmrghh pattern, for example, M' certainly
39115 will not. Instead of a single vmrghh, we would generate a load of
39116 M' and a vperm. So we would need to know how many xxswapd's we can
39117 remove as a result of this transformation to determine if it's
39118 profitable; and preferably the logic would need to be aware of all
39119 the special preferable masks.
39121 Another form of permute is an UNSPEC_VPERM, in which the mask is
39122 already in a register. In some cases, this mask may be a constant
39123 that we can discover with ud-chains, in which case the above
39124 transformation is ok. However, the common usage here is for the
39125 mask to be produced by an UNSPEC_LVSL, in which case the mask
39126 cannot be known at compile time. In such a case we would have to
39127 generate several instructions to compute M' as above at run time,
39128 and a cost model is needed again.
39130 However, when the mask M for an UNSPEC_VPERM is loaded from the
39131 constant pool, we can replace M with M' as above at no cost
39132 beyond adding a constant pool entry. */
39134 /* This is based on the union-find logic in web.c. web_entry_base is
39135 defined in df.h. */
39136 class swap_web_entry
: public web_entry_base
39139 /* Pointer to the insn. */
39141 /* Set if insn contains a mention of a vector register. All other
39142 fields are undefined if this field is unset. */
39143 unsigned int is_relevant
: 1;
39144 /* Set if insn is a load. */
39145 unsigned int is_load
: 1;
39146 /* Set if insn is a store. */
39147 unsigned int is_store
: 1;
39148 /* Set if insn is a doubleword swap. This can either be a register swap
39149 or a permuting load or store (test is_load and is_store for this). */
39150 unsigned int is_swap
: 1;
39151 /* Set if the insn has a live-in use of a parameter register. */
39152 unsigned int is_live_in
: 1;
39153 /* Set if the insn has a live-out def of a return register. */
39154 unsigned int is_live_out
: 1;
39155 /* Set if the insn contains a subreg reference of a vector register. */
39156 unsigned int contains_subreg
: 1;
39157 /* Set if the insn contains a 128-bit integer operand. */
39158 unsigned int is_128_int
: 1;
39159 /* Set if this is a call-insn. */
39160 unsigned int is_call
: 1;
39161 /* Set if this insn does not perform a vector operation for which
39162 element order matters, or if we know how to fix it up if it does.
39163 Undefined if is_swap is set. */
39164 unsigned int is_swappable
: 1;
39165 /* A nonzero value indicates what kind of special handling for this
39166 insn is required if doublewords are swapped. Undefined if
39167 is_swappable is not set. */
39168 unsigned int special_handling
: 4;
39169 /* Set if the web represented by this entry cannot be optimized. */
39170 unsigned int web_not_optimizable
: 1;
39171 /* Set if this insn should be deleted. */
39172 unsigned int will_delete
: 1;
/* Values for the special_handling bitfield above.
   NOTE(review): enumerator list reconstructed from the SH_* uses elsewhere
   in this file and upstream GCC; the body was lost in extraction — the
   ordering matters only for the 4-bit field width, verify against
   upstream.  */
enum special_handling_values {
  SH_NONE = 0,
  SH_CONST_VECTOR,
  SH_SUBREG,
  SH_NOSWAP_LD,
  SH_NOSWAP_ST,
  SH_EXTRACT,
  SH_SPLAT,
  SH_XXPERMDI,
  SH_CONCAT,
  SH_VPERM
};
39188 /* Union INSN with all insns containing definitions that reach USE.
39189 Detect whether USE is live-in to the current function. */
39191 union_defs (swap_web_entry
*insn_entry
, rtx insn
, df_ref use
)
39193 struct df_link
*link
= DF_REF_CHAIN (use
);
39196 insn_entry
[INSN_UID (insn
)].is_live_in
= 1;
39200 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
39201 insn_entry
[INSN_UID (insn
)].is_live_in
= 1;
39203 if (DF_REF_INSN_INFO (link
->ref
))
39205 rtx def_insn
= DF_REF_INSN (link
->ref
);
39206 (void)unionfind_union (insn_entry
+ INSN_UID (insn
),
39207 insn_entry
+ INSN_UID (def_insn
));
39214 /* Union INSN with all insns containing uses reached from DEF.
39215 Detect whether DEF is live-out from the current function. */
39217 union_uses (swap_web_entry
*insn_entry
, rtx insn
, df_ref def
)
39219 struct df_link
*link
= DF_REF_CHAIN (def
);
39222 insn_entry
[INSN_UID (insn
)].is_live_out
= 1;
39226 /* This could be an eh use or some other artificial use;
39227 we treat these all the same (killing the optimization). */
39228 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
39229 insn_entry
[INSN_UID (insn
)].is_live_out
= 1;
39231 if (DF_REF_INSN_INFO (link
->ref
))
39233 rtx use_insn
= DF_REF_INSN (link
->ref
);
39234 (void)unionfind_union (insn_entry
+ INSN_UID (insn
),
39235 insn_entry
+ INSN_UID (use_insn
));
39242 /* Return 1 iff INSN is a load insn, including permuting loads that
39243 represent an lvxd2x instruction; else return 0. */
39244 static unsigned int
39245 insn_is_load_p (rtx insn
)
39247 rtx body
= PATTERN (insn
);
39249 if (GET_CODE (body
) == SET
)
39251 if (GET_CODE (SET_SRC (body
)) == MEM
)
39254 if (GET_CODE (SET_SRC (body
)) == VEC_SELECT
39255 && GET_CODE (XEXP (SET_SRC (body
), 0)) == MEM
)
39261 if (GET_CODE (body
) != PARALLEL
)
39264 rtx set
= XVECEXP (body
, 0, 0);
39266 if (GET_CODE (set
) == SET
&& GET_CODE (SET_SRC (set
)) == MEM
)
39272 /* Return 1 iff INSN is a store insn, including permuting stores that
39273 represent an stvxd2x instruction; else return 0. */
39274 static unsigned int
39275 insn_is_store_p (rtx insn
)
39277 rtx body
= PATTERN (insn
);
39278 if (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == MEM
)
39280 if (GET_CODE (body
) != PARALLEL
)
39282 rtx set
= XVECEXP (body
, 0, 0);
39283 if (GET_CODE (set
) == SET
&& GET_CODE (SET_DEST (set
)) == MEM
)
39288 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
39289 a permuting load, or a permuting store. */
39290 static unsigned int
39291 insn_is_swap_p (rtx insn
)
39293 rtx body
= PATTERN (insn
);
39294 if (GET_CODE (body
) != SET
)
39296 rtx rhs
= SET_SRC (body
);
39297 if (GET_CODE (rhs
) != VEC_SELECT
)
39299 rtx parallel
= XEXP (rhs
, 1);
39300 if (GET_CODE (parallel
) != PARALLEL
)
39302 unsigned int len
= XVECLEN (parallel
, 0);
39303 if (len
!= 2 && len
!= 4 && len
!= 8 && len
!= 16)
39305 for (unsigned int i
= 0; i
< len
/ 2; ++i
)
39307 rtx op
= XVECEXP (parallel
, 0, i
);
39308 if (GET_CODE (op
) != CONST_INT
|| INTVAL (op
) != len
/ 2 + i
)
39311 for (unsigned int i
= len
/ 2; i
< len
; ++i
)
39313 rtx op
= XVECEXP (parallel
, 0, i
);
39314 if (GET_CODE (op
) != CONST_INT
|| INTVAL (op
) != i
- len
/ 2)
39320 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
39322 const_load_sequence_p (swap_web_entry
*insn_entry
, rtx insn
)
39324 unsigned uid
= INSN_UID (insn
);
39325 if (!insn_entry
[uid
].is_swap
|| insn_entry
[uid
].is_load
)
39328 const_rtx tocrel_base
;
39330 /* Find the unique use in the swap and locate its def. If the def
39331 isn't unique, punt. */
39332 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
39334 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
39336 struct df_link
*def_link
= DF_REF_CHAIN (use
);
39337 if (!def_link
|| def_link
->next
)
39340 rtx def_insn
= DF_REF_INSN (def_link
->ref
);
39341 unsigned uid2
= INSN_UID (def_insn
);
39342 if (!insn_entry
[uid2
].is_load
|| !insn_entry
[uid2
].is_swap
)
39345 rtx body
= PATTERN (def_insn
);
39346 if (GET_CODE (body
) != SET
39347 || GET_CODE (SET_SRC (body
)) != VEC_SELECT
39348 || GET_CODE (XEXP (SET_SRC (body
), 0)) != MEM
)
39351 rtx mem
= XEXP (SET_SRC (body
), 0);
39352 rtx base_reg
= XEXP (mem
, 0);
39355 insn_info
= DF_INSN_INFO_GET (def_insn
);
39356 FOR_EACH_INSN_INFO_USE (base_use
, insn_info
)
39358 if (!rtx_equal_p (DF_REF_REG (base_use
), base_reg
))
39361 struct df_link
*base_def_link
= DF_REF_CHAIN (base_use
);
39362 if (!base_def_link
|| base_def_link
->next
)
39365 rtx tocrel_insn
= DF_REF_INSN (base_def_link
->ref
);
39366 rtx tocrel_body
= PATTERN (tocrel_insn
);
39368 if (GET_CODE (tocrel_body
) != SET
)
39370 /* There is an extra level of indirection for small/large
39372 rtx tocrel_expr
= SET_SRC (tocrel_body
);
39373 if (GET_CODE (tocrel_expr
) == MEM
)
39374 tocrel_expr
= XEXP (tocrel_expr
, 0);
39375 if (!toc_relative_expr_p (tocrel_expr
, false, &tocrel_base
, NULL
))
39377 split_const (XVECEXP (tocrel_base
, 0, 0), &base
, &offset
);
39378 if (GET_CODE (base
) != SYMBOL_REF
|| !CONSTANT_POOL_ADDRESS_P (base
))
39385 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
39386 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
39388 v2df_reduction_p (rtx op
)
39390 if (GET_MODE (op
) != V2DFmode
)
39393 enum rtx_code code
= GET_CODE (op
);
39394 if (code
!= PLUS
&& code
!= SMIN
&& code
!= SMAX
)
39397 rtx concat
= XEXP (op
, 0);
39398 if (GET_CODE (concat
) != VEC_CONCAT
)
39401 rtx select0
= XEXP (concat
, 0);
39402 rtx select1
= XEXP (concat
, 1);
39403 if (GET_CODE (select0
) != VEC_SELECT
|| GET_CODE (select1
) != VEC_SELECT
)
39406 rtx reg0
= XEXP (select0
, 0);
39407 rtx reg1
= XEXP (select1
, 0);
39408 if (!rtx_equal_p (reg0
, reg1
) || !REG_P (reg0
))
39411 rtx parallel0
= XEXP (select0
, 1);
39412 rtx parallel1
= XEXP (select1
, 1);
39413 if (GET_CODE (parallel0
) != PARALLEL
|| GET_CODE (parallel1
) != PARALLEL
)
39416 if (!rtx_equal_p (XVECEXP (parallel0
, 0, 0), const1_rtx
)
39417 || !rtx_equal_p (XVECEXP (parallel1
, 0, 0), const0_rtx
))
39423 /* Return 1 iff OP is an operand that will not be affected by having
39424 vector doublewords swapped in memory. */
39425 static unsigned int
39426 rtx_is_swappable_p (rtx op
, unsigned int *special
)
39428 enum rtx_code code
= GET_CODE (op
);
39447 *special
= SH_CONST_VECTOR
;
39451 case VEC_DUPLICATE
:
39452 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
39453 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
39454 it represents a vector splat for which we can do special
39456 if (GET_CODE (XEXP (op
, 0)) == CONST_INT
)
39458 else if (REG_P (XEXP (op
, 0))
39459 && GET_MODE_INNER (GET_MODE (op
)) == GET_MODE (XEXP (op
, 0)))
39460 /* This catches V2DF and V2DI splat, at a minimum. */
39462 else if (GET_CODE (XEXP (op
, 0)) == TRUNCATE
39463 && REG_P (XEXP (XEXP (op
, 0), 0))
39464 && GET_MODE_INNER (GET_MODE (op
)) == GET_MODE (XEXP (op
, 0)))
39465 /* This catches splat of a truncated value. */
39467 else if (GET_CODE (XEXP (op
, 0)) == VEC_SELECT
)
39468 /* If the duplicated item is from a select, defer to the select
39469 processing to see if we can change the lane for the splat. */
39470 return rtx_is_swappable_p (XEXP (op
, 0), special
);
39475 /* A vec_extract operation is ok if we change the lane. */
39476 if (GET_CODE (XEXP (op
, 0)) == REG
39477 && GET_MODE_INNER (GET_MODE (XEXP (op
, 0))) == GET_MODE (op
)
39478 && GET_CODE ((parallel
= XEXP (op
, 1))) == PARALLEL
39479 && XVECLEN (parallel
, 0) == 1
39480 && GET_CODE (XVECEXP (parallel
, 0, 0)) == CONST_INT
)
39482 *special
= SH_EXTRACT
;
39485 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
39486 XXPERMDI is a swap operation, it will be identified by
39487 insn_is_swap_p and therefore we won't get here. */
39488 else if (GET_CODE (XEXP (op
, 0)) == VEC_CONCAT
39489 && (GET_MODE (XEXP (op
, 0)) == V4DFmode
39490 || GET_MODE (XEXP (op
, 0)) == V4DImode
)
39491 && GET_CODE ((parallel
= XEXP (op
, 1))) == PARALLEL
39492 && XVECLEN (parallel
, 0) == 2
39493 && GET_CODE (XVECEXP (parallel
, 0, 0)) == CONST_INT
39494 && GET_CODE (XVECEXP (parallel
, 0, 1)) == CONST_INT
)
39496 *special
= SH_XXPERMDI
;
39499 else if (v2df_reduction_p (op
))
39506 /* Various operations are unsafe for this optimization, at least
39507 without significant additional work. Permutes are obviously
39508 problematic, as both the permute control vector and the ordering
39509 of the target values are invalidated by doubleword swapping.
39510 Vector pack and unpack modify the number of vector lanes.
39511 Merge-high/low will not operate correctly on swapped operands.
39512 Vector shifts across element boundaries are clearly uncool,
39513 as are vector select and concatenate operations. Vector
39514 sum-across instructions define one operand with a specific
39515 order-dependent element, so additional fixup code would be
39516 needed to make those work. Vector set and non-immediate-form
39517 vector splat are element-order sensitive. A few of these
39518 cases might be workable with special handling if required.
39519 Adding cost modeling would be appropriate in some cases. */
39520 int val
= XINT (op
, 1);
39525 case UNSPEC_VMRGH_DIRECT
:
39526 case UNSPEC_VMRGL_DIRECT
:
39527 case UNSPEC_VPACK_SIGN_SIGN_SAT
:
39528 case UNSPEC_VPACK_SIGN_UNS_SAT
:
39529 case UNSPEC_VPACK_UNS_UNS_MOD
:
39530 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
:
39531 case UNSPEC_VPACK_UNS_UNS_SAT
:
39533 case UNSPEC_VPERM_UNS
:
39534 case UNSPEC_VPERMHI
:
39535 case UNSPEC_VPERMSI
:
39537 case UNSPEC_VSLDOI
:
39540 case UNSPEC_VSUM2SWS
:
39541 case UNSPEC_VSUM4S
:
39542 case UNSPEC_VSUM4UBS
:
39543 case UNSPEC_VSUMSWS
:
39544 case UNSPEC_VSUMSWS_DIRECT
:
39545 case UNSPEC_VSX_CONCAT
:
39546 case UNSPEC_VSX_SET
:
39547 case UNSPEC_VSX_SLDWI
:
39548 case UNSPEC_VUNPACK_HI_SIGN
:
39549 case UNSPEC_VUNPACK_HI_SIGN_DIRECT
:
39550 case UNSPEC_VUNPACK_LO_SIGN
:
39551 case UNSPEC_VUNPACK_LO_SIGN_DIRECT
:
39552 case UNSPEC_VUPKHPX
:
39553 case UNSPEC_VUPKHS_V4SF
:
39554 case UNSPEC_VUPKHU_V4SF
:
39555 case UNSPEC_VUPKLPX
:
39556 case UNSPEC_VUPKLS_V4SF
:
39557 case UNSPEC_VUPKLU_V4SF
:
39558 case UNSPEC_VSX_CVDPSPN
:
39559 case UNSPEC_VSX_CVSPDP
:
39560 case UNSPEC_VSX_CVSPDPN
:
39561 case UNSPEC_VSX_EXTRACT
:
39562 case UNSPEC_VSX_VSLO
:
39563 case UNSPEC_VSX_VEC_INIT
:
39565 case UNSPEC_VSPLT_DIRECT
:
39566 case UNSPEC_VSX_XXSPLTD
:
39567 *special
= SH_SPLAT
;
39569 case UNSPEC_REDUC_PLUS
:
39579 const char *fmt
= GET_RTX_FORMAT (code
);
39582 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
39583 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
39585 unsigned int special_op
= SH_NONE
;
39586 ok
&= rtx_is_swappable_p (XEXP (op
, i
), &special_op
);
39587 if (special_op
== SH_NONE
)
39589 /* Ensure we never have two kinds of special handling
39590 for the same insn. */
39591 if (*special
!= SH_NONE
&& *special
!= special_op
)
39593 *special
= special_op
;
39595 else if (fmt
[i
] == 'E')
39596 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
39598 unsigned int special_op
= SH_NONE
;
39599 ok
&= rtx_is_swappable_p (XVECEXP (op
, i
, j
), &special_op
);
39600 if (special_op
== SH_NONE
)
39602 /* Ensure we never have two kinds of special handling
39603 for the same insn. */
39604 if (*special
!= SH_NONE
&& *special
!= special_op
)
39606 *special
= special_op
;
39612 /* Return 1 iff INSN is an operand that will not be affected by
39613 having vector doublewords swapped in memory (in which case
39614 *SPECIAL is unchanged), or that can be modified to be correct
39615 if vector doublewords are swapped in memory (in which case
39616 *SPECIAL is changed to a value indicating how). */
39617 static unsigned int
39618 insn_is_swappable_p (swap_web_entry
*insn_entry
, rtx insn
,
39619 unsigned int *special
)
39621 /* Calls are always bad. */
39622 if (GET_CODE (insn
) == CALL_INSN
)
39625 /* Loads and stores seen here are not permuting, but we can still
39626 fix them up by converting them to permuting ones. Exceptions:
39627 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
39628 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
39629 for the SET source. Also we must now make an exception for lvx
39630 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
39631 explicit "& -16") since this leads to unrecognizable insns. */
39632 rtx body
= PATTERN (insn
);
39633 int i
= INSN_UID (insn
);
39635 if (insn_entry
[i
].is_load
)
39637 if (GET_CODE (body
) == SET
)
39639 rtx rhs
= SET_SRC (body
);
39640 /* Even without a swap, the RHS might be a vec_select for, say,
39641 a byte-reversing load. */
39642 if (GET_CODE (rhs
) != MEM
)
39644 if (GET_CODE (XEXP (rhs
, 0)) == AND
)
39647 *special
= SH_NOSWAP_LD
;
39654 if (insn_entry
[i
].is_store
)
39656 if (GET_CODE (body
) == SET
39657 && GET_CODE (SET_SRC (body
)) != UNSPEC
)
39659 rtx lhs
= SET_DEST (body
);
39660 /* Even without a swap, the LHS might be a vec_select for, say,
39661 a byte-reversing store. */
39662 if (GET_CODE (lhs
) != MEM
)
39664 if (GET_CODE (XEXP (lhs
, 0)) == AND
)
39667 *special
= SH_NOSWAP_ST
;
39674 /* A convert to single precision can be left as is provided that
39675 all of its uses are in xxspltw instructions that splat BE element
39677 if (GET_CODE (body
) == SET
39678 && GET_CODE (SET_SRC (body
)) == UNSPEC
39679 && XINT (SET_SRC (body
), 1) == UNSPEC_VSX_CVDPSPN
)
39682 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
39684 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
39686 struct df_link
*link
= DF_REF_CHAIN (def
);
39690 for (; link
; link
= link
->next
) {
39691 rtx use_insn
= DF_REF_INSN (link
->ref
);
39692 rtx use_body
= PATTERN (use_insn
);
39693 if (GET_CODE (use_body
) != SET
39694 || GET_CODE (SET_SRC (use_body
)) != UNSPEC
39695 || XINT (SET_SRC (use_body
), 1) != UNSPEC_VSX_XXSPLTW
39696 || XVECEXP (SET_SRC (use_body
), 0, 1) != const0_rtx
)
39704 /* A concatenation of two doublewords is ok if we reverse the
39705 order of the inputs. */
39706 if (GET_CODE (body
) == SET
39707 && GET_CODE (SET_SRC (body
)) == VEC_CONCAT
39708 && (GET_MODE (SET_SRC (body
)) == V2DFmode
39709 || GET_MODE (SET_SRC (body
)) == V2DImode
))
39711 *special
= SH_CONCAT
;
39715 /* V2DF reductions are always swappable. */
39716 if (GET_CODE (body
) == PARALLEL
)
39718 rtx expr
= XVECEXP (body
, 0, 0);
39719 if (GET_CODE (expr
) == SET
39720 && v2df_reduction_p (SET_SRC (expr
)))
39724 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
39726 if (GET_CODE (body
) == SET
39727 && GET_CODE (SET_SRC (body
)) == UNSPEC
39728 && XINT (SET_SRC (body
), 1) == UNSPEC_VPERM
39729 && XVECLEN (SET_SRC (body
), 0) == 3
39730 && GET_CODE (XVECEXP (SET_SRC (body
), 0, 2)) == REG
)
39732 rtx mask_reg
= XVECEXP (SET_SRC (body
), 0, 2);
39733 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
39735 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
39736 if (rtx_equal_p (DF_REF_REG (use
), mask_reg
))
39738 struct df_link
*def_link
= DF_REF_CHAIN (use
);
39739 /* Punt if multiple definitions for this reg. */
39740 if (def_link
&& !def_link
->next
&&
39741 const_load_sequence_p (insn_entry
,
39742 DF_REF_INSN (def_link
->ref
)))
39744 *special
= SH_VPERM
;
39750 /* Otherwise check the operands for vector lane violations. */
39751 return rtx_is_swappable_p (body
, special
);
/* Direction of the chain walk in chain_contains_only_swaps.  */
enum chain_purpose { FOR_LOADS, FOR_STORES };
39756 /* Return true if the UD or DU chain headed by LINK is non-empty,
39757 and every entry on the chain references an insn that is a
39758 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
39759 register swap must have only permuting loads as reaching defs.
39760 If PURPOSE is FOR_STORES, each such register swap must have only
39761 register swaps or permuting stores as reached uses. */
39763 chain_contains_only_swaps (swap_web_entry
*insn_entry
, struct df_link
*link
,
39764 enum chain_purpose purpose
)
39769 for (; link
; link
= link
->next
)
39771 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link
->ref
))))
39774 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
39777 rtx reached_insn
= DF_REF_INSN (link
->ref
);
39778 unsigned uid
= INSN_UID (reached_insn
);
39779 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (reached_insn
);
39781 if (!insn_entry
[uid
].is_swap
|| insn_entry
[uid
].is_load
39782 || insn_entry
[uid
].is_store
)
39785 if (purpose
== FOR_LOADS
)
39788 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
39790 struct df_link
*swap_link
= DF_REF_CHAIN (use
);
39794 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
39797 rtx swap_def_insn
= DF_REF_INSN (swap_link
->ref
);
39798 unsigned uid2
= INSN_UID (swap_def_insn
);
39800 /* Only permuting loads are allowed. */
39801 if (!insn_entry
[uid2
].is_swap
|| !insn_entry
[uid2
].is_load
)
39804 swap_link
= swap_link
->next
;
39808 else if (purpose
== FOR_STORES
)
39811 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
39813 struct df_link
*swap_link
= DF_REF_CHAIN (def
);
39817 if (DF_REF_IS_ARTIFICIAL (link
->ref
))
39820 rtx swap_use_insn
= DF_REF_INSN (swap_link
->ref
);
39821 unsigned uid2
= INSN_UID (swap_use_insn
);
39823 /* Permuting stores or register swaps are allowed. */
39824 if (!insn_entry
[uid2
].is_swap
|| insn_entry
[uid2
].is_load
)
39827 swap_link
= swap_link
->next
;
39836 /* Mark the xxswapdi instructions associated with permuting loads and
39837 stores for removal. Note that we only flag them for deletion here,
39838 as there is a possibility of a swap being reached from multiple
39841 mark_swaps_for_removal (swap_web_entry
*insn_entry
, unsigned int i
)
39843 rtx insn
= insn_entry
[i
].insn
;
39844 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
39846 if (insn_entry
[i
].is_load
)
39849 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
39851 struct df_link
*link
= DF_REF_CHAIN (def
);
39853 /* We know by now that these are swaps, so we can delete
39854 them confidently. */
39857 rtx use_insn
= DF_REF_INSN (link
->ref
);
39858 insn_entry
[INSN_UID (use_insn
)].will_delete
= 1;
39863 else if (insn_entry
[i
].is_store
)
39866 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
39868 /* Ignore uses for addressability. */
39869 machine_mode mode
= GET_MODE (DF_REF_REG (use
));
39870 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
39873 struct df_link
*link
= DF_REF_CHAIN (use
);
39875 /* We know by now that these are swaps, so we can delete
39876 them confidently. */
39879 rtx def_insn
= DF_REF_INSN (link
->ref
);
39880 insn_entry
[INSN_UID (def_insn
)].will_delete
= 1;
39887 /* OP is either a CONST_VECTOR or an expression containing one.
39888 Swap the first half of the vector with the second in the first
39889 case. Recurse to find it in the second. */
39891 swap_const_vector_halves (rtx op
)
39894 enum rtx_code code
= GET_CODE (op
);
39895 if (GET_CODE (op
) == CONST_VECTOR
)
39897 int half_units
= GET_MODE_NUNITS (GET_MODE (op
)) / 2;
39898 for (i
= 0; i
< half_units
; ++i
)
39900 rtx temp
= CONST_VECTOR_ELT (op
, i
);
39901 CONST_VECTOR_ELT (op
, i
) = CONST_VECTOR_ELT (op
, i
+ half_units
);
39902 CONST_VECTOR_ELT (op
, i
+ half_units
) = temp
;
39908 const char *fmt
= GET_RTX_FORMAT (code
);
39909 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
39910 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
39911 swap_const_vector_halves (XEXP (op
, i
));
39912 else if (fmt
[i
] == 'E')
39913 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
39914 swap_const_vector_halves (XVECEXP (op
, i
, j
));
39918 /* Find all subregs of a vector expression that perform a narrowing,
39919 and adjust the subreg index to account for doubleword swapping. */
39921 adjust_subreg_index (rtx op
)
39923 enum rtx_code code
= GET_CODE (op
);
39925 && (GET_MODE_SIZE (GET_MODE (op
))
39926 < GET_MODE_SIZE (GET_MODE (XEXP (op
, 0)))))
39928 unsigned int index
= SUBREG_BYTE (op
);
39933 SUBREG_BYTE (op
) = index
;
39936 const char *fmt
= GET_RTX_FORMAT (code
);
39938 for (i
= 0; i
< GET_RTX_LENGTH (code
); ++i
)
39939 if (fmt
[i
] == 'e' || fmt
[i
] == 'u')
39940 adjust_subreg_index (XEXP (op
, i
));
39941 else if (fmt
[i
] == 'E')
39942 for (j
= 0; j
< XVECLEN (op
, i
); ++j
)
39943 adjust_subreg_index (XVECEXP (op
, i
, j
));
39946 /* Convert the non-permuting load INSN to a permuting one. */
39948 permute_load (rtx_insn
*insn
)
39950 rtx body
= PATTERN (insn
);
39951 rtx mem_op
= SET_SRC (body
);
39952 rtx tgt_reg
= SET_DEST (body
);
39953 machine_mode mode
= GET_MODE (tgt_reg
);
39954 int n_elts
= GET_MODE_NUNITS (mode
);
39955 int half_elts
= n_elts
/ 2;
39956 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
39958 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
39959 XVECEXP (par
, 0, i
) = GEN_INT (j
);
39960 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
39961 XVECEXP (par
, 0, i
) = GEN_INT (j
);
39962 rtx sel
= gen_rtx_VEC_SELECT (mode
, mem_op
, par
);
39963 SET_SRC (body
) = sel
;
39964 INSN_CODE (insn
) = -1; /* Force re-recognition. */
39965 df_insn_rescan (insn
);
39968 fprintf (dump_file
, "Replacing load %d with permuted load\n",
39972 /* Convert the non-permuting store INSN to a permuting one. */
39974 permute_store (rtx_insn
*insn
)
39976 rtx body
= PATTERN (insn
);
39977 rtx src_reg
= SET_SRC (body
);
39978 machine_mode mode
= GET_MODE (src_reg
);
39979 int n_elts
= GET_MODE_NUNITS (mode
);
39980 int half_elts
= n_elts
/ 2;
39981 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
39983 for (i
= 0, j
= half_elts
; i
< half_elts
; ++i
, ++j
)
39984 XVECEXP (par
, 0, i
) = GEN_INT (j
);
39985 for (i
= half_elts
, j
= 0; j
< half_elts
; ++i
, ++j
)
39986 XVECEXP (par
, 0, i
) = GEN_INT (j
);
39987 rtx sel
= gen_rtx_VEC_SELECT (mode
, src_reg
, par
);
39988 SET_SRC (body
) = sel
;
39989 INSN_CODE (insn
) = -1; /* Force re-recognition. */
39990 df_insn_rescan (insn
);
39993 fprintf (dump_file
, "Replacing store %d with permuted store\n",
39997 /* Given OP that contains a vector extract operation, adjust the index
39998 of the extracted lane to account for the doubleword swap. */
40000 adjust_extract (rtx_insn
*insn
)
40002 rtx pattern
= PATTERN (insn
);
40003 if (GET_CODE (pattern
) == PARALLEL
)
40004 pattern
= XVECEXP (pattern
, 0, 0);
40005 rtx src
= SET_SRC (pattern
);
40006 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
40007 account for that. */
40008 rtx sel
= GET_CODE (src
) == VEC_DUPLICATE
? XEXP (src
, 0) : src
;
40009 rtx par
= XEXP (sel
, 1);
40010 int half_elts
= GET_MODE_NUNITS (GET_MODE (XEXP (sel
, 0))) >> 1;
40011 int lane
= INTVAL (XVECEXP (par
, 0, 0));
40012 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
40013 XVECEXP (par
, 0, 0) = GEN_INT (lane
);
40014 INSN_CODE (insn
) = -1; /* Force re-recognition. */
40015 df_insn_rescan (insn
);
40018 fprintf (dump_file
, "Changing lane for extract %d\n", INSN_UID (insn
));
40021 /* Given OP that contains a vector direct-splat operation, adjust the index
40022 of the source lane to account for the doubleword swap. */
40024 adjust_splat (rtx_insn
*insn
)
40026 rtx body
= PATTERN (insn
);
40027 rtx unspec
= XEXP (body
, 1);
40028 int half_elts
= GET_MODE_NUNITS (GET_MODE (unspec
)) >> 1;
40029 int lane
= INTVAL (XVECEXP (unspec
, 0, 1));
40030 lane
= lane
>= half_elts
? lane
- half_elts
: lane
+ half_elts
;
40031 XVECEXP (unspec
, 0, 1) = GEN_INT (lane
);
40032 INSN_CODE (insn
) = -1; /* Force re-recognition. */
40033 df_insn_rescan (insn
);
40036 fprintf (dump_file
, "Changing lane for splat %d\n", INSN_UID (insn
));
40039 /* Given OP that contains an XXPERMDI operation (that is not a doubleword
40040 swap), reverse the order of the source operands and adjust the indices
40041 of the source lanes to account for doubleword reversal. */
40043 adjust_xxpermdi (rtx_insn
*insn
)
40045 rtx set
= PATTERN (insn
);
40046 rtx select
= XEXP (set
, 1);
40047 rtx concat
= XEXP (select
, 0);
40048 rtx src0
= XEXP (concat
, 0);
40049 XEXP (concat
, 0) = XEXP (concat
, 1);
40050 XEXP (concat
, 1) = src0
;
40051 rtx parallel
= XEXP (select
, 1);
40052 int lane0
= INTVAL (XVECEXP (parallel
, 0, 0));
40053 int lane1
= INTVAL (XVECEXP (parallel
, 0, 1));
40054 int new_lane0
= 3 - lane1
;
40055 int new_lane1
= 3 - lane0
;
40056 XVECEXP (parallel
, 0, 0) = GEN_INT (new_lane0
);
40057 XVECEXP (parallel
, 0, 1) = GEN_INT (new_lane1
);
40058 INSN_CODE (insn
) = -1; /* Force re-recognition. */
40059 df_insn_rescan (insn
);
40062 fprintf (dump_file
, "Changing lanes for xxpermdi %d\n", INSN_UID (insn
));
40065 /* Given OP that contains a VEC_CONCAT operation of two doublewords,
40066 reverse the order of those inputs. */
40068 adjust_concat (rtx_insn
*insn
)
40070 rtx set
= PATTERN (insn
);
40071 rtx concat
= XEXP (set
, 1);
40072 rtx src0
= XEXP (concat
, 0);
40073 XEXP (concat
, 0) = XEXP (concat
, 1);
40074 XEXP (concat
, 1) = src0
;
40075 INSN_CODE (insn
) = -1; /* Force re-recognition. */
40076 df_insn_rescan (insn
);
40079 fprintf (dump_file
, "Reversing inputs for concat %d\n", INSN_UID (insn
));
40082 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
40083 constant pool to reflect swapped doublewords. */
40085 adjust_vperm (rtx_insn
*insn
)
40087 /* We previously determined that the UNSPEC_VPERM was fed by a
40088 swap of a swapping load of a TOC-relative constant pool symbol.
40089 Find the MEM in the swapping load and replace it with a MEM for
40090 the adjusted mask constant. */
40091 rtx set
= PATTERN (insn
);
40092 rtx mask_reg
= XVECEXP (SET_SRC (set
), 0, 2);
40094 /* Find the swap. */
40095 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40097 rtx_insn
*swap_insn
= 0;
40098 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
40099 if (rtx_equal_p (DF_REF_REG (use
), mask_reg
))
40101 struct df_link
*def_link
= DF_REF_CHAIN (use
);
40102 gcc_assert (def_link
&& !def_link
->next
);
40103 swap_insn
= DF_REF_INSN (def_link
->ref
);
40106 gcc_assert (swap_insn
);
40108 /* Find the load. */
40109 insn_info
= DF_INSN_INFO_GET (swap_insn
);
40110 rtx_insn
*load_insn
= 0;
40111 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
40113 struct df_link
*def_link
= DF_REF_CHAIN (use
);
40114 gcc_assert (def_link
&& !def_link
->next
);
40115 load_insn
= DF_REF_INSN (def_link
->ref
);
40118 gcc_assert (load_insn
);
40120 /* Find the TOC-relative symbol access. */
40121 insn_info
= DF_INSN_INFO_GET (load_insn
);
40122 rtx_insn
*tocrel_insn
= 0;
40123 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
40125 struct df_link
*def_link
= DF_REF_CHAIN (use
);
40126 gcc_assert (def_link
&& !def_link
->next
);
40127 tocrel_insn
= DF_REF_INSN (def_link
->ref
);
40130 gcc_assert (tocrel_insn
);
40132 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
40133 to set tocrel_base; otherwise it would be unnecessary as we've
40134 already established it will return true. */
40136 const_rtx tocrel_base
;
40137 rtx tocrel_expr
= SET_SRC (PATTERN (tocrel_insn
));
40138 /* There is an extra level of indirection for small/large code models. */
40139 if (GET_CODE (tocrel_expr
) == MEM
)
40140 tocrel_expr
= XEXP (tocrel_expr
, 0);
40141 if (!toc_relative_expr_p (tocrel_expr
, false, &tocrel_base
, NULL
))
40142 gcc_unreachable ();
40143 split_const (XVECEXP (tocrel_base
, 0, 0), &base
, &offset
);
40144 rtx const_vector
= get_pool_constant (base
);
40145 /* With the extra indirection, get_pool_constant will produce the
40146 real constant from the reg_equal expression, so get the real
40148 if (GET_CODE (const_vector
) == SYMBOL_REF
)
40149 const_vector
= get_pool_constant (const_vector
);
40150 gcc_assert (GET_CODE (const_vector
) == CONST_VECTOR
);
40152 /* Create an adjusted mask from the initial mask. */
40153 unsigned int new_mask
[16], i
, val
;
40154 for (i
= 0; i
< 16; ++i
) {
40155 val
= INTVAL (XVECEXP (const_vector
, 0, i
));
40157 new_mask
[i
] = (val
+ 8) % 16;
40159 new_mask
[i
] = ((val
+ 8) % 16) + 16;
40162 /* Create a new CONST_VECTOR and a MEM that references it. */
40163 rtx vals
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
40164 for (i
= 0; i
< 16; ++i
)
40165 XVECEXP (vals
, 0, i
) = GEN_INT (new_mask
[i
]);
40166 rtx new_const_vector
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (vals
, 0));
40167 rtx new_mem
= force_const_mem (V16QImode
, new_const_vector
);
40168 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
40169 can't recognize. Force the SYMBOL_REF into a register. */
40170 if (!REG_P (XEXP (new_mem
, 0))) {
40171 rtx base_reg
= force_reg (Pmode
, XEXP (new_mem
, 0));
40172 XEXP (new_mem
, 0) = base_reg
;
40173 /* Move the newly created insn ahead of the load insn. */
40174 rtx_insn
*force_insn
= get_last_insn ();
40175 remove_insn (force_insn
);
40176 rtx_insn
*before_load_insn
= PREV_INSN (load_insn
);
40177 add_insn_after (force_insn
, before_load_insn
, BLOCK_FOR_INSN (load_insn
));
40178 df_insn_rescan (before_load_insn
);
40179 df_insn_rescan (force_insn
);
40182 /* Replace the MEM in the load instruction and rescan it. */
40183 XEXP (SET_SRC (PATTERN (load_insn
)), 0) = new_mem
;
40184 INSN_CODE (load_insn
) = -1; /* Force re-recognition. */
40185 df_insn_rescan (load_insn
);
40188 fprintf (dump_file
, "Adjusting mask for vperm %d\n", INSN_UID (insn
));
40191 /* The insn described by INSN_ENTRY[I] can be swapped, but only
40192 with special handling. Take care of that here. */
40194 handle_special_swappables (swap_web_entry
*insn_entry
, unsigned i
)
40196 rtx_insn
*insn
= insn_entry
[i
].insn
;
40197 rtx body
= PATTERN (insn
);
40199 switch (insn_entry
[i
].special_handling
)
40202 gcc_unreachable ();
40203 case SH_CONST_VECTOR
:
40205 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
40206 gcc_assert (GET_CODE (body
) == SET
);
40207 rtx rhs
= SET_SRC (body
);
40208 swap_const_vector_halves (rhs
);
40210 fprintf (dump_file
, "Swapping constant halves in insn %d\n", i
);
40214 /* A subreg of the same size is already safe. For subregs that
40215 select a smaller portion of a reg, adjust the index for
40216 swapped doublewords. */
40217 adjust_subreg_index (body
);
40219 fprintf (dump_file
, "Adjusting subreg in insn %d\n", i
);
40222 /* Convert a non-permuting load to a permuting one. */
40223 permute_load (insn
);
40226 /* Convert a non-permuting store to a permuting one. */
40227 permute_store (insn
);
40230 /* Change the lane on an extract operation. */
40231 adjust_extract (insn
);
40234 /* Change the lane on a direct-splat operation. */
40235 adjust_splat (insn
);
40238 /* Change the lanes on an XXPERMDI operation. */
40239 adjust_xxpermdi (insn
);
40242 /* Reverse the order of a concatenation operation. */
40243 adjust_concat (insn
);
40246 /* Change the mask loaded from the constant pool for a VPERM. */
40247 adjust_vperm (insn
);
40252 /* Find the insn from the Ith table entry, which is known to be a
40253 register swap Y = SWAP(X). Replace it with a copy Y = X. */
40255 replace_swap_with_copy (swap_web_entry
*insn_entry
, unsigned i
)
40257 rtx_insn
*insn
= insn_entry
[i
].insn
;
40258 rtx body
= PATTERN (insn
);
40259 rtx src_reg
= XEXP (SET_SRC (body
), 0);
40260 rtx copy
= gen_rtx_SET (SET_DEST (body
), src_reg
);
40261 rtx_insn
*new_insn
= emit_insn_before (copy
, insn
);
40262 set_block_for_insn (new_insn
, BLOCK_FOR_INSN (insn
));
40263 df_insn_rescan (new_insn
);
40267 unsigned int new_uid
= INSN_UID (new_insn
);
40268 fprintf (dump_file
, "Replacing swap %d with copy %d\n", i
, new_uid
);
40271 df_insn_delete (insn
);
40272 remove_insn (insn
);
40273 insn
->set_deleted ();
40276 /* Dump the swap table to DUMP_FILE. */
40278 dump_swap_insn_table (swap_web_entry
*insn_entry
)
40280 int e
= get_max_uid ();
40281 fprintf (dump_file
, "\nRelevant insns with their flag settings\n\n");
40283 for (int i
= 0; i
< e
; ++i
)
40284 if (insn_entry
[i
].is_relevant
)
40286 swap_web_entry
*pred_entry
= (swap_web_entry
*)insn_entry
[i
].pred ();
40287 fprintf (dump_file
, "%6d %6d ", i
,
40288 pred_entry
&& pred_entry
->insn
40289 ? INSN_UID (pred_entry
->insn
) : 0);
40290 if (insn_entry
[i
].is_load
)
40291 fputs ("load ", dump_file
);
40292 if (insn_entry
[i
].is_store
)
40293 fputs ("store ", dump_file
);
40294 if (insn_entry
[i
].is_swap
)
40295 fputs ("swap ", dump_file
);
40296 if (insn_entry
[i
].is_live_in
)
40297 fputs ("live-in ", dump_file
);
40298 if (insn_entry
[i
].is_live_out
)
40299 fputs ("live-out ", dump_file
);
40300 if (insn_entry
[i
].contains_subreg
)
40301 fputs ("subreg ", dump_file
);
40302 if (insn_entry
[i
].is_128_int
)
40303 fputs ("int128 ", dump_file
);
40304 if (insn_entry
[i
].is_call
)
40305 fputs ("call ", dump_file
);
40306 if (insn_entry
[i
].is_swappable
)
40308 fputs ("swappable ", dump_file
);
40309 if (insn_entry
[i
].special_handling
== SH_CONST_VECTOR
)
40310 fputs ("special:constvec ", dump_file
);
40311 else if (insn_entry
[i
].special_handling
== SH_SUBREG
)
40312 fputs ("special:subreg ", dump_file
);
40313 else if (insn_entry
[i
].special_handling
== SH_NOSWAP_LD
)
40314 fputs ("special:load ", dump_file
);
40315 else if (insn_entry
[i
].special_handling
== SH_NOSWAP_ST
)
40316 fputs ("special:store ", dump_file
);
40317 else if (insn_entry
[i
].special_handling
== SH_EXTRACT
)
40318 fputs ("special:extract ", dump_file
);
40319 else if (insn_entry
[i
].special_handling
== SH_SPLAT
)
40320 fputs ("special:splat ", dump_file
);
40321 else if (insn_entry
[i
].special_handling
== SH_XXPERMDI
)
40322 fputs ("special:xxpermdi ", dump_file
);
40323 else if (insn_entry
[i
].special_handling
== SH_CONCAT
)
40324 fputs ("special:concat ", dump_file
);
40325 else if (insn_entry
[i
].special_handling
== SH_VPERM
)
40326 fputs ("special:vperm ", dump_file
);
40328 if (insn_entry
[i
].web_not_optimizable
)
40329 fputs ("unoptimizable ", dump_file
);
40330 if (insn_entry
[i
].will_delete
)
40331 fputs ("delete ", dump_file
);
40332 fputs ("\n", dump_file
);
40334 fputs ("\n", dump_file
);
40337 /* Return RTX with its address canonicalized to (reg) or (+ reg reg).
40338 Here RTX is an (& addr (const_int -16)). Always return a new copy
40339 to avoid problems with combine. */
40341 alignment_with_canonical_addr (rtx align
)
40344 rtx addr
= XEXP (align
, 0);
40349 else if (GET_CODE (addr
) == PLUS
)
40351 rtx addrop0
= XEXP (addr
, 0);
40352 rtx addrop1
= XEXP (addr
, 1);
40354 if (!REG_P (addrop0
))
40355 addrop0
= force_reg (GET_MODE (addrop0
), addrop0
);
40357 if (!REG_P (addrop1
))
40358 addrop1
= force_reg (GET_MODE (addrop1
), addrop1
);
40360 canon
= gen_rtx_PLUS (GET_MODE (addr
), addrop0
, addrop1
);
40364 canon
= force_reg (GET_MODE (addr
), addr
);
40366 return gen_rtx_AND (GET_MODE (align
), canon
, GEN_INT (-16));
40369 /* Check whether an rtx is an alignment mask, and if so, return
40370 a fully-expanded rtx for the masking operation. */
40372 alignment_mask (rtx_insn
*insn
)
40374 rtx body
= PATTERN (insn
);
40376 if (GET_CODE (body
) != SET
40377 || GET_CODE (SET_SRC (body
)) != AND
40378 || !REG_P (XEXP (SET_SRC (body
), 0)))
40381 rtx mask
= XEXP (SET_SRC (body
), 1);
40383 if (GET_CODE (mask
) == CONST_INT
)
40385 if (INTVAL (mask
) == -16)
40386 return alignment_with_canonical_addr (SET_SRC (body
));
40394 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40398 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
40400 if (!rtx_equal_p (DF_REF_REG (use
), mask
))
40403 struct df_link
*def_link
= DF_REF_CHAIN (use
);
40404 if (!def_link
|| def_link
->next
)
40407 rtx_insn
*const_insn
= DF_REF_INSN (def_link
->ref
);
40408 rtx const_body
= PATTERN (const_insn
);
40409 if (GET_CODE (const_body
) != SET
)
40412 real_mask
= SET_SRC (const_body
);
40414 if (GET_CODE (real_mask
) != CONST_INT
40415 || INTVAL (real_mask
) != -16)
40419 if (real_mask
== 0)
40422 return alignment_with_canonical_addr (SET_SRC (body
));
40425 /* Given INSN that's a load or store based at BASE_REG, look for a
40426 feeding computation that aligns its address on a 16-byte boundary. */
40428 find_alignment_op (rtx_insn
*insn
, rtx base_reg
)
40431 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40432 rtx and_operation
= 0;
40434 FOR_EACH_INSN_INFO_USE (base_use
, insn_info
)
40436 if (!rtx_equal_p (DF_REF_REG (base_use
), base_reg
))
40439 struct df_link
*base_def_link
= DF_REF_CHAIN (base_use
);
40440 if (!base_def_link
|| base_def_link
->next
)
40443 /* With stack-protector code enabled, and possibly in other
40444 circumstances, there may not be an associated insn for
40446 if (DF_REF_IS_ARTIFICIAL (base_def_link
->ref
))
40449 rtx_insn
*and_insn
= DF_REF_INSN (base_def_link
->ref
);
40450 and_operation
= alignment_mask (and_insn
);
40451 if (and_operation
!= 0)
40455 return and_operation
;
40458 struct del_info
{ bool replace
; rtx_insn
*replace_insn
; };
40460 /* If INSN is the load for an lvx pattern, put it in canonical form. */
40462 recombine_lvx_pattern (rtx_insn
*insn
, del_info
*to_delete
)
40464 rtx body
= PATTERN (insn
);
40465 gcc_assert (GET_CODE (body
) == SET
40466 && GET_CODE (SET_SRC (body
)) == VEC_SELECT
40467 && GET_CODE (XEXP (SET_SRC (body
), 0)) == MEM
);
40469 rtx mem
= XEXP (SET_SRC (body
), 0);
40470 rtx base_reg
= XEXP (mem
, 0);
40472 rtx and_operation
= find_alignment_op (insn
, base_reg
);
40474 if (and_operation
!= 0)
40477 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40478 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
40480 struct df_link
*link
= DF_REF_CHAIN (def
);
40481 if (!link
|| link
->next
)
40484 rtx_insn
*swap_insn
= DF_REF_INSN (link
->ref
);
40485 if (!insn_is_swap_p (swap_insn
)
40486 || insn_is_load_p (swap_insn
)
40487 || insn_is_store_p (swap_insn
))
40490 /* Expected lvx pattern found. Change the swap to
40491 a copy, and propagate the AND operation into the
40493 to_delete
[INSN_UID (swap_insn
)].replace
= true;
40494 to_delete
[INSN_UID (swap_insn
)].replace_insn
= swap_insn
;
40496 XEXP (mem
, 0) = and_operation
;
40497 SET_SRC (body
) = mem
;
40498 INSN_CODE (insn
) = -1; /* Force re-recognition. */
40499 df_insn_rescan (insn
);
40502 fprintf (dump_file
, "lvx opportunity found at %d\n",
40508 /* If INSN is the store for an stvx pattern, put it in canonical form. */
40510 recombine_stvx_pattern (rtx_insn
*insn
, del_info
*to_delete
)
40512 rtx body
= PATTERN (insn
);
40513 gcc_assert (GET_CODE (body
) == SET
40514 && GET_CODE (SET_DEST (body
)) == MEM
40515 && GET_CODE (SET_SRC (body
)) == VEC_SELECT
);
40516 rtx mem
= SET_DEST (body
);
40517 rtx base_reg
= XEXP (mem
, 0);
40519 rtx and_operation
= find_alignment_op (insn
, base_reg
);
40521 if (and_operation
!= 0)
40523 rtx src_reg
= XEXP (SET_SRC (body
), 0);
40525 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40526 FOR_EACH_INSN_INFO_USE (src_use
, insn_info
)
40528 if (!rtx_equal_p (DF_REF_REG (src_use
), src_reg
))
40531 struct df_link
*link
= DF_REF_CHAIN (src_use
);
40532 if (!link
|| link
->next
)
40535 rtx_insn
*swap_insn
= DF_REF_INSN (link
->ref
);
40536 if (!insn_is_swap_p (swap_insn
)
40537 || insn_is_load_p (swap_insn
)
40538 || insn_is_store_p (swap_insn
))
40541 /* Expected stvx pattern found. Change the swap to
40542 a copy, and propagate the AND operation into the
40544 to_delete
[INSN_UID (swap_insn
)].replace
= true;
40545 to_delete
[INSN_UID (swap_insn
)].replace_insn
= swap_insn
;
40547 XEXP (mem
, 0) = and_operation
;
40548 SET_SRC (body
) = src_reg
;
40549 INSN_CODE (insn
) = -1; /* Force re-recognition. */
40550 df_insn_rescan (insn
);
40553 fprintf (dump_file
, "stvx opportunity found at %d\n",
40559 /* Look for patterns created from builtin lvx and stvx calls, and
40560 canonicalize them to be properly recognized as such. */
40562 recombine_lvx_stvx_patterns (function
*fun
)
40568 int num_insns
= get_max_uid ();
40569 del_info
*to_delete
= XCNEWVEC (del_info
, num_insns
);
40571 FOR_ALL_BB_FN (bb
, fun
)
40572 FOR_BB_INSNS (bb
, insn
)
40574 if (!NONDEBUG_INSN_P (insn
))
40577 if (insn_is_load_p (insn
) && insn_is_swap_p (insn
))
40578 recombine_lvx_pattern (insn
, to_delete
);
40579 else if (insn_is_store_p (insn
) && insn_is_swap_p (insn
))
40580 recombine_stvx_pattern (insn
, to_delete
);
40583 /* Turning swaps into copies is delayed until now, to avoid problems
40584 with deleting instructions during the insn walk. */
40585 for (i
= 0; i
< num_insns
; i
++)
40586 if (to_delete
[i
].replace
)
40588 rtx swap_body
= PATTERN (to_delete
[i
].replace_insn
);
40589 rtx src_reg
= XEXP (SET_SRC (swap_body
), 0);
40590 rtx copy
= gen_rtx_SET (SET_DEST (swap_body
), src_reg
);
40591 rtx_insn
*new_insn
= emit_insn_before (copy
,
40592 to_delete
[i
].replace_insn
);
40593 set_block_for_insn (new_insn
,
40594 BLOCK_FOR_INSN (to_delete
[i
].replace_insn
));
40595 df_insn_rescan (new_insn
);
40596 df_insn_delete (to_delete
[i
].replace_insn
);
40597 remove_insn (to_delete
[i
].replace_insn
);
40598 to_delete
[i
].replace_insn
->set_deleted ();
40604 /* Main entry point for this pass. */
40606 rs6000_analyze_swaps (function
*fun
)
40608 swap_web_entry
*insn_entry
;
40610 rtx_insn
*insn
, *curr_insn
= 0;
40612 /* Dataflow analysis for use-def chains. */
40613 df_set_flags (DF_RD_PRUNE_DEAD_DEFS
);
40614 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
40616 df_set_flags (DF_DEFER_INSN_RESCAN
);
40618 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
40619 recombine_lvx_stvx_patterns (fun
);
40621 /* Allocate structure to represent webs of insns. */
40622 insn_entry
= XCNEWVEC (swap_web_entry
, get_max_uid ());
40624 /* Walk the insns to gather basic data. */
40625 FOR_ALL_BB_FN (bb
, fun
)
40626 FOR_BB_INSNS_SAFE (bb
, insn
, curr_insn
)
40628 unsigned int uid
= INSN_UID (insn
);
40629 if (NONDEBUG_INSN_P (insn
))
40631 insn_entry
[uid
].insn
= insn
;
40633 if (GET_CODE (insn
) == CALL_INSN
)
40634 insn_entry
[uid
].is_call
= 1;
40636 /* Walk the uses and defs to see if we mention vector regs.
40637 Record any constraints on optimization of such mentions. */
40638 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40640 FOR_EACH_INSN_INFO_USE (mention
, insn_info
)
40642 /* We use DF_REF_REAL_REG here to get inside any subregs. */
40643 machine_mode mode
= GET_MODE (DF_REF_REAL_REG (mention
));
40645 /* If a use gets its value from a call insn, it will be
40646 a hard register and will look like (reg:V4SI 3 3).
40647 The df analysis creates two mentions for GPR3 and GPR4,
40648 both DImode. We must recognize this and treat it as a
40649 vector mention to ensure the call is unioned with this
40651 if (mode
== DImode
&& DF_REF_INSN_INFO (mention
))
40653 rtx feeder
= DF_REF_INSN (mention
);
40654 /* FIXME: It is pretty hard to get from the df mention
40655 to the mode of the use in the insn. We arbitrarily
40656 pick a vector mode here, even though the use might
40657 be a real DImode. We can be too conservative
40658 (create a web larger than necessary) because of
40659 this, so consider eventually fixing this. */
40660 if (GET_CODE (feeder
) == CALL_INSN
)
40664 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
) || mode
== TImode
)
40666 insn_entry
[uid
].is_relevant
= 1;
40667 if (mode
== TImode
|| mode
== V1TImode
40668 || FLOAT128_VECTOR_P (mode
))
40669 insn_entry
[uid
].is_128_int
= 1;
40670 if (DF_REF_INSN_INFO (mention
))
40671 insn_entry
[uid
].contains_subreg
40672 = !rtx_equal_p (DF_REF_REG (mention
),
40673 DF_REF_REAL_REG (mention
));
40674 union_defs (insn_entry
, insn
, mention
);
40677 FOR_EACH_INSN_INFO_DEF (mention
, insn_info
)
40679 /* We use DF_REF_REAL_REG here to get inside any subregs. */
40680 machine_mode mode
= GET_MODE (DF_REF_REAL_REG (mention
));
40682 /* If we're loading up a hard vector register for a call,
40683 it looks like (set (reg:V4SI 9 9) (...)). The df
40684 analysis creates two mentions for GPR9 and GPR10, both
40685 DImode. So relying on the mode from the mentions
40686 isn't sufficient to ensure we union the call into the
40687 web with the parameter setup code. */
40688 if (mode
== DImode
&& GET_CODE (insn
) == SET
40689 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn
))))
40690 mode
= GET_MODE (SET_DEST (insn
));
40692 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
) || mode
== TImode
)
40694 insn_entry
[uid
].is_relevant
= 1;
40695 if (mode
== TImode
|| mode
== V1TImode
40696 || FLOAT128_VECTOR_P (mode
))
40697 insn_entry
[uid
].is_128_int
= 1;
40698 if (DF_REF_INSN_INFO (mention
))
40699 insn_entry
[uid
].contains_subreg
40700 = !rtx_equal_p (DF_REF_REG (mention
),
40701 DF_REF_REAL_REG (mention
));
40702 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
40703 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention
)))
40704 insn_entry
[uid
].is_live_out
= 1;
40705 union_uses (insn_entry
, insn
, mention
);
40709 if (insn_entry
[uid
].is_relevant
)
40711 /* Determine if this is a load or store. */
40712 insn_entry
[uid
].is_load
= insn_is_load_p (insn
);
40713 insn_entry
[uid
].is_store
= insn_is_store_p (insn
);
40715 /* Determine if this is a doubleword swap. If not,
40716 determine whether it can legally be swapped. */
40717 if (insn_is_swap_p (insn
))
40718 insn_entry
[uid
].is_swap
= 1;
40721 unsigned int special
= SH_NONE
;
40722 insn_entry
[uid
].is_swappable
40723 = insn_is_swappable_p (insn_entry
, insn
, &special
);
40724 if (special
!= SH_NONE
&& insn_entry
[uid
].contains_subreg
)
40725 insn_entry
[uid
].is_swappable
= 0;
40726 else if (special
!= SH_NONE
)
40727 insn_entry
[uid
].special_handling
= special
;
40728 else if (insn_entry
[uid
].contains_subreg
)
40729 insn_entry
[uid
].special_handling
= SH_SUBREG
;
40737 fprintf (dump_file
, "\nSwap insn entry table when first built\n");
40738 dump_swap_insn_table (insn_entry
);
40741 /* Record unoptimizable webs. */
40742 unsigned e
= get_max_uid (), i
;
40743 for (i
= 0; i
< e
; ++i
)
40745 if (!insn_entry
[i
].is_relevant
)
40748 swap_web_entry
*root
40749 = (swap_web_entry
*)(&insn_entry
[i
])->unionfind_root ();
40751 if (insn_entry
[i
].is_live_in
|| insn_entry
[i
].is_live_out
40752 || (insn_entry
[i
].contains_subreg
40753 && insn_entry
[i
].special_handling
!= SH_SUBREG
)
40754 || insn_entry
[i
].is_128_int
|| insn_entry
[i
].is_call
40755 || !(insn_entry
[i
].is_swappable
|| insn_entry
[i
].is_swap
))
40756 root
->web_not_optimizable
= 1;
40758 /* If we have loads or stores that aren't permuting then the
40759 optimization isn't appropriate. */
40760 else if ((insn_entry
[i
].is_load
|| insn_entry
[i
].is_store
)
40761 && !insn_entry
[i
].is_swap
&& !insn_entry
[i
].is_swappable
)
40762 root
->web_not_optimizable
= 1;
40764 /* If we have permuting loads or stores that are not accompanied
40765 by a register swap, the optimization isn't appropriate. */
40766 else if (insn_entry
[i
].is_load
&& insn_entry
[i
].is_swap
)
40768 rtx insn
= insn_entry
[i
].insn
;
40769 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40772 FOR_EACH_INSN_INFO_DEF (def
, insn_info
)
40774 struct df_link
*link
= DF_REF_CHAIN (def
);
40776 if (!chain_contains_only_swaps (insn_entry
, link
, FOR_LOADS
))
40778 root
->web_not_optimizable
= 1;
40783 else if (insn_entry
[i
].is_store
&& insn_entry
[i
].is_swap
)
40785 rtx insn
= insn_entry
[i
].insn
;
40786 struct df_insn_info
*insn_info
= DF_INSN_INFO_GET (insn
);
40789 FOR_EACH_INSN_INFO_USE (use
, insn_info
)
40791 struct df_link
*link
= DF_REF_CHAIN (use
);
40793 if (!chain_contains_only_swaps (insn_entry
, link
, FOR_STORES
))
40795 root
->web_not_optimizable
= 1;
40804 fprintf (dump_file
, "\nSwap insn entry table after web analysis\n");
40805 dump_swap_insn_table (insn_entry
);
40808 /* For each load and store in an optimizable web (which implies
40809 the loads and stores are permuting), find the associated
40810 register swaps and mark them for removal. Due to various
40811 optimizations we may mark the same swap more than once. Also
40812 perform special handling for swappable insns that require it. */
40813 for (i
= 0; i
< e
; ++i
)
40814 if ((insn_entry
[i
].is_load
|| insn_entry
[i
].is_store
)
40815 && insn_entry
[i
].is_swap
)
40817 swap_web_entry
* root_entry
40818 = (swap_web_entry
*)((&insn_entry
[i
])->unionfind_root ());
40819 if (!root_entry
->web_not_optimizable
)
40820 mark_swaps_for_removal (insn_entry
, i
);
40822 else if (insn_entry
[i
].is_swappable
&& insn_entry
[i
].special_handling
)
40824 swap_web_entry
* root_entry
40825 = (swap_web_entry
*)((&insn_entry
[i
])->unionfind_root ());
40826 if (!root_entry
->web_not_optimizable
)
40827 handle_special_swappables (insn_entry
, i
);
40830 /* Now delete the swaps marked for removal. */
40831 for (i
= 0; i
< e
; ++i
)
40832 if (insn_entry
[i
].will_delete
)
40833 replace_swap_with_copy (insn_entry
, i
);
/* Metadata for the little-endian VSX swap-optimization RTL pass
   ("swaps"), instantiated by class pass_analyze_swaps below.
   TODO_df_finish is required because the pass uses DF chains.  */
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
/* RTL pass wrapper around rs6000_analyze_swaps, which removes
   unnecessary vector element swaps on little-endian VSX targets.  */
class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing little-endian VSX code without Power9
     vector support, and only if -moptimize-swaps is in effect.  */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	      && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

  opt_pass *clone ()
    {
      return new pass_analyze_swaps (m_ctxt);
    }

}; // class pass_analyze_swaps
/* Factory function used by the pass manager to create an instance of
   the swap-optimization pass.  */
rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.
   Built lazily on first use by that function; they reference the
   out-of-line glibc helpers used when there is no hard float.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.

   Build the three expression trees (*HOLD, *CLEAR, *UPDATE) that the
   middle end wraps around an atomic compound assignment so that it
   executes with a clean floating-point environment and the deferred
   exceptions are raised afterward (C11 7.6.4 semantics).  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Without hard float there is no FPSCR to manage; fall back to
     out-of-line glibc helpers when they are available.  */
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      /* Lazily build an extern decl for __atomic_feholdexcept.  */
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      /* Lazily build an extern decl for __atomic_feclearexcept.  */
      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      /* Lazily build an extern decl for __atomic_feupdateenv.  */
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      /* The saved environment lives in a temporary whose address is
	 passed to the hold and update helpers.  */
      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  /* Hard-float path: manipulate the FPSCR directly via the mffs and
     mtfsf builtins.  The FPSCR image is viewed as a double whose bit
     pattern is manipulated through uint64_t view-converts.  */
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask that clears the entire low (FPSCR) word of the image.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
                                (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  /* Keep the current rounding/control state from the fresh FPSCR read,
     and merge back in the exception bits saved earlier in fenv_var.  */
  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  /* NOTE(review): fenv_llu re-reads the fenv_var saved by *hold; this
     relies on *hold having executed first, which the middle end
     guarantees for this hook.  */
  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
41051 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
41053 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
41055 rtx_tmp0
= gen_reg_rtx (V2DImode
);
41056 rtx_tmp1
= gen_reg_rtx (V2DImode
);
41058 /* The destination of the vmrgew instruction layout is:
41059 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
41060 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
41061 vmrgew instruction will be correct. */
41062 if (VECTOR_ELT_ORDER_BIG
)
41064 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
41065 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
41069 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
41070 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
41073 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
41074 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
41076 if (signed_convert
)
41078 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
41079 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
41083 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
41084 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
41087 if (VECTOR_ELT_ORDER_BIG
)
41088 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
41090 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
41094 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
41097 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
41099 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
41100 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
41102 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
41103 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
41105 rtx_tmp2
= gen_reg_rtx (V4SImode
);
41106 rtx_tmp3
= gen_reg_rtx (V4SImode
);
41108 if (signed_convert
)
41110 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
41111 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
41115 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
41116 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
41119 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.

   OP is the optab being queried, MODE1 its operand mode, and OPT_TYPE
   says whether we are optimizing this use for size or speed.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
			  optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      /* Expanding rsqrt inline is only worthwhile (and only enabled
	 via RS6000_RECIP_AUTO_RSQRTE_P) when optimizing for speed.  */
      return (opt_type == OPTIMIZE_FOR_SPEED
	      && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
/* Definition of the target hook vector; the TARGET_* macros defined
   throughout this file are collected by TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
41141 #include "gt-rs6000.h"