/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
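
/* Note: min and max are function-like macros, so they evaluate their
   arguments more than once; arguments with side effects, e.g.
   min (x++, limit), are unsafe with them.  */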

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;		/* size of SPE alignment padding */
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
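
/* Illustrative only (not in the original file): this per-function data
   hangs off the current function, e.g.

     cfun->machine->ra_need_lr = 1;

   and is allocated by rs6000_init_machine_status (declared below).  */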

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which is called
   so we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use a variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;	/* option name */
  unsigned int mask;	/* mask bits to set */
} recip_options[] = {
  { "all", RECIP_ALL },
  { "none", RECIP_NONE },
  { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	    | RECIP_V2DF_DIV) },
  { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
	      | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
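
/* Illustrative only (not in the original file): -mrecip=divf,rsqrtd
   ORs together the "divf" and "rsqrtd" masks above, i.e.

     (RECIP_SF_DIV | RECIP_V4SF_DIV) | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)

   enabling the single-precision divide estimates and the double-precision
   reciprocal square root estimates only.  */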

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9", PPC_PLATFORM_POWER9 },
  { "power8", PPC_PLATFORM_POWER8 },
  { "power7", PPC_PLATFORM_POWER7 },
  { "power6x", PPC_PLATFORM_POWER6X },
  { "power6", PPC_PLATFORM_POWER6 },
  { "power5+", PPC_PLATFORM_POWER5_PLUS },
  { "power5", PPC_PLATFORM_POWER5 },
  { "ppc970", PPC_PLATFORM_PPC970 },
  { "power4", PPC_PLATFORM_POWER4 },
  { "ppca2", PPC_PLATFORM_PPCA2 },
  { "ppc476", PPC_PLATFORM_PPC476 },
  { "ppc464", PPC_PLATFORM_PPC464 },
  { "ppc440", PPC_PLATFORM_PPC440 },
  { "ppc405", PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 },
  { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 },
  { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 },
  { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 },
  { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 },
  { "booke", PPC_FEATURE_BOOKE, 0 },
  { "cellbe", PPC_FEATURE_CELL_BE, 0 },
  { "dfp", PPC_FEATURE_HAS_DFP, 0 },
  { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 },
  { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 },
  { "fpu", PPC_FEATURE_HAS_FPU, 0 },
  { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 },
  { "mmu", PPC_FEATURE_HAS_MMU, 0 },
  { "notb", PPC_FEATURE_NO_TB, 0 },
  { "pa6t", PPC_FEATURE_PA6T, 0 },
  { "power4", PPC_FEATURE_POWER4, 0 },
  { "power5", PPC_FEATURE_POWER5, 0 },
  { "power5+", PPC_FEATURE_POWER5_PLUS, 0 },
  { "power6x", PPC_FEATURE_POWER6_EXT, 0 },
  { "ppc32", PPC_FEATURE_32, 0 },
  { "ppc601", PPC_FEATURE_601_INSTR, 0 },
  { "ppc64", PPC_FEATURE_64, 0 },
  { "ppcle", PPC_FEATURE_PPC_LE, 0 },
  { "smt", PPC_FEATURE_SMT, 0 },
  { "spe", PPC_FEATURE_HAS_SPE, 0 },
  { "true_le", PPC_FEATURE_TRUE_LE, 0 },
  { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 },
  { "vsx", PPC_FEATURE_HAS_VSX, 0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 },
  { "dscr", PPC_FEATURE2_HAS_DSCR, 1 },
  { "ebb", PPC_FEATURE2_HAS_EBB, 1 },
  { "htm", PPC_FEATURE2_HAS_HTM, 1 },
  { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 },
  { "isel", PPC_FEATURE2_HAS_ISEL, 1 },
  { "tar", PPC_FEATURE2_HAS_TAR, 1 },
  { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 },
  { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 },
  { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }
};
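
/* Illustrative only (user code, not part of this file): these names are
   what callers pass to the builtin, e.g.

     if (__builtin_cpu_supports ("arch_2_07"))
       ... POWER8 (ISA 2.07) code path ...

   The string is looked up in the table above and the corresponding mask
   is tested against the AT_HWCAP/AT_HWCAP2 values that the C library
   stores in the TCB (see the comment below).  */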

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,	/* General purpose registers.  */
  RELOAD_REG_FPR,	/* Traditional floating point regs.  */
  RELOAD_REG_VMX,	/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,	/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },		/* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },		/* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any", -1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
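
/* Illustrative only (not in the original file): a mode whose GPR entry is

     RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET

   can be held in GPRs with both reg+reg and reg+offset addressing, but
   allows none of the pre-increment/pre-modify forms.  The RELOAD_REG_ANY
   entry holds the OR of the three real register class entries, so the
   helper predicates below can test all classes at once.  */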

/* For each mode, the insn codes used for reloads and register moves, plus
   the masks of valid addressing modes for each register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	 /* INSN to reload for loading.  */
  enum insn_code reload_store;	 /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;	 /* INSN for fusing gpr ADDIS/loads.  */
				 /* INSNs for fusing addi with loads
				    or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
				 /* INSNs for fusing addis with loads
				    or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		 /* Scalar value can go in VMX.  */
  bool fused_toc;		 /* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
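
/* Illustrative only (not in the original file): an OUT_INSN whose
   pattern is

     (parallel [(set (reg:SI A) ...) (clobber (reg:CC B))])

   passes the checks above, while a PARALLEL containing any element other
   than a SET, CLOBBER or USE makes this function return false instead of
   tripping an assertion inside the generic store_data_bypass_p.  */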

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

\f
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};
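
/* Illustrative only (not in the original file): the entries below are
   scaled with COSTS_N_INSNS, so e.g. COSTS_N_INSNS (18) for sdiv says a
   single-precision fdivs costs roughly eighteen times a simple add on
   that processor (see the "relative to an add" note below).  */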

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
  0,			    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};
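
/* Illustrative only (not in the original file): with the definitions
   above, a hypothetical line in powerpcspe-builtin.def such as

     RS6000_BUILTIN_2 (FOO, "foo", MASK, ATTR, CODE_FOR_foo)

   expands to the initializer { "foo", CODE_FOR_foo, MASK, ATTR } here,
   producing one rs6000_builtin_info entry per builtin.  */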

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];	   /* return value + 3 arguments.  */
  unsigned char uns_p[4];  /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
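
/* Illustrative only (not in the original file): this gives
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) == 0x80000000 (%v0, the most
   significant bit) and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31)
   == 0x00000001 (%v31).  */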
1571 \f
1572 /* Initialize the GCC target structure. */
1573 #undef TARGET_ATTRIBUTE_TABLE
1574 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1575 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1576 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1577 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1578 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1579
1580 #undef TARGET_ASM_ALIGNED_DI_OP
1581 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1582
1583 /* Default unaligned ops are only provided for ELF. Find the ops needed
1584 for non-ELF systems. */
1585 #ifndef OBJECT_FORMAT_ELF
1586 #if TARGET_XCOFF
1587 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1588 64-bit targets. */
1589 #undef TARGET_ASM_UNALIGNED_HI_OP
1590 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1591 #undef TARGET_ASM_UNALIGNED_SI_OP
1592 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1593 #undef TARGET_ASM_UNALIGNED_DI_OP
1594 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1595 #else
1596 /* For Darwin. */
1597 #undef TARGET_ASM_UNALIGNED_HI_OP
1598 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1599 #undef TARGET_ASM_UNALIGNED_SI_OP
1600 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1601 #undef TARGET_ASM_UNALIGNED_DI_OP
1602 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1603 #undef TARGET_ASM_ALIGNED_DI_OP
1604 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1605 #endif
1606 #endif
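/* A sketch of the resulting directives (assuming a symbol `_foo'): since the
   assembler op string is simply printed in front of the operand, an unaligned
   4-byte integer comes out as "\t.vbyte\t4,_foo" on XCOFF, but as
   "\t.long\t_foo" on Darwin.  */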
1607
1608 /* This hook deals with fixups for relocatable code and DI-mode objects
1609 in 64-bit code. */
1610 #undef TARGET_ASM_INTEGER
1611 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1612
1613 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1614 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1615 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1616 #endif
1617
1618 #undef TARGET_SET_UP_BY_PROLOGUE
1619 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1620
1621 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1622 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1623 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1624 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1625 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1626 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1627 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1628 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1629 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1630 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1631 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1632 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1633
1634 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1635 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1636
1637 #undef TARGET_INTERNAL_ARG_POINTER
1638 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1639
1640 #undef TARGET_HAVE_TLS
1641 #define TARGET_HAVE_TLS HAVE_AS_TLS
1642
1643 #undef TARGET_CANNOT_FORCE_CONST_MEM
1644 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1645
1646 #undef TARGET_DELEGITIMIZE_ADDRESS
1647 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1648
1649 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1650 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1651
1652 #undef TARGET_LEGITIMATE_COMBINED_INSN
1653 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1654
1655 #undef TARGET_ASM_FUNCTION_PROLOGUE
1656 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1657 #undef TARGET_ASM_FUNCTION_EPILOGUE
1658 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1659
1660 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1661 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1662
1663 #undef TARGET_LEGITIMIZE_ADDRESS
1664 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1665
1666 #undef TARGET_SCHED_VARIABLE_ISSUE
1667 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1668
1669 #undef TARGET_SCHED_ISSUE_RATE
1670 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1671 #undef TARGET_SCHED_ADJUST_COST
1672 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1673 #undef TARGET_SCHED_ADJUST_PRIORITY
1674 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1675 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1676 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1677 #undef TARGET_SCHED_INIT
1678 #define TARGET_SCHED_INIT rs6000_sched_init
1679 #undef TARGET_SCHED_FINISH
1680 #define TARGET_SCHED_FINISH rs6000_sched_finish
1681 #undef TARGET_SCHED_REORDER
1682 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1683 #undef TARGET_SCHED_REORDER2
1684 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1685
1686 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1687 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1688
1689 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1690 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1691
1692 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1693 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1694 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1695 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1696 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1697 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1698 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1699 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1700
1701 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1702 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1703
1704 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1705 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1706 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1707 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1708 rs6000_builtin_support_vector_misalignment
1709 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1710 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1711 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1712 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1713 rs6000_builtin_vectorization_cost
1714 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1715 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1716 rs6000_preferred_simd_mode
1717 #undef TARGET_VECTORIZE_INIT_COST
1718 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1719 #undef TARGET_VECTORIZE_ADD_STMT_COST
1720 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1721 #undef TARGET_VECTORIZE_FINISH_COST
1722 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1723 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1724 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1725
1726 #undef TARGET_INIT_BUILTINS
1727 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1728 #undef TARGET_BUILTIN_DECL
1729 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1730
1731 #undef TARGET_FOLD_BUILTIN
1732 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1733 #undef TARGET_GIMPLE_FOLD_BUILTIN
1734 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1735
1736 #undef TARGET_EXPAND_BUILTIN
1737 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1738
1739 #undef TARGET_MANGLE_TYPE
1740 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1741
1742 #undef TARGET_INIT_LIBFUNCS
1743 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1744
1745 #if TARGET_MACHO
1746 #undef TARGET_BINDS_LOCAL_P
1747 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1748 #endif
1749
1750 #undef TARGET_MS_BITFIELD_LAYOUT_P
1751 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1752
1753 #undef TARGET_ASM_OUTPUT_MI_THUNK
1754 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1755
1756 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1757 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1758
1759 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1760 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1761
1762 #undef TARGET_REGISTER_MOVE_COST
1763 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1764 #undef TARGET_MEMORY_MOVE_COST
1765 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1766 #undef TARGET_CANNOT_COPY_INSN_P
1767 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1768 #undef TARGET_RTX_COSTS
1769 #define TARGET_RTX_COSTS rs6000_rtx_costs
1770 #undef TARGET_ADDRESS_COST
1771 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1772
1773 #undef TARGET_DWARF_REGISTER_SPAN
1774 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1775
1776 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1777 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1778
1779 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1780 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1781
1782 #undef TARGET_PROMOTE_FUNCTION_MODE
1783 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1784
1785 #undef TARGET_RETURN_IN_MEMORY
1786 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1787
1788 #undef TARGET_RETURN_IN_MSB
1789 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1790
1791 #undef TARGET_SETUP_INCOMING_VARARGS
1792 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1793
1794 /* Always strict argument naming on rs6000. */
1795 #undef TARGET_STRICT_ARGUMENT_NAMING
1796 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1797 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1798 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1799 #undef TARGET_SPLIT_COMPLEX_ARG
1800 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1801 #undef TARGET_MUST_PASS_IN_STACK
1802 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1803 #undef TARGET_PASS_BY_REFERENCE
1804 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1805 #undef TARGET_ARG_PARTIAL_BYTES
1806 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1807 #undef TARGET_FUNCTION_ARG_ADVANCE
1808 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1809 #undef TARGET_FUNCTION_ARG
1810 #define TARGET_FUNCTION_ARG rs6000_function_arg
1811 #undef TARGET_FUNCTION_ARG_BOUNDARY
1812 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1813
1814 #undef TARGET_BUILD_BUILTIN_VA_LIST
1815 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1816
1817 #undef TARGET_EXPAND_BUILTIN_VA_START
1818 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1819
1820 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1821 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1822
1823 #undef TARGET_EH_RETURN_FILTER_MODE
1824 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1825
1826 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1827 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1828
1829 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1830 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1831
1832 #undef TARGET_FLOATN_MODE
1833 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1834
1835 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1836 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1837
1838 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1839 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1840
1841 #undef TARGET_MD_ASM_ADJUST
1842 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1843
1844 #undef TARGET_OPTION_OVERRIDE
1845 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1846
1847 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1848 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1849 rs6000_builtin_vectorized_function
1850
1851 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1852 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1853 rs6000_builtin_md_vectorized_function
1854
1855 #undef TARGET_STACK_PROTECT_GUARD
1856 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1857
1858 #if !TARGET_MACHO
1859 #undef TARGET_STACK_PROTECT_FAIL
1860 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1861 #endif
1862
1863 #ifdef HAVE_AS_TLS
1864 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1865 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1866 #endif
1867
1868 /* Use a 32-bit anchor range. This leads to sequences like:
1869
1870 addis tmp,anchor,high
1871 add dest,tmp,low
1872
1873 where tmp itself acts as an anchor, and can be shared between
1874 accesses to the same 64k page. */
1875 #undef TARGET_MIN_ANCHOR_OFFSET
1876 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1877 #undef TARGET_MAX_ANCHOR_OFFSET
1878 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
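/* Note: the minimum anchor offset is written as -0x7fffffff - 1 rather than
   -0x80000000 because, on hosts with 32-bit int, the literal 0x80000000 does
   not fit in a signed int and would be parsed as the negation of an unsigned
   constant.  */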
1879 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1880 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1881 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1882 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1883
1884 #undef TARGET_BUILTIN_RECIPROCAL
1885 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1886
1887 #undef TARGET_EXPAND_TO_RTL_HOOK
1888 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1889
1890 #undef TARGET_INSTANTIATE_DECLS
1891 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1892
1893 #undef TARGET_SECONDARY_RELOAD
1894 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1895
1896 #undef TARGET_LEGITIMATE_ADDRESS_P
1897 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1898
1899 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1900 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1901
1902 #undef TARGET_LRA_P
1903 #define TARGET_LRA_P rs6000_lra_p
1904
1905 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1906 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1907
1908 #undef TARGET_CAN_ELIMINATE
1909 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1910
1911 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1912 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1913
1914 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1915 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1916
1917 #undef TARGET_TRAMPOLINE_INIT
1918 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1919
1920 #undef TARGET_FUNCTION_VALUE
1921 #define TARGET_FUNCTION_VALUE rs6000_function_value
1922
1923 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1924 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1925
1926 #undef TARGET_OPTION_SAVE
1927 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1928
1929 #undef TARGET_OPTION_RESTORE
1930 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1931
1932 #undef TARGET_OPTION_PRINT
1933 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1934
1935 #undef TARGET_CAN_INLINE_P
1936 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1937
1938 #undef TARGET_SET_CURRENT_FUNCTION
1939 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1940
1941 #undef TARGET_LEGITIMATE_CONSTANT_P
1942 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1943
1944 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1945 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1946
1947 #undef TARGET_CAN_USE_DOLOOP_P
1948 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1949
1950 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1951 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1952
1953 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1954 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1955 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1956 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1957 #undef TARGET_UNWIND_WORD_MODE
1958 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1959
1960 #undef TARGET_OFFLOAD_OPTIONS
1961 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1962
1963 #undef TARGET_C_MODE_FOR_SUFFIX
1964 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1965
1966 #undef TARGET_INVALID_BINARY_OP
1967 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1968
1969 #undef TARGET_OPTAB_SUPPORTED_P
1970 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1971
1972 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1973 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1974 \f
1975
1976 /* Processor table. */
1977 struct rs6000_ptt
1978 {
1979 const char *const name; /* Canonical processor name. */
1980 const enum processor_type processor; /* Processor type enum value. */
1981 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1982 };
1983
1984 static struct rs6000_ptt const processor_target_table[] =
1985 {
1986 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1987 #include "powerpcspe-cpus.def"
1988 #undef RS6000_CPU
1989 };
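/* A sketch of the X-macro expansion above: an entry in powerpcspe-cpus.def
   such as

     RS6000_CPU ("power7", PROCESSOR_POWER7, MASK_POWERPC64 | ...)

   (the flags here are hypothetical) expands to the initializer

     { "power7", PROCESSOR_POWER7, MASK_POWERPC64 | ... },

   so the table carries one row per CPU entry in the .def file.  */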
1990
1991 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1992 name is invalid. */
1993
1994 static int
1995 rs6000_cpu_name_lookup (const char *name)
1996 {
1997 size_t i;
1998
1999 if (name != NULL)
2000 {
2001 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2002 if (! strcmp (name, processor_target_table[i].name))
2003 return (int)i;
2004 }
2005
2006 return -1;
2007 }
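/* Usage sketch: callers treat the return value as an index into
   processor_target_table, e.g.

     int cpu_index = rs6000_cpu_name_lookup ("power7");
     if (cpu_index >= 0)
       flags = processor_target_table[cpu_index].target_enable;

   with a negative result meaning the -mcpu=/-mtune= name was not recognized
   ("power7" here is just an illustrative name).  */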
2008
2009 \f
2010 /* Return number of consecutive hard regs needed starting at reg REGNO
2011 to hold something of mode MODE.
2012 This is ordinarily the length in words of a value of mode MODE
2013 but can be less for certain modes in special long registers.
2014
2015 For the SPE, GPRs are 64 bits but only 32 bits are visible in
2016 scalar instructions. The upper 32 bits are only available to the
2017 SIMD instructions.
2018
2019 POWER and PowerPC GPRs hold 32 bits worth;
2020 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2021
2022 static int
2023 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2024 {
2025 unsigned HOST_WIDE_INT reg_size;
2026
2027 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2028 128-bit floating point that can go in vector registers, which has VSX
2029 memory addressing. */
2030 if (FP_REGNO_P (regno))
2031 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2032 ? UNITS_PER_VSX_WORD
2033 : UNITS_PER_FP_WORD);
2034
2035 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2036 reg_size = UNITS_PER_SPE_WORD;
2037
2038 else if (ALTIVEC_REGNO_P (regno))
2039 reg_size = UNITS_PER_ALTIVEC_WORD;
2040
2041 /* The value returned for SCmode in the E500 double case is 2 for
2042 ABI compatibility; storing an SCmode value in a single register
2043 would require function_arg and rs6000_spe_function_arg to handle
2044 SCmode so as to pass the value correctly in a pair of
2045 registers. */
2046 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
2047 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
2048 reg_size = UNITS_PER_FP_WORD;
2049
2050 else
2051 reg_size = UNITS_PER_WORD;
2052
2053 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2054 }
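/* Worked example of the rounding division above: on a 32-bit target where
   UNITS_PER_WORD is 4, a DFmode value (8 bytes) in GPRs needs
   (8 + 4 - 1) / 4 = 2 consecutive registers, while SImode (4 bytes) needs
   (4 + 4 - 1) / 4 = 1.  */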
2055
2056 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2057 MODE. */
2058 static int
2059 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
2060 {
2061 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2062
2063 if (COMPLEX_MODE_P (mode))
2064 mode = GET_MODE_INNER (mode);
2065
2066 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2067 register combinations, and we use PTImode where we need to deal with quad
2068 word memory operations. Don't allow quad words in the argument or frame
2069 pointer registers, just registers 0..31. */
2070 if (mode == PTImode)
2071 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2072 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2073 && ((regno & 1) == 0));
2074
2075 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2076 implementations. Don't allow an item to be split between a FP register
2077 and an Altivec register. Allow TImode in all VSX registers if the user
2078 asked for it. */
2079 if (TARGET_VSX && VSX_REGNO_P (regno)
2080 && (VECTOR_MEM_VSX_P (mode)
2081 || FLOAT128_VECTOR_P (mode)
2082 || reg_addr[mode].scalar_in_vmx_p
2083 || (TARGET_VSX_TIMODE && mode == TImode)
2084 || (TARGET_VADDUQM && mode == V1TImode)))
2085 {
2086 if (FP_REGNO_P (regno))
2087 return FP_REGNO_P (last_regno);
2088
2089 if (ALTIVEC_REGNO_P (regno))
2090 {
2091 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2092 return 0;
2093
2094 return ALTIVEC_REGNO_P (last_regno);
2095 }
2096 }
2097
2098 /* The GPRs can hold any mode, but values bigger than one register
2099 cannot go past R31. */
2100 if (INT_REGNO_P (regno))
2101 return INT_REGNO_P (last_regno);
2102
2103 /* The float registers (except for VSX vector modes) can only hold floating
2104 modes and DImode. */
2105 if (FP_REGNO_P (regno))
2106 {
2107 if (FLOAT128_VECTOR_P (mode))
2108 return false;
2109
2110 if (SCALAR_FLOAT_MODE_P (mode)
2111 && (mode != TDmode || (regno % 2) == 0)
2112 && FP_REGNO_P (last_regno))
2113 return 1;
2114
2115 if (GET_MODE_CLASS (mode) == MODE_INT)
2116 {
2117 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2118 return 1;
2119
2120 if (TARGET_VSX_SMALL_INTEGER)
2121 {
2122 if (mode == SImode)
2123 return 1;
2124
2125 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2126 return 1;
2127 }
2128 }
2129
2130 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2131 && PAIRED_VECTOR_MODE (mode))
2132 return 1;
2133
2134 return 0;
2135 }
2136
2137 /* The CR register can only hold CC modes. */
2138 if (CR_REGNO_P (regno))
2139 return GET_MODE_CLASS (mode) == MODE_CC;
2140
2141 if (CA_REGNO_P (regno))
2142 return mode == Pmode || mode == SImode;
2143
2144 /* AltiVec modes can only go in AltiVec registers. */
2145 if (ALTIVEC_REGNO_P (regno))
2146 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2147 || mode == V1TImode);
2148
2149 /* ...but GPRs can hold SIMD data on the SPE in one register. */
2150 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
2151 return 1;
2152
2153 /* We cannot put non-VSX TImode or PTImode anywhere except general
2154 registers, and the value must fit within the register set. */
2155
2156 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2157 }
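/* For example (assuming a 32-bit target): a DFmode value occupies two GPRs,
   so starting it at r31 gives last_regno == 32, which is no longer a GPR;
   the INT_REGNO_P (last_regno) test above therefore rejects it, enforcing
   the rule that multi-register values may not run past r31.  */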
2158
2159 /* Print interesting facts about registers. */
2160 static void
2161 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2162 {
2163 int r, m;
2164
2165 for (r = first_regno; r <= last_regno; ++r)
2166 {
2167 const char *comma = "";
2168 int len;
2169
2170 if (first_regno == last_regno)
2171 fprintf (stderr, "%s:\t", reg_name);
2172 else
2173 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2174
2175 len = 8;
2176 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2177 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2178 {
2179 if (len > 70)
2180 {
2181 fprintf (stderr, ",\n\t");
2182 len = 8;
2183 comma = "";
2184 }
2185
2186 if (rs6000_hard_regno_nregs[m][r] > 1)
2187 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2188 rs6000_hard_regno_nregs[m][r]);
2189 else
2190 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2191
2192 comma = ", ";
2193 }
2194
2195 if (call_used_regs[r])
2196 {
2197 if (len > 70)
2198 {
2199 fprintf (stderr, ",\n\t");
2200 len = 8;
2201 comma = "";
2202 }
2203
2204 len += fprintf (stderr, "%s%s", comma, "call-used");
2205 comma = ", ";
2206 }
2207
2208 if (fixed_regs[r])
2209 {
2210 if (len > 70)
2211 {
2212 fprintf (stderr, ",\n\t");
2213 len = 8;
2214 comma = "";
2215 }
2216
2217 len += fprintf (stderr, "%s%s", comma, "fixed");
2218 comma = ", ";
2219 }
2220
2221 if (len > 70)
2222 {
2223 fprintf (stderr, ",\n\t");
2224 comma = "";
2225 }
2226
2227 len += fprintf (stderr, "%sreg-class = %s", comma,
2228 reg_class_names[(int)rs6000_regno_regclass[r]]);
2229 comma = ", ";
2230
2231 if (len > 70)
2232 {
2233 fprintf (stderr, ",\n\t");
2234 comma = "";
2235 }
2236
2237 fprintf (stderr, "%sregno = %d\n", comma, r);
2238 }
2239 }
2240
2241 static const char *
2242 rs6000_debug_vector_unit (enum rs6000_vector v)
2243 {
2244 const char *ret;
2245
2246 switch (v)
2247 {
2248 case VECTOR_NONE: ret = "none"; break;
2249 case VECTOR_ALTIVEC: ret = "altivec"; break;
2250 case VECTOR_VSX: ret = "vsx"; break;
2251 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2252 case VECTOR_PAIRED: ret = "paired"; break;
2253 case VECTOR_SPE: ret = "spe"; break;
2254 case VECTOR_OTHER: ret = "other"; break;
2255 default: ret = "unknown"; break;
2256 }
2257
2258 return ret;
2259 }
2260
2261 /* Inner function printing just the address mask for a particular reload
2262 register class. */
2263 DEBUG_FUNCTION char *
2264 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2265 {
2266 static char ret[8];
2267 char *p = ret;
2268
2269 if ((mask & RELOAD_REG_VALID) != 0)
2270 *p++ = 'v';
2271 else if (keep_spaces)
2272 *p++ = ' ';
2273
2274 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2275 *p++ = 'm';
2276 else if (keep_spaces)
2277 *p++ = ' ';
2278
2279 if ((mask & RELOAD_REG_INDEXED) != 0)
2280 *p++ = 'i';
2281 else if (keep_spaces)
2282 *p++ = ' ';
2283
2284 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2285 *p++ = 'O';
2286 else if ((mask & RELOAD_REG_OFFSET) != 0)
2287 *p++ = 'o';
2288 else if (keep_spaces)
2289 *p++ = ' ';
2290
2291 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2292 *p++ = '+';
2293 else if (keep_spaces)
2294 *p++ = ' ';
2295
2296 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2297 *p++ = '+';
2298 else if (keep_spaces)
2299 *p++ = ' ';
2300
2301 if ((mask & RELOAD_REG_AND_M16) != 0)
2302 *p++ = '&';
2303 else if (keep_spaces)
2304 *p++ = ' ';
2305
2306 *p = '\0';
2307
2308 return ret;
2309 }
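/* Sample output (a sketch): with KEEP_SPACES true, a mask of
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET comes out as
   "v io" plus three trailing blanks -- one column per flag, in the order
   tested above, with a space holding the place of each unset flag.  */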
2310
2311 /* Print the address masks in a human readable fashion. */
2312 DEBUG_FUNCTION void
2313 rs6000_debug_print_mode (ssize_t m)
2314 {
2315 ssize_t rc;
2316 int spaces = 0;
2317 bool fuse_extra_p;
2318
2319 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2320 for (rc = 0; rc < N_RELOAD_REG; rc++)
2321 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2322 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2323
2324 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2325 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2326 fprintf (stderr, " Reload=%c%c",
2327 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2328 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2329 else
2330 spaces += sizeof (" Reload=sl") - 1;
2331
2332 if (reg_addr[m].scalar_in_vmx_p)
2333 {
2334 fprintf (stderr, "%*s Upper=y", spaces, "");
2335 spaces = 0;
2336 }
2337 else
2338 spaces += sizeof (" Upper=y") - 1;
2339
2340 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2341 || reg_addr[m].fused_toc);
2342 if (!fuse_extra_p)
2343 {
2344 for (rc = 0; rc < N_RELOAD_REG; rc++)
2345 {
2346 if (rc != RELOAD_REG_ANY)
2347 {
2348 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2350 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2351 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2352 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2353 {
2354 fuse_extra_p = true;
2355 break;
2356 }
2357 }
2358 }
2359 }
2360
2361 if (fuse_extra_p)
2362 {
2363 fprintf (stderr, "%*s Fuse:", spaces, "");
2364 spaces = 0;
2365
2366 for (rc = 0; rc < N_RELOAD_REG; rc++)
2367 {
2368 if (rc != RELOAD_REG_ANY)
2369 {
2370 char load, store;
2371
2372 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2373 load = 'l';
2374 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2375 load = 'L';
2376 else
2377 load = '-';
2378
2379 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2380 store = 's';
2381 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2382 store = 'S';
2383 else
2384 store = '-';
2385
2386 if (load == '-' && store == '-')
2387 spaces += 5;
2388 else
2389 {
2390 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2391 reload_reg_map[rc].name[0], load, store);
2392 spaces = 0;
2393 }
2394 }
2395 }
2396
2397 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2398 {
2399 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2400 spaces = 0;
2401 }
2402 else
2403 spaces += sizeof (" P8gpr") - 1;
2404
2405 if (reg_addr[m].fused_toc)
2406 {
2407 fprintf (stderr, "%*sToc", (spaces + 1), "");
2408 spaces = 0;
2409 }
2410 else
2411 spaces += sizeof (" Toc") - 1;
2412 }
2413 else
2414 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2415
2416 if (rs6000_vector_unit[m] != VECTOR_NONE
2417 || rs6000_vector_mem[m] != VECTOR_NONE)
2418 {
2419 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2420 spaces, "",
2421 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2422 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2423 }
2424
2425 fputs ("\n", stderr);
2426 }
2427
2428 #define DEBUG_FMT_ID "%-32s= "
2429 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2430 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2431 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
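/* A sketch of the resulting layout: DEBUG_FMT_ID left-justifies the label in
   32 columns before the "= ", so for example

     fprintf (stderr, DEBUG_FMT_S, "traceback", "full");

   prints the label padded to column 32, followed by "= full".  */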
2432
2433 /* Print various interesting information with -mdebug=reg. */
2434 static void
2435 rs6000_debug_reg_global (void)
2436 {
2437 static const char *const tf[2] = { "false", "true" };
2438 const char *nl = (const char *)0;
2439 int m;
2440 size_t m1, m2, v;
2441 char costly_num[20];
2442 char nop_num[20];
2443 char flags_buffer[40];
2444 const char *costly_str;
2445 const char *nop_str;
2446 const char *trace_str;
2447 const char *abi_str;
2448 const char *cmodel_str;
2449 struct cl_target_option cl_opts;
2450
2451 /* Modes we want tieable information on. */
2452 static const machine_mode print_tieable_modes[] = {
2453 QImode,
2454 HImode,
2455 SImode,
2456 DImode,
2457 TImode,
2458 PTImode,
2459 SFmode,
2460 DFmode,
2461 TFmode,
2462 IFmode,
2463 KFmode,
2464 SDmode,
2465 DDmode,
2466 TDmode,
2467 V8QImode,
2468 V4HImode,
2469 V2SImode,
2470 V16QImode,
2471 V8HImode,
2472 V4SImode,
2473 V2DImode,
2474 V1TImode,
2475 V32QImode,
2476 V16HImode,
2477 V8SImode,
2478 V4DImode,
2479 V2TImode,
2480 V2SFmode,
2481 V4SFmode,
2482 V2DFmode,
2483 V8SFmode,
2484 V4DFmode,
2485 CCmode,
2486 CCUNSmode,
2487 CCEQmode,
2488 };
2489
2490 /* Virtual regs we are interested in. */
2491 static const struct {
2492 int regno; /* register number. */
2493 const char *name; /* register name. */
2494 } virtual_regs[] = {
2495 { STACK_POINTER_REGNUM, "stack pointer:" },
2496 { TOC_REGNUM, "toc: " },
2497 { STATIC_CHAIN_REGNUM, "static chain: " },
2498 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2499 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2500 { ARG_POINTER_REGNUM, "arg pointer: " },
2501 { FRAME_POINTER_REGNUM, "frame pointer:" },
2502 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2503 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2504 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2505 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2506 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2507 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2508 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2509 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2510 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2511 };
2512
2513 fputs ("\nHard register information:\n", stderr);
2514 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2515 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2516 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2517 LAST_ALTIVEC_REGNO,
2518 "vs");
2519 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2520 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2521 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2522 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2523 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2524 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2525 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2526 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2527
2528 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2529 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2530 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2531
2532 fprintf (stderr,
2533 "\n"
2534 "d reg_class = %s\n"
2535 "f reg_class = %s\n"
2536 "v reg_class = %s\n"
2537 "wa reg_class = %s\n"
2538 "wb reg_class = %s\n"
2539 "wd reg_class = %s\n"
2540 "we reg_class = %s\n"
2541 "wf reg_class = %s\n"
2542 "wg reg_class = %s\n"
2543 "wh reg_class = %s\n"
2544 "wi reg_class = %s\n"
2545 "wj reg_class = %s\n"
2546 "wk reg_class = %s\n"
2547 "wl reg_class = %s\n"
2548 "wm reg_class = %s\n"
2549 "wo reg_class = %s\n"
2550 "wp reg_class = %s\n"
2551 "wq reg_class = %s\n"
2552 "wr reg_class = %s\n"
2553 "ws reg_class = %s\n"
2554 "wt reg_class = %s\n"
2555 "wu reg_class = %s\n"
2556 "wv reg_class = %s\n"
2557 "ww reg_class = %s\n"
2558 "wx reg_class = %s\n"
2559 "wy reg_class = %s\n"
2560 "wz reg_class = %s\n"
2561 "wA reg_class = %s\n"
2562 "wH reg_class = %s\n"
2563 "wI reg_class = %s\n"
2564 "wJ reg_class = %s\n"
2565 "wK reg_class = %s\n"
2566 "\n",
2567 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2568 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2569 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2570 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2571 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2572 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2573 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2574 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2575 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2576 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2577 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2578 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2579 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2580 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2581 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2582 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2583 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2584 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2585 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2586 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2587 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2588 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2589 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2590 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2591 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2592 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2593 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2594 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2595 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2596 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2597 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2598 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2599
2600 nl = "\n";
2601 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2602 rs6000_debug_print_mode (m);
2603
2604 fputs ("\n", stderr);
2605
2606 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2607 {
2608 machine_mode mode1 = print_tieable_modes[m1];
2609 bool first_time = true;
2610
2611 nl = (const char *)0;
2612 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2613 {
2614 machine_mode mode2 = print_tieable_modes[m2];
2615 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2616 {
2617 if (first_time)
2618 {
2619 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2620 nl = "\n";
2621 first_time = false;
2622 }
2623
2624 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2625 }
2626 }
2627
2628 if (!first_time)
2629 fputs ("\n", stderr);
2630 }
2631
2632 if (nl)
2633 fputs (nl, stderr);
2634
2635 if (rs6000_recip_control)
2636 {
2637 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2638
2639 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2640 if (rs6000_recip_bits[m])
2641 {
2642 fprintf (stderr,
2643 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2644 GET_MODE_NAME (m),
2645 (RS6000_RECIP_AUTO_RE_P (m)
2646 ? "auto"
2647 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2648 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2649 ? "auto"
2650 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2651 }
2652
2653 fputs ("\n", stderr);
2654 }
2655
2656 if (rs6000_cpu_index >= 0)
2657 {
2658 const char *name = processor_target_table[rs6000_cpu_index].name;
2659 HOST_WIDE_INT flags
2660 = processor_target_table[rs6000_cpu_index].target_enable;
2661
2662 sprintf (flags_buffer, "-mcpu=%s flags", name);
2663 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2664 }
2665 else
2666 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2667
2668 if (rs6000_tune_index >= 0)
2669 {
2670 const char *name = processor_target_table[rs6000_tune_index].name;
2671 HOST_WIDE_INT flags
2672 = processor_target_table[rs6000_tune_index].target_enable;
2673
2674 sprintf (flags_buffer, "-mtune=%s flags", name);
2675 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2676 }
2677 else
2678 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2679
2680 cl_target_option_save (&cl_opts, &global_options);
2681 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2682 rs6000_isa_flags);
2683
2684 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2685 rs6000_isa_flags_explicit);
2686
2687 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2688 rs6000_builtin_mask);
2689
2690 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2691
2692 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2693 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2694
2695 switch (rs6000_sched_costly_dep)
2696 {
2697 case max_dep_latency:
2698 costly_str = "max_dep_latency";
2699 break;
2700
2701 case no_dep_costly:
2702 costly_str = "no_dep_costly";
2703 break;
2704
2705 case all_deps_costly:
2706 costly_str = "all_deps_costly";
2707 break;
2708
2709 case true_store_to_load_dep_costly:
2710 costly_str = "true_store_to_load_dep_costly";
2711 break;
2712
2713 case store_to_load_dep_costly:
2714 costly_str = "store_to_load_dep_costly";
2715 break;
2716
2717 default:
2718 costly_str = costly_num;
2719 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2720 break;
2721 }
2722
2723 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2724
2725 switch (rs6000_sched_insert_nops)
2726 {
2727 case sched_finish_regroup_exact:
2728 nop_str = "sched_finish_regroup_exact";
2729 break;
2730
2731 case sched_finish_pad_groups:
2732 nop_str = "sched_finish_pad_groups";
2733 break;
2734
2735 case sched_finish_none:
2736 nop_str = "sched_finish_none";
2737 break;
2738
2739 default:
2740 nop_str = nop_num;
2741 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2742 break;
2743 }
2744
2745 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2746
2747 switch (rs6000_sdata)
2748 {
2749 default:
2750 case SDATA_NONE:
2751 break;
2752
2753 case SDATA_DATA:
2754 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2755 break;
2756
2757 case SDATA_SYSV:
2758 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2759 break;
2760
2761 case SDATA_EABI:
2762 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2763 break;
2764
2765 }
2766
2767 switch (rs6000_traceback)
2768 {
2769 case traceback_default: trace_str = "default"; break;
2770 case traceback_none: trace_str = "none"; break;
2771 case traceback_part: trace_str = "part"; break;
2772 case traceback_full: trace_str = "full"; break;
2773 default: trace_str = "unknown"; break;
2774 }
2775
2776 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2777
2778 switch (rs6000_current_cmodel)
2779 {
2780 case CMODEL_SMALL: cmodel_str = "small"; break;
2781 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2782 case CMODEL_LARGE: cmodel_str = "large"; break;
2783 default: cmodel_str = "unknown"; break;
2784 }
2785
2786 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2787
2788 switch (rs6000_current_abi)
2789 {
2790 case ABI_NONE: abi_str = "none"; break;
2791 case ABI_AIX: abi_str = "aix"; break;
2792 case ABI_ELFv2: abi_str = "ELFv2"; break;
2793 case ABI_V4: abi_str = "V4"; break;
2794 case ABI_DARWIN: abi_str = "darwin"; break;
2795 default: abi_str = "unknown"; break;
2796 }
2797
2798 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2799
2800 if (rs6000_altivec_abi)
2801 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2802
2803 if (rs6000_spe_abi)
2804 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2805
2806 if (rs6000_darwin64_abi)
2807 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2808
2809 if (rs6000_float_gprs)
2810 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2811
2812 fprintf (stderr, DEBUG_FMT_S, "fprs",
2813 (TARGET_FPRS ? "true" : "false"));
2814
2815 fprintf (stderr, DEBUG_FMT_S, "single_float",
2816 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2817
2818 fprintf (stderr, DEBUG_FMT_S, "double_float",
2819 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2820
2821 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2822 (TARGET_SOFT_FLOAT ? "true" : "false"));
2823
2824 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2825 (TARGET_E500_SINGLE ? "true" : "false"));
2826
2827 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2828 (TARGET_E500_DOUBLE ? "true" : "false"));
2829
2830 if (TARGET_LINK_STACK)
2831 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2832
2833 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2834
2835 if (TARGET_P8_FUSION)
2836 {
2837 char options[80];
2838
2839 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2840 if (TARGET_TOC_FUSION)
2841 strcat (options, ", toc");
2842
2843 if (TARGET_P8_FUSION_SIGN)
2844 strcat (options, ", sign");
2845
2846 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2847 }
2848
2849 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2850 TARGET_SECURE_PLT ? "secure" : "bss");
2851 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2852 aix_struct_return ? "aix" : "sysv");
2853 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2854 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2855 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2856 tf[!!rs6000_align_branch_targets]);
2857 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2858 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2859 rs6000_long_double_type_size);
2860 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2861 (int)rs6000_sched_restricted_insns_priority);
2862 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2863 (int)END_BUILTINS);
2864 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2865 (int)RS6000_BUILTIN_COUNT);
2866
2867 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2868 (int)TARGET_FLOAT128_ENABLE_TYPE);
2869
2870 if (TARGET_VSX)
2871 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2872 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2873
2874 if (TARGET_DIRECT_MOVE_128)
2875 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2876 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2877 }
2878
2879 \f
2880 /* Update the addr mask bits in reg_addr to help secondary reload and the
2881 legitimate address support figure out the appropriate addressing to
2882 use. */
2883
2884 static void
2885 rs6000_setup_reg_addr_masks (void)
2886 {
2887 ssize_t rc, reg, m, nregs;
2888 addr_mask_type any_addr_mask, addr_mask;
2889
2890 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2891 {
2892 machine_mode m2 = (machine_mode) m;
2893 bool complex_p = false;
2894 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2895 size_t msize;
2896
2897 if (COMPLEX_MODE_P (m2))
2898 {
2899 complex_p = true;
2900 m2 = GET_MODE_INNER (m2);
2901 }
2902
2903 msize = GET_MODE_SIZE (m2);
2904
2905 /* SDmode is special in that we want to access it only via REG+REG
2906 addressing on power7 and above, since we want to use the LFIWZX and
2907 STFIWX instructions to load and store it. */
2908 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2909
2910 any_addr_mask = 0;
2911 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2912 {
2913 addr_mask = 0;
2914 reg = reload_reg_map[rc].reg;
2915
2916 /* Can mode values go in the GPR/FPR/Altivec registers? */
2917 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2918 {
2919 bool small_int_vsx_p = (small_int_p
2920 && (rc == RELOAD_REG_FPR
2921 || rc == RELOAD_REG_VMX));
2922
2923 nregs = rs6000_hard_regno_nregs[m][reg];
2924 addr_mask |= RELOAD_REG_VALID;
2925
2926 /* Indicate if the mode takes more than 1 physical register. If
2927 it takes a single register, indicate it can do REG+REG
2928 addressing. Small integers in VSX registers can only do
2929 REG+REG addressing. */
2930 if (small_int_vsx_p)
2931 addr_mask |= RELOAD_REG_INDEXED;
2932 else if (nregs > 1 || m == BLKmode || complex_p)
2933 addr_mask |= RELOAD_REG_MULTIPLE;
2934 else
2935 addr_mask |= RELOAD_REG_INDEXED;
2936
2937 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2938 addressing. Restrict addressing on SPE for 64-bit types
2939 because of the SUBREG hackery used to address 64-bit floats in
2940 '32-bit' GPRs. If we allow scalars into Altivec registers,
2941 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2942
2943 if (TARGET_UPDATE
2944 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2945 && msize <= 8
2946 && !VECTOR_MODE_P (m2)
2947 && !FLOAT128_VECTOR_P (m2)
2948 && !complex_p
2949 && !small_int_vsx_p
2950 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2951 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2952 && !(TARGET_E500_DOUBLE && msize == 8))
2953 {
2954 addr_mask |= RELOAD_REG_PRE_INCDEC;
2955
2956 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2957 we don't allow PRE_MODIFY for some multi-register
2958 operations. */
2959 switch (m)
2960 {
2961 default:
2962 addr_mask |= RELOAD_REG_PRE_MODIFY;
2963 break;
2964
2965 case E_DImode:
2966 if (TARGET_POWERPC64)
2967 addr_mask |= RELOAD_REG_PRE_MODIFY;
2968 break;
2969
2970 case E_DFmode:
2971 case E_DDmode:
2972 if (TARGET_DF_INSN)
2973 addr_mask |= RELOAD_REG_PRE_MODIFY;
2974 break;
2975 }
2976 }
2977 }
2978
2979 /* GPR and FPR registers can do REG+OFFSET addressing, except
2980 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2981 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2982 if ((addr_mask != 0) && !indexed_only_p
2983 && msize <= 8
2984 && (rc == RELOAD_REG_GPR
2985 || ((msize == 8 || m2 == SFmode)
2986 && (rc == RELOAD_REG_FPR
2987 || (rc == RELOAD_REG_VMX
2988 && TARGET_P9_DFORM_SCALAR)))))
2989 addr_mask |= RELOAD_REG_OFFSET;
2990
2991 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2992 instructions are enabled. The offset for 128-bit VSX registers is
2993 only 12 bits. While GPRs can handle the full offset range, VSX
2994 registers can only handle the restricted range. */
2995 else if ((addr_mask != 0) && !indexed_only_p
2996 && msize == 16 && TARGET_P9_DFORM_VECTOR
2997 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2998 || (m2 == TImode && TARGET_VSX_TIMODE)))
2999 {
3000 addr_mask |= RELOAD_REG_OFFSET;
3001 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3002 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3003 }
3004
3005 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3006 addressing on 128-bit types. */
3007 if (rc == RELOAD_REG_VMX && msize == 16
3008 && (addr_mask & RELOAD_REG_VALID) != 0)
3009 addr_mask |= RELOAD_REG_AND_M16;
3010
3011 reg_addr[m].addr_mask[rc] = addr_mask;
3012 any_addr_mask |= addr_mask;
3013 }
3014
3015 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
3016 }
3017 }
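/* Note on use: the RELOAD_REG_ANY entry is the union of the per-class masks
   computed above, so a query such as

     (reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_OFFSET) != 0

   asks whether *some* register class supports REG+OFFSET addressing for
   MODE, without naming a particular class.  */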
3018
3019 \f
3020 /* Initialize the various global tables that are based on register size. */
3021 static void
3022 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3023 {
3024 ssize_t r, m, c;
3025 int align64;
3026 int align32;
3027
3028 /* Precalculate REGNO_REG_CLASS. */
3029 rs6000_regno_regclass[0] = GENERAL_REGS;
3030 for (r = 1; r < 32; ++r)
3031 rs6000_regno_regclass[r] = BASE_REGS;
3032
3033 for (r = 32; r < 64; ++r)
3034 rs6000_regno_regclass[r] = FLOAT_REGS;
3035
3036 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3037 rs6000_regno_regclass[r] = NO_REGS;
3038
3039 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3040 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3041
3042 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3043 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3044 rs6000_regno_regclass[r] = CR_REGS;
3045
3046 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3047 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3048 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3049 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3050 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3051 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
3052 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
3053 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3054 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3055 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3056 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3057 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3058
3059 /* Precalculate the mapping from register class to the simpler reload register class. We don't
3060 need all of the register classes that are combinations of different
3061 classes, just the simple ones that have constraint letters. */
3062 for (c = 0; c < N_REG_CLASSES; c++)
3063 reg_class_to_reg_type[c] = NO_REG_TYPE;
3064
3065 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3066 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3067 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3068 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3069 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3070 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3071 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3072 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3073 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3074 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3075 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
3076 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
3077
3078 if (TARGET_VSX)
3079 {
3080 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3081 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3082 }
3083 else
3084 {
3085 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3086 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3087 }
3088
3089 /* Precalculate the valid memory formats as well as the vector information;
3090 this must be set up before the rs6000_hard_regno_nregs_internal calls
3091 below. */
3092 gcc_assert ((int)VECTOR_NONE == 0);
3093 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3094 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3095
3096 gcc_assert ((int)CODE_FOR_nothing == 0);
3097 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3098
3099 gcc_assert ((int)NO_REGS == 0);
3100 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3101
3102 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
3103 controls whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
3104 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3105 {
3106 align64 = 64;
3107 align32 = 32;
3108 }
3109 else
3110 {
3111 align64 = 128;
3112 align32 = 128;
3113 }
3114
3115 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3116 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3117 if (TARGET_FLOAT128_TYPE)
3118 {
3119 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3120 rs6000_vector_align[KFmode] = 128;
3121
3122 if (FLOAT128_IEEE_P (TFmode))
3123 {
3124 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3125 rs6000_vector_align[TFmode] = 128;
3126 }
3127 }
3128
3129 /* V2DF mode, VSX only. */
3130 if (TARGET_VSX)
3131 {
3132 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3133 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3134 rs6000_vector_align[V2DFmode] = align64;
3135 }
3136
3137 /* V4SF mode, either VSX or Altivec. */
3138 if (TARGET_VSX)
3139 {
3140 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3141 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3142 rs6000_vector_align[V4SFmode] = align32;
3143 }
3144 else if (TARGET_ALTIVEC)
3145 {
3146 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3147 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3148 rs6000_vector_align[V4SFmode] = align32;
3149 }
3150
3151 /* V16QImode, V8HImode, V4SImode have Altivec-only arithmetic, but can
3152 possibly use VSX loads and stores. */
3153 if (TARGET_ALTIVEC)
3154 {
3155 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3156 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3157 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3158 rs6000_vector_align[V4SImode] = align32;
3159 rs6000_vector_align[V8HImode] = align32;
3160 rs6000_vector_align[V16QImode] = align32;
3161
3162 if (TARGET_VSX)
3163 {
3164 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3165 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3166 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3167 }
3168 else
3169 {
3170 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3171 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3172 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3173 }
3174 }
3175
3176 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit. Allow
3177 it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3178 if (TARGET_VSX)
3179 {
3180 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3181 rs6000_vector_unit[V2DImode]
3182 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3183 rs6000_vector_align[V2DImode] = align64;
3184
3185 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3186 rs6000_vector_unit[V1TImode]
3187 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3188 rs6000_vector_align[V1TImode] = 128;
3189 }
3190
3191 /* DFmode, see if we want to use the VSX unit. Memory is handled
3192 differently, so don't set rs6000_vector_mem. */
3193 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3194 {
3195 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3196 rs6000_vector_align[DFmode] = 64;
3197 }
3198
3199 /* SFmode, see if we want to use the VSX unit. */
3200 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3201 {
3202 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3203 rs6000_vector_align[SFmode] = 32;
3204 }
3205
3206 /* Allow TImode in VSX register and set the VSX memory macros. */
3207 if (TARGET_VSX && TARGET_VSX_TIMODE)
3208 {
3209 rs6000_vector_mem[TImode] = VECTOR_VSX;
3210 rs6000_vector_align[TImode] = align64;
3211 }
3212
3213 /* TODO add SPE and paired floating point vector support. */
3214
3215 /* Register class constraints for the constraints that depend on compile
3216 switches. When the VSX code was added, different constraints were added
3217 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3218 of the VSX registers are used. The register classes for scalar floating
3219 point types are set based on whether we allow that type into the upper
3220 (Altivec) registers. GCC has register classes to target the Altivec
3221 registers for load/store operations, to select using a VSX memory
3222 operation instead of the traditional floating point operation. The
3223 constraints are:
3224
3225 d - Register class to use with traditional DFmode instructions.
3226 f - Register class to use with traditional SFmode instructions.
3227 v - Altivec register.
3228 wa - Any VSX register.
3229 wc - Reserved to represent individual CR bits (used in LLVM).
3230 wd - Preferred register class for V2DFmode.
3231 wf - Preferred register class for V4SFmode.
3232 wg - Float register for power6x move insns.
3233 wh - FP register for direct move instructions.
3234 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3235 wj - FP or VSX register to hold 64-bit integers for direct moves.
3236 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3237 wl - Float register if we can do 32-bit signed int loads.
3238 wm - VSX register for ISA 2.07 direct move operations.
3239 wn - always NO_REGS.
3240 wr - GPR if 64-bit mode is permitted.
3241 ws - Register class to do ISA 2.06 DF operations.
3242 wt - VSX register for TImode in VSX registers.
3243 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3244 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3245 ww - Register class to do SF conversions in with VSX operations.
3246 wx - Float register if we can do 32-bit int stores.
3247 wy - Register class to do ISA 2.07 SF operations.
3248 wz - Float register if we can do 32-bit unsigned int loads.
3249 wH - Altivec register if SImode is allowed in VSX registers.
3250 wI - VSX register if SImode is allowed in VSX registers.
3251 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3252 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3253
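/* A hypothetical machine-description fragment (a sketch, not a pattern from
   the actual .md file) showing how these constraint letters are used:

     (define_insn "*movdf_sketch"
       [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws")
             (match_operand:DF 1 "gpc_reg_operand" "d,ws"))]
       ...)

   Here "d" limits an alternative to the traditional FPRs, while "ws" widens
   it to all VSX registers when TARGET_UPPER_REGS_DF is set (see the
   assignments below).  */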
3254 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3255 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3256
3257 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
3258 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3259
3260 if (TARGET_VSX)
3261 {
3262 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3263 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3264 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3265
3266 if (TARGET_VSX_TIMODE)
3267 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3268
3269 if (TARGET_UPPER_REGS_DF) /* DFmode */
3270 {
3271 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3272 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3273 }
3274 else
3275 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3276
3277 if (TARGET_UPPER_REGS_DI) /* DImode */
3278 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3279 else
3280 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3281 }
3282
3283 /* Add conditional constraints based on various options, to allow us to
3284 collapse multiple insn patterns. */
3285 if (TARGET_ALTIVEC)
3286 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3287
3288 if (TARGET_MFPGPR) /* DFmode */
3289 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3290
3291 if (TARGET_LFIWAX)
3292 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3293
3294 if (TARGET_DIRECT_MOVE)
3295 {
3296 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3297 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3298 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3299 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3300 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3301 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3302 }
3303
3304 if (TARGET_POWERPC64)
3305 {
3306 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3307 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3308 }
3309
3310 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3311 {
3312 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3313 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3314 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3315 }
3316 else if (TARGET_P8_VECTOR)
3317 {
3318 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3319 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3320 }
3321 else if (TARGET_VSX)
3322 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3323
3324 if (TARGET_STFIWX)
3325 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3326
3327 if (TARGET_LFIWZX)
3328 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3329
3330 if (TARGET_FLOAT128_TYPE)
3331 {
3332 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3333 if (FLOAT128_IEEE_P (TFmode))
3334 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3335 }
3336
3337 /* Support for new D-form instructions. */
3338 if (TARGET_P9_DFORM_SCALAR)
3339 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3340
3341 /* Support for ISA 3.0 (power9) vectors. */
3342 if (TARGET_P9_VECTOR)
3343 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3344
3345 /* Support for new direct moves (ISA 3.0 + 64bit). */
3346 if (TARGET_DIRECT_MOVE_128)
3347 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3348
3349 /* Support small integers in VSX registers. */
3350 if (TARGET_VSX_SMALL_INTEGER)
3351 {
3352 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3353 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3354 if (TARGET_P9_VECTOR)
3355 {
3356 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3357 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3358 }
3359 }
3360
3361 /* Set up the reload helper and direct move functions. */
3362 if (TARGET_VSX || TARGET_ALTIVEC)
3363 {
3364 if (TARGET_64BIT)
3365 {
3366 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3367 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3368 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3369 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3370 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3371 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3372 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3373 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3374 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3375 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3376 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3377 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3378 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3379 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3380 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3381 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3382 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3383 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3384 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3385 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3386
3387 if (FLOAT128_VECTOR_P (KFmode))
3388 {
3389 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3390 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3391 }
3392
3393 if (FLOAT128_VECTOR_P (TFmode))
3394 {
3395 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3396 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3397 }
3398
3399 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3400 available. */
3401 if (TARGET_NO_SDMODE_STACK)
3402 {
3403 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3404 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3405 }
3406
3407 if (TARGET_VSX_TIMODE)
3408 {
3409 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3410 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3411 }
3412
3413 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3414 {
3415 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3416 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3417 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3418 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3419 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3420 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3421 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3422 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3423 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3424
3425 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3426 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3427 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3428 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3429 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3430 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3431 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3432 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3433 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3434
3435 if (FLOAT128_VECTOR_P (KFmode))
3436 {
3437 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3438 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3439 }
3440
3441 if (FLOAT128_VECTOR_P (TFmode))
3442 {
3443 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3444 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3445 }
3446 }
3447 }
3448 else
3449 {
3450 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3451 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3452 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3453 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3454 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3455 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3456 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3457 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3458 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3459 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3460 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3461 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3462 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3463 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3464 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3465 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3466 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3467 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3468 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3469 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3470
3471 if (FLOAT128_VECTOR_P (KFmode))
3472 {
3473 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3474 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3475 }
3476
3477 if (FLOAT128_IEEE_P (TFmode))
3478 {
3479 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3480 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3481 }
3482
3483 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3484 available. */
3485 if (TARGET_NO_SDMODE_STACK)
3486 {
3487 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3488 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3489 }
3490
3491 if (TARGET_VSX_TIMODE)
3492 {
3493 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3494 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3495 }
3496
3497 if (TARGET_DIRECT_MOVE)
3498 {
3499 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3500 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3501 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3502 }
3503 }
3504
3505 if (TARGET_UPPER_REGS_DF)
3506 reg_addr[DFmode].scalar_in_vmx_p = true;
3507
3508 if (TARGET_UPPER_REGS_DI)
3509 reg_addr[DImode].scalar_in_vmx_p = true;
3510
3511 if (TARGET_UPPER_REGS_SF)
3512 reg_addr[SFmode].scalar_in_vmx_p = true;
3513
3514 if (TARGET_VSX_SMALL_INTEGER)
3515 {
3516 reg_addr[SImode].scalar_in_vmx_p = true;
3517 if (TARGET_P9_VECTOR)
3518 {
3519 reg_addr[HImode].scalar_in_vmx_p = true;
3520 reg_addr[QImode].scalar_in_vmx_p = true;
3521 }
3522 }
3523 }
3524
3525 /* Set up the fusion operations. */
3526 if (TARGET_P8_FUSION)
3527 {
3528 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3529 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3530 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3531 if (TARGET_64BIT)
3532 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3533 }
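/* Hedged sketch of what the power8 fusion support above enables: keeping an
   addis adjacent to its dependent load so the hardware can fuse the pair,
   e.g. something like

     addis 9,2,.LC0@toc@ha
     lwz   3,.LC0@toc@l(9)

   The real sequences come from the fusion_gpr_load_* insns named above;
   this is only an illustration. */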
3534
3535 if (TARGET_P9_FUSION)
3536 {
3537 struct fuse_insns {
3538 enum machine_mode mode; /* mode of the fused type. */
3539 enum machine_mode pmode; /* pointer mode. */
3540 enum rs6000_reload_reg_type rtype; /* register type. */
3541 enum insn_code load; /* load insn. */
3542 enum insn_code store; /* store insn. */
3543 };
3544
3545 static const struct fuse_insns addis_insns[] = {
3546 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3547 CODE_FOR_fusion_vsx_di_sf_load,
3548 CODE_FOR_fusion_vsx_di_sf_store },
3549
3550 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3551 CODE_FOR_fusion_vsx_si_sf_load,
3552 CODE_FOR_fusion_vsx_si_sf_store },
3553
3554 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3555 CODE_FOR_fusion_vsx_di_df_load,
3556 CODE_FOR_fusion_vsx_di_df_store },
3557
3558 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3559 CODE_FOR_fusion_vsx_si_df_load,
3560 CODE_FOR_fusion_vsx_si_df_store },
3561
3562 { E_DImode, E_DImode, RELOAD_REG_FPR,
3563 CODE_FOR_fusion_vsx_di_di_load,
3564 CODE_FOR_fusion_vsx_di_di_store },
3565
3566 { E_DImode, E_SImode, RELOAD_REG_FPR,
3567 CODE_FOR_fusion_vsx_si_di_load,
3568 CODE_FOR_fusion_vsx_si_di_store },
3569
3570 { E_QImode, E_DImode, RELOAD_REG_GPR,
3571 CODE_FOR_fusion_gpr_di_qi_load,
3572 CODE_FOR_fusion_gpr_di_qi_store },
3573
3574 { E_QImode, E_SImode, RELOAD_REG_GPR,
3575 CODE_FOR_fusion_gpr_si_qi_load,
3576 CODE_FOR_fusion_gpr_si_qi_store },
3577
3578 { E_HImode, E_DImode, RELOAD_REG_GPR,
3579 CODE_FOR_fusion_gpr_di_hi_load,
3580 CODE_FOR_fusion_gpr_di_hi_store },
3581
3582 { E_HImode, E_SImode, RELOAD_REG_GPR,
3583 CODE_FOR_fusion_gpr_si_hi_load,
3584 CODE_FOR_fusion_gpr_si_hi_store },
3585
3586 { E_SImode, E_DImode, RELOAD_REG_GPR,
3587 CODE_FOR_fusion_gpr_di_si_load,
3588 CODE_FOR_fusion_gpr_di_si_store },
3589
3590 { E_SImode, E_SImode, RELOAD_REG_GPR,
3591 CODE_FOR_fusion_gpr_si_si_load,
3592 CODE_FOR_fusion_gpr_si_si_store },
3593
3594 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3595 CODE_FOR_fusion_gpr_di_sf_load,
3596 CODE_FOR_fusion_gpr_di_sf_store },
3597
3598 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3599 CODE_FOR_fusion_gpr_si_sf_load,
3600 CODE_FOR_fusion_gpr_si_sf_store },
3601
3602 { E_DImode, E_DImode, RELOAD_REG_GPR,
3603 CODE_FOR_fusion_gpr_di_di_load,
3604 CODE_FOR_fusion_gpr_di_di_store },
3605
3606 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3607 CODE_FOR_fusion_gpr_di_df_load,
3608 CODE_FOR_fusion_gpr_di_df_store },
3609 };
3610
3611 machine_mode cur_pmode = Pmode;
3612 size_t i;
3613
3614 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3615 {
3616 machine_mode xmode = addis_insns[i].mode;
3617 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3618
3619 if (addis_insns[i].pmode != cur_pmode)
3620 continue;
3621
3622 if (rtype == RELOAD_REG_FPR
3623 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3624 continue;
3625
3626 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3627 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3628
3629 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3630 {
3631 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3632 = addis_insns[i].load;
3633 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3634 = addis_insns[i].store;
3635 }
3636 }
3637 }
3638
3639 /* Note which types support fusing a TOC setup with a memory insn. We only
3640 do fused TOCs for the medium/large code models. */
3641 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3642 && (TARGET_CMODEL != CMODEL_SMALL))
3643 {
3644 reg_addr[QImode].fused_toc = true;
3645 reg_addr[HImode].fused_toc = true;
3646 reg_addr[SImode].fused_toc = true;
3647 reg_addr[DImode].fused_toc = true;
3648 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3649 {
3650 if (TARGET_SINGLE_FLOAT)
3651 reg_addr[SFmode].fused_toc = true;
3652 if (TARGET_DOUBLE_FLOAT)
3653 reg_addr[DFmode].fused_toc = true;
3654 }
3655 }
3656
3657 /* Precalculate HARD_REGNO_NREGS. */
3658 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3659 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3660 rs6000_hard_regno_nregs[m][r]
3661 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3662
3663 /* Precalculate HARD_REGNO_MODE_OK. */
3664 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3665 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3666 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3667 rs6000_hard_regno_mode_ok_p[m][r] = true;
3668
3669 /* Precalculate CLASS_MAX_NREGS sizes. */
3670 for (c = 0; c < LIM_REG_CLASSES; ++c)
3671 {
3672 int reg_size;
3673
3674 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3675 reg_size = UNITS_PER_VSX_WORD;
3676
3677 else if (c == ALTIVEC_REGS)
3678 reg_size = UNITS_PER_ALTIVEC_WORD;
3679
3680 else if (c == FLOAT_REGS)
3681 reg_size = UNITS_PER_FP_WORD;
3682
3683 else
3684 reg_size = UNITS_PER_WORD;
3685
3686 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3687 {
3688 machine_mode m2 = (machine_mode)m;
3689 int reg_size2 = reg_size;
3690
3691 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3692 in VSX. */
3693 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3694 reg_size2 = UNITS_PER_FP_WORD;
3695
3696 rs6000_class_max_nregs[m][c]
3697 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3698 }
3699 }
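/* Worked example, assuming the usual 8-byte FP and 16-byte VSX register
   sizes: V2DFmode is 16 bytes, so it needs (16 + 16 - 1) / 16 = 1 register
   in a VSX class, while IBM-format 128-bit floating point (FLOAT128_2REG_P)
   keeps reg_size2 at 8 and therefore needs (16 + 8 - 1) / 8 = 2 registers
   even in a VSX class. */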
3700
3701 if (TARGET_E500_DOUBLE)
3702 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3703
3704 /* Calculate which modes to automatically generate code to use the
3705 reciprocal divide and square root instructions. In the future, possibly
3706 automatically generate the instructions even if the user did not specify
3707 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3708 not accurate enough. */
3709 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3710 if (TARGET_FRES)
3711 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3712 if (TARGET_FRE)
3713 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3714 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3715 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3716 if (VECTOR_UNIT_VSX_P (V2DFmode))
3717 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3718
3719 if (TARGET_FRSQRTES)
3720 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3721 if (TARGET_FRSQRTE)
3722 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3723 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3724 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3725 if (VECTOR_UNIT_VSX_P (V2DFmode))
3726 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3727
3728 if (rs6000_recip_control)
3729 {
3730 if (!flag_finite_math_only)
3731 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3732 if (flag_trapping_math)
3733 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3734 if (!flag_reciprocal_math)
3735 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3736 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3737 {
3738 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3739 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3740 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3741
3742 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3743 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3744 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3745
3746 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3747 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3748 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3749
3750 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3751 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3752 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3753
3754 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3755 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3756 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3757
3758 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3759 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3760 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3761
3762 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3763 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3764 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3765
3766 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3767 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3768 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3769 }
3770 }
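/* Hedged usage sketch: with a command line along the lines of

     gcc -O3 -mcpu=power7 -ffast-math -mrecip=div,rsqrt

   the AUTO_RE/AUTO_RSQRTE bits set above allow divides and square roots to
   be expanded with reciprocal estimate instructions (e.g. fre or xvredp)
   refined by Newton-Raphson steps, instead of full divide/sqrt insns. */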
3771
3772 /* Update the addr mask bits in reg_addr to help secondary reload and the
3773 legitimate address support figure out the appropriate addressing to
3774 use. */
3775 rs6000_setup_reg_addr_masks ();
3776
3777 if (global_init_p || TARGET_DEBUG_TARGET)
3778 {
3779 if (TARGET_DEBUG_REG)
3780 rs6000_debug_reg_global ();
3781
3782 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3783 fprintf (stderr,
3784 "SImode variable mult cost = %d\n"
3785 "SImode constant mult cost = %d\n"
3786 "SImode short constant mult cost = %d\n"
3787 "DImode multipliciation cost = %d\n"
3788 "SImode division cost = %d\n"
3789 "DImode division cost = %d\n"
3790 "Simple fp operation cost = %d\n"
3791 "DFmode multiplication cost = %d\n"
3792 "SFmode division cost = %d\n"
3793 "DFmode division cost = %d\n"
3794 "cache line size = %d\n"
3795 "l1 cache size = %d\n"
3796 "l2 cache size = %d\n"
3797 "simultaneous prefetches = %d\n"
3798 "\n",
3799 rs6000_cost->mulsi,
3800 rs6000_cost->mulsi_const,
3801 rs6000_cost->mulsi_const9,
3802 rs6000_cost->muldi,
3803 rs6000_cost->divsi,
3804 rs6000_cost->divdi,
3805 rs6000_cost->fp,
3806 rs6000_cost->dmul,
3807 rs6000_cost->sdiv,
3808 rs6000_cost->ddiv,
3809 rs6000_cost->cache_line_size,
3810 rs6000_cost->l1_cache_size,
3811 rs6000_cost->l2_cache_size,
3812 rs6000_cost->simultaneous_prefetches);
3813 }
3814 }
3815
3816 #if TARGET_MACHO
3817 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3818
3819 static void
3820 darwin_rs6000_override_options (void)
3821 {
3822 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3823 off. */
3824 rs6000_altivec_abi = 1;
3825 TARGET_ALTIVEC_VRSAVE = 1;
3826 rs6000_current_abi = ABI_DARWIN;
3827
3828 if (DEFAULT_ABI == ABI_DARWIN
3829 && TARGET_64BIT)
3830 darwin_one_byte_bool = 1;
3831
3832 if (TARGET_64BIT && ! TARGET_POWERPC64)
3833 {
3834 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3835 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3836 }
3837 if (flag_mkernel)
3838 {
3839 rs6000_default_long_calls = 1;
3840 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3841 }
3842
3843 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3844 Altivec. */
3845 if (!flag_mkernel && !flag_apple_kext
3846 && TARGET_64BIT
3847 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3848 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3849
3850 /* Unless the user (not the configurer) has explicitly overridden
3851 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3852 G4 unless targeting the kernel. */
3853 if (!flag_mkernel
3854 && !flag_apple_kext
3855 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3856 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3857 && ! global_options_set.x_rs6000_cpu_index)
3858 {
3859 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3860 }
3861 }
3862 #endif
3863
3864 /* If not otherwise specified by a target, make 'long double' equivalent to
3865 'double'. */
3866
3867 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3868 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3869 #endif
3870
3871 /* Return the builtin mask of the various options used that could affect
3872 which builtins are enabled. In the past we used target_flags, but we've
3873 run out of bits, and some options like SPE and PAIRED are no longer in
3874 target_flags. */
3875
3876 HOST_WIDE_INT
3877 rs6000_builtin_mask_calculate (void)
3878 {
3879 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3880 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3881 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3882 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3883 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3884 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3885 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3886 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3887 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3888 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3889 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3890 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3891 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3892 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3893 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3894 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3895 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3896 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3897 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3898 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3899 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3900 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
3901 }
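/* Illustrative only: callers can test the returned mask to gate built-in
   expansion, e.g.

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     if ((mask & RS6000_BTM_ALTIVEC) != 0)
       enable_altivec_builtins ();

   where enable_altivec_builtins is a made-up name; the actual consumer is
   the rs6000_builtin_mask variable and the built-in expansion code. */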
3902
3903 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3904 to clobber the XER[CA] bit because clobbering that bit without telling
3905 the compiler worked just fine with versions of GCC before GCC 5, and
3906 breaking a lot of older code in ways that are hard to track down is
3907 not such a great idea. */
3908
3909 static rtx_insn *
3910 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3911 vec<const char *> &/*constraints*/,
3912 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3913 {
3914 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3915 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3916 return NULL;
3917 }
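/* Hedged example of the pre-GCC 5 code this keeps working: an asm statement
   that modifies the carry bit without declaring it, such as

     asm ("addic %0,%1,-1\n\taddze %0,%0" : "=r" (x) : "r" (y));

   (addic writes XER[CA], addze reads it). The blanket clobber pushed above
   keeps such statements correct without source changes. */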
3918
3919 /* Override command line options.
3920
3921 Combine build-specific configuration information with options
3922 specified on the command line to set various state variables which
3923 influence code generation, optimization, and expansion of built-in
3924 functions. Assure that command-line configuration preferences are
3925 compatible with each other and with the build configuration; issue
3926 warnings while adjusting configuration or error messages while
3927 rejecting configuration.
3928
3929 Upon entry to this function:
3930
3931 This function is called once at the beginning of
3932 compilation, and then again at the start and end of compiling
3933 each section of code that has a different configuration, as
3934 indicated, for example, by adding the
3935
3936 __attribute__((__target__("cpu=power9")))
3937
3938 qualifier to a function definition or, for example, by bracketing
3939 code between
3940
3941 #pragma GCC target("altivec")
3942
3943 and
3944
3945 #pragma GCC reset_options
3946
3947 directives. Parameter global_init_p is true for the initial
3948 invocation, which initializes global variables, and false for all
3949 subsequent invocations.
3950
3951
3952 Various global state information is assumed to be valid. This
3953 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3954 default CPU specified at build configure time, TARGET_DEFAULT,
3955 representing the default set of option flags for the default
3956 target, and global_options_set.x_rs6000_isa_flags, representing
3957 which options were requested on the command line.
3958
3959 Upon return from this function:
3960
3961 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3962 was set by name on the command line. Additionally, if certain
3963 attributes are automatically enabled or disabled by this function
3964 in order to assure compatibility between options and
3965 configuration, the flags associated with those attributes are
3966 also set. By setting these "explicit bits", we avoid the risk
3967 that other code might accidentally overwrite these particular
3968 attributes with "default values".
3969
3970 The various bits of rs6000_isa_flags are set to indicate the
3971 target options that have been selected for the most current
3972 compilation efforts. This has the effect of also turning on the
3973 associated TARGET_XXX values since these are macros which are
3974 generally defined to test the corresponding bit of the
3975 rs6000_isa_flags variable.
3976
3977 The variable rs6000_builtin_mask is set to represent the target
3978 options for the most current compilation efforts, consistent with
3979 the current contents of rs6000_isa_flags. This variable controls
3980 expansion of built-in functions.
3981
3982 Various other global variables and fields of global structures
3983 (over 50 in all) are initialized to reflect the desired options
3984 for the most current compilation efforts. */
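/* Illustrative example of the multiple-invocation case described above: a
   translation unit such as

     __attribute__((__target__("cpu=power9")))
     long f (void) { return __builtin_darn (); }

     long g (void) { return 0; }

   triggers one global invocation plus re-invocations around f, with
   rs6000_isa_flags reflecting the power9 configuration while f is
   compiled. */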
3985
3986 static bool
3987 rs6000_option_override_internal (bool global_init_p)
3988 {
3989 bool ret = true;
3990 bool have_cpu = false;
3991
3992 /* The default cpu requested at configure time, if any. */
3993 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3994
3995 HOST_WIDE_INT set_masks;
3996 HOST_WIDE_INT ignore_masks;
3997 int cpu_index;
3998 int tune_index;
3999 struct cl_target_option *main_target_opt
4000 = ((global_init_p || target_option_default_node == NULL)
4001 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4002
4003 /* Print defaults. */
4004 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4005 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4006
4007 /* Remember the explicit arguments. */
4008 if (global_init_p)
4009 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4010
4011 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4012 library functions, so warn about it. The flag may be useful for
4013 performance studies from time to time though, so don't disable it
4014 entirely. */
4015 if (global_options_set.x_rs6000_alignment_flags
4016 && rs6000_alignment_flags == MASK_ALIGN_POWER
4017 && DEFAULT_ABI == ABI_DARWIN
4018 && TARGET_64BIT)
4019 warning (0, "-malign-power is not supported for 64-bit Darwin;"
4020 " it is incompatible with the installed C and C++ libraries");
4021
4022 /* Numerous experiments show that IRA-based loop pressure
4023 calculation works better for RTL loop invariant motion on targets
4024 with enough (>= 32) registers. It is an expensive optimization,
4025 so it is enabled only when optimizing for peak performance. */
4026 if (optimize >= 3 && global_init_p
4027 && !global_options_set.x_flag_ira_loop_pressure)
4028 flag_ira_loop_pressure = 1;
4029
4030 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4031 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4032 options were already specified. */
4033 if (flag_sanitize & SANITIZE_USER_ADDRESS
4034 && !global_options_set.x_flag_asynchronous_unwind_tables)
4035 flag_asynchronous_unwind_tables = 1;
4036
4037 /* Set the pointer size. */
4038 if (TARGET_64BIT)
4039 {
4040 rs6000_pmode = DImode;
4041 rs6000_pointer_size = 64;
4042 }
4043 else
4044 {
4045 rs6000_pmode = SImode;
4046 rs6000_pointer_size = 32;
4047 }
4048
4049 /* Some OSs don't support saving the high part of 64-bit registers on context
4050 switch. Other OSs don't support saving Altivec registers. On those OSs,
4051 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4052 if the user wants either, the user must explicitly specify them and we
4053 won't interfere with the user's specification. */
4054
4055 set_masks = POWERPC_MASKS;
4056 #ifdef OS_MISSING_POWERPC64
4057 if (OS_MISSING_POWERPC64)
4058 set_masks &= ~OPTION_MASK_POWERPC64;
4059 #endif
4060 #ifdef OS_MISSING_ALTIVEC
4061 if (OS_MISSING_ALTIVEC)
4062 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4063 | OTHER_VSX_VECTOR_MASKS);
4064 #endif
4065
4066 /* Don't override by the processor default if given explicitly. */
4067 set_masks &= ~rs6000_isa_flags_explicit;
4068
4069 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
4070 the cpu in a target attribute or pragma, but did not specify a tuning
4071 option, use the cpu for the tuning option rather than the option specified
4072 with -mtune on the command line. Process a '--with-cpu' configuration
4073 request as an implicit -mcpu. */
4074 if (rs6000_cpu_index >= 0)
4075 {
4076 cpu_index = rs6000_cpu_index;
4077 have_cpu = true;
4078 }
4079 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4080 {
4081 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4082 have_cpu = true;
4083 }
4084 else if (implicit_cpu)
4085 {
4086 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4087 have_cpu = true;
4088 }
4089 else
4090 {
4091 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4092 const char *default_cpu = ((!TARGET_POWERPC64)
4093 ? "powerpc"
4094 : ((BYTES_BIG_ENDIAN)
4095 ? "powerpc64"
4096 : "powerpc64le"));
4097
4098 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4099 have_cpu = false;
4100 }
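/* Illustrative: a compiler configured with --with-cpu=power8 arrives here
   with implicit_cpu == "power8" when no -mcpu= option or target attribute
   was given, so the implicit_cpu branch above resolves cpu_index from that
   name and sets have_cpu. */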
4101
4102 gcc_assert (cpu_index >= 0);
4103
4104 if (have_cpu)
4105 {
4106 #ifndef HAVE_AS_POWER9
4107 if (processor_target_table[rs6000_cpu_index].processor
4108 == PROCESSOR_POWER9)
4109 {
4110 have_cpu = false;
4111 warning (0, "will not generate power9 instructions because "
4112 "assembler lacks power9 support");
4113 }
4114 #endif
4115 #ifndef HAVE_AS_POWER8
4116 if (processor_target_table[rs6000_cpu_index].processor
4117 == PROCESSOR_POWER8)
4118 {
4119 have_cpu = false;
4120 warning (0, "will not generate power8 instructions because "
4121 "assembler lacks power8 support");
4122 }
4123 #endif
4124 #ifndef HAVE_AS_POPCNTD
4125 if (processor_target_table[rs6000_cpu_index].processor
4126 == PROCESSOR_POWER7)
4127 {
4128 have_cpu = false;
4129 warning (0, "will not generate power7 instructions because "
4130 "assembler lacks power7 support");
4131 }
4132 #endif
4133 #ifndef HAVE_AS_DFP
4134 if (processor_target_table[rs6000_cpu_index].processor
4135 == PROCESSOR_POWER6)
4136 {
4137 have_cpu = false;
4138 warning (0, "will not generate power6 instructions because "
4139 "assembler lacks power6 support");
4140 }
4141 #endif
4142 #ifndef HAVE_AS_POPCNTB
4143 if (processor_target_table[rs6000_cpu_index].processor
4144 == PROCESSOR_POWER5)
4145 {
4146 have_cpu = false;
4147 warning (0, "will not generate power5 instructions because "
4148 "assembler lacks power5 support");
4149 }
4150 #endif
4151
4152 if (!have_cpu)
4153 {
4154 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4155 const char *default_cpu = (!TARGET_POWERPC64
4156 ? "powerpc"
4157 : (BYTES_BIG_ENDIAN
4158 ? "powerpc64"
4159 : "powerpc64le"));
4160
4161 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4162 }
4163 }
4164
4165 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4166 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4167 with those from the cpu, except for options that were explicitly set. If
4168 we don't have a cpu, do not override the target bits set in
4169 TARGET_DEFAULT. */
4170 if (have_cpu)
4171 {
4172 rs6000_isa_flags &= ~set_masks;
4173 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4174 & set_masks);
4175 }
4176 else
4177 {
4178 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4179 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4180 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4181 to using rs6000_isa_flags, we need to do the initialization here.
4182
4183 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4184 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4185 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4186 : processor_target_table[cpu_index].target_enable);
4187 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4188 }
4189
4190 if (rs6000_tune_index >= 0)
4191 tune_index = rs6000_tune_index;
4192 else if (have_cpu)
4193 rs6000_tune_index = tune_index = cpu_index;
4194 else
4195 {
4196 size_t i;
4197 enum processor_type tune_proc
4198 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4199
4200 tune_index = -1;
4201 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4202 if (processor_target_table[i].processor == tune_proc)
4203 {
4204 rs6000_tune_index = tune_index = i;
4205 break;
4206 }
4207 }
4208
4209 gcc_assert (tune_index >= 0);
4210 rs6000_cpu = processor_target_table[tune_index].processor;
4211
4212 /* Pick defaults for SPE related control flags. Do this early to make sure
4213 that the TARGET_ macros are representative ASAP. */
4214 {
4215 int spe_capable_cpu =
4216 (rs6000_cpu == PROCESSOR_PPC8540
4217 || rs6000_cpu == PROCESSOR_PPC8548);
4218
4219 if (!global_options_set.x_rs6000_spe_abi)
4220 rs6000_spe_abi = spe_capable_cpu;
4221
4222 if (!global_options_set.x_rs6000_spe)
4223 rs6000_spe = spe_capable_cpu;
4224
4225 if (!global_options_set.x_rs6000_float_gprs)
4226 rs6000_float_gprs =
4227 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
4228 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
4229 : 0);
4230 }
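/* Worked example (illustrative): plain -mcpu=8548 with none of the SPE or
   float-gprs options given explicitly reaches here with spe_capable_cpu
   set, so rs6000_spe and rs6000_spe_abi default on and rs6000_float_gprs
   defaults to 2 (single and double precision floats in GPRs). */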
4231
4232 if (global_options_set.x_rs6000_spe_abi
4233 && rs6000_spe_abi
4234 && !TARGET_SPE_ABI)
4235 error ("not configured for SPE ABI");
4236
4237 if (global_options_set.x_rs6000_spe
4238 && rs6000_spe
4239 && !TARGET_SPE)
4240 error ("not configured for SPE instruction set");
4241
4242 if (main_target_opt != NULL
4243 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
4244 || (main_target_opt->x_rs6000_spe != rs6000_spe)
4245 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
4246 error ("target attribute or pragma changes SPE ABI");
4247
4248 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4249 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4250 || rs6000_cpu == PROCESSOR_PPCE5500)
4251 {
4252 if (TARGET_ALTIVEC)
4253 error ("AltiVec not supported in this target");
4254 if (TARGET_SPE)
4255 error ("SPE not supported in this target");
4256 }
4257 if (rs6000_cpu == PROCESSOR_PPCE6500)
4258 {
4259 if (TARGET_SPE)
4260 error ("SPE not supported in this target");
4261 }
4262
4263 /* Disable Cell microcode if we are optimizing for the Cell
4264 and not optimizing for size. */
4265 if (rs6000_gen_cell_microcode == -1)
4266 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
4267 && !optimize_size);
4268
4269 /* If we are optimizing big endian systems for space and it's OK to
4270 use instructions that would be microcoded on the Cell, use the
4271 load/store multiple and string instructions. */
4272 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
4273 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4274 | OPTION_MASK_STRING);
4275
4276 /* Don't allow -mmultiple or -mstring on little endian systems
4277 unless the cpu is a 750, because the hardware doesn't support the
4278 instructions used in little endian mode, and using them causes an
4279 alignment trap. The 750 does not cause an alignment trap (except
4280 when the target is unaligned). */
4281
4282 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4283 {
4284 if (TARGET_MULTIPLE)
4285 {
4286 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4287 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4288 warning (0, "-mmultiple is not supported on little endian systems");
4289 }
4290
4291 if (TARGET_STRING)
4292 {
4293 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4294 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4295 warning (0, "-mstring is not supported on little endian systems");
4296 }
4297 }
4298
4299 /* If little-endian, default to -mstrict-align on older processors.
4300 Testing for htm matches power8 and later. */
4301 if (!BYTES_BIG_ENDIAN
4302 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4303 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4304
4305 /* -maltivec={le,be} implies -maltivec. */
4306 if (rs6000_altivec_element_order != 0)
4307 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4308
4309 /* Disallow -maltivec=le in big endian mode for now. This is not
4310 known to be useful for anyone. */
4311 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4312 {
4313 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4314 rs6000_altivec_element_order = 0;
4315 }
4316
4317 /* Add some warnings for VSX. */
4318 if (TARGET_VSX)
4319 {
4320 const char *msg = NULL;
4321 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
4322 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4323 {
4324 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4325 msg = N_("-mvsx requires hardware floating point");
4326 else
4327 {
4328 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4329 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4330 }
4331 }
4332 else if (TARGET_PAIRED_FLOAT)
4333 msg = N_("-mvsx and -mpaired are incompatible");
4334 else if (TARGET_AVOID_XFORM > 0)
4335 msg = N_("-mvsx needs indexed addressing");
4336 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4337 & OPTION_MASK_ALTIVEC))
4338 {
4339 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4340 msg = N_("-mvsx and -mno-altivec are incompatible");
4341 else
4342 msg = N_("-mno-altivec disables vsx");
4343 }
4344
4345 if (msg)
4346 {
4347 warning (0, msg);
4348 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4349 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4350 }
4351 }
4352
4353 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4354 the -mcpu setting to enable options that conflict. */
4355 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4356 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4357 | OPTION_MASK_ALTIVEC
4358 | OPTION_MASK_VSX)) != 0)
4359 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4360 | OPTION_MASK_DIRECT_MOVE)
4361 & ~rs6000_isa_flags_explicit);
4362
4363 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4364 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4365
4366 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4367 off all of the options that depend on those flags. */
4368 ignore_masks = rs6000_disable_incompatible_switches ();
4369
4370 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4371 unless the user explicitly used the -mno-<option> to disable the code. */
4372 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4373 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4374 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4375 else if (TARGET_P9_MINMAX)
4376 {
4377 if (have_cpu)
4378 {
4379 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4380 {
4381 /* Legacy behavior: allow -mcpu=power9 with certain
4382 capabilities explicitly disabled. */
4383 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4384 /* However, reject this automatic fix if certain
4385 capabilities required for TARGET_P9_MINMAX support
4386 have been explicitly disabled. */
4387 if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4388 | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4389 != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4390 | OPTION_MASK_UPPER_REGS_DF))
4391 error ("-mpower9-minmax incompatible with explicitly disabled options");
4392 }
4393 else
4394 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4395 "<xxx> less than power9");
4396 }
4397 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4398 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4399 & rs6000_isa_flags_explicit))
4400 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4401 were explicitly cleared. */
4402 error ("-mpower9-minmax incompatible with explicitly disabled options");
4403 else
4404 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4405 }
4406 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4407 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4408 else if (TARGET_VSX)
4409 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4410 else if (TARGET_POPCNTD)
4411 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4412 else if (TARGET_DFP)
4413 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4414 else if (TARGET_CMPB)
4415 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4416 else if (TARGET_FPRND)
4417 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4418 else if (TARGET_POPCNTB)
4419 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4420 else if (TARGET_ALTIVEC)
4421 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4422
4423 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4424 {
4425 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4426 error ("-mcrypto requires -maltivec");
4427 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4428 }
4429
4430 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4431 {
4432 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4433 error ("-mdirect-move requires -mvsx");
4434 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4435 }
4436
4437 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4438 {
4439 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4440 error ("-mpower8-vector requires -maltivec");
4441 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4442 }
4443
4444 if (TARGET_P8_VECTOR && !TARGET_VSX)
4445 {
4446 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4447 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4448 error ("-mpower8-vector requires -mvsx");
4449 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4450 {
4451 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4452 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4453 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4454 }
4455 else
4456 {
4457 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4458 not explicit. */
4459 rs6000_isa_flags |= OPTION_MASK_VSX;
4460 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4461 }
4462 }
4463
4464 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4465 {
4466 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4467 error ("-mvsx-timode requires -mvsx");
4468 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4469 }
4470
4471 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4472 {
4473 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4474 error ("-mhard-dfp requires -mhard-float");
4475 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4476 }
4477
4478 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4479 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4480 set the individual option. */
4481 if (TARGET_UPPER_REGS > 0)
4482 {
4483 if (TARGET_VSX
4484 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4485 {
4486 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4487 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4488 }
4489 if (TARGET_VSX
4490 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4491 {
4492 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4493 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4494 }
4495 if (TARGET_P8_VECTOR
4496 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4497 {
4498 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4499 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4500 }
4501 }
4502 else if (TARGET_UPPER_REGS == 0)
4503 {
4504 if (TARGET_VSX
4505 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4506 {
4507 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4508 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4509 }
4510 if (TARGET_VSX
4511 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4512 {
4513 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4514 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4515 }
4516 if (TARGET_P8_VECTOR
4517 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4518 {
4519 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4520 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4521 }
4522 }
4523
4524 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4525 {
4526 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4527 error ("-mupper-regs-df requires -mvsx");
4528 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4529 }
4530
4531 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4532 {
4533 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4534 error ("-mupper-regs-di requires -mvsx");
4535 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4536 }
4537
4538 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4539 {
4540 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4541 error ("-mupper-regs-sf requires -mpower8-vector");
4542 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4543 }
4544
4545 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4546 silently turn off quad memory mode. */
4547 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4548 {
4549 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4550 warning (0, N_("-mquad-memory requires 64-bit mode"));
4551
4552 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4553 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4554
4555 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4556 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4557 }
4558
4559 /* Non-atomic quad memory load/store are disabled for little endian, since
4560 the words are reversed, but atomic operations can still be done by
4561 swapping the words. */
4562 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4563 {
4564 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4565 warning (0, N_("-mquad-memory is not available in little endian mode"));
4566
4567 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4568 }
4569
4570 /* Assume that if the user asked for normal quad memory instructions, they
4571 want the atomic versions as well, unless they explicitly told us not to
4572 use quad word atomic instructions. */
4573 if (TARGET_QUAD_MEMORY
4574 && !TARGET_QUAD_MEMORY_ATOMIC
4575 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4576 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
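/* Hedged note: the quad memory instructions in question are lq/stq, with
   lqarx/stqcx. as the atomic forms; e.g.

     lq 4,0(9)

   loads 16 bytes into the even/odd GPR pair r4:r5, which is why 64-bit mode
   is required and why the non-atomic forms depend on big-endian word
   order. */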
4577
4578 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4579 generating power8 instructions. */
4580 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4581 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4582 & OPTION_MASK_P8_FUSION);
4583
4584 /* Setting additional fusion flags turns on base fusion. */
4585 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4586 {
4587 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4588 {
4589 if (TARGET_P8_FUSION_SIGN)
4590 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4591
4592 if (TARGET_TOC_FUSION)
4593 error ("-mtoc-fusion requires -mpower8-fusion");
4594
4595 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4596 }
4597 else
4598 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4599 }
4600
4601 /* Power9 fusion is a superset of power8 fusion. */
4602 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4603 {
4604 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4605 {
4606 /* We prefer to not mention undocumented options in
4607 error messages. However, if users have managed to select
4608 power9-fusion without selecting power8-fusion, they
4609 already know about undocumented flags. */
4610 error ("-mpower9-fusion requires -mpower8-fusion");
4611 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4612 }
4613 else
4614 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4615 }
4616
4617 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4618 generating power9 instructions. */
4619 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4620 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4621 & OPTION_MASK_P9_FUSION);
4622
4623 /* Power8 does not fuse sign extended loads with the addis. If we are
4624 optimizing at high levels for speed, convert a sign extended load into a
4625 zero extending load, and an explicit sign extension. */
4626 if (TARGET_P8_FUSION
4627 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4628 && optimize_function_for_speed_p (cfun)
4629 && optimize >= 3)
4630 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4631
4632 /* TOC fusion requires 64-bit and medium/large code model. */
4633 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4634 {
4635 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4636 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4637 warning (0, N_("-mtoc-fusion requires 64-bit"));
4638 }
4639
4640 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4641 {
4642 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4643 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4644 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4645 }
4646
4647 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4648 model. */
4649 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4650 && (TARGET_CMODEL != CMODEL_SMALL)
4651 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4652 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4653
4654 /* ISA 3.0 vector instructions include ISA 2.07. */
4655 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4656 {
4657 /* We prefer to not mention undocumented options in
4658 error messages. However, if users have managed to select
4659 power9-vector without selecting power8-vector, they
4660 already know about undocumented flags. */
4661 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4662 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4663 error ("-mpower9-vector requires -mpower8-vector");
4664 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4665 {
4666 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4667 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4668 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4669 }
4670 else
4671 {
4672 /* OPTION_MASK_P9_VECTOR is explicit and
4673 OPTION_MASK_P8_VECTOR is not explicit. */
4674 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4675 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4676 }
4677 }
4678
4679 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4680 -mpower9-dform-vector. */
4681 if (TARGET_P9_DFORM_BOTH > 0)
4682 {
4683 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4684 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4685
4686 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4687 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4688 }
4689 else if (TARGET_P9_DFORM_BOTH == 0)
4690 {
4691 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4692 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4693
4694 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4695 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4696 }
4697
4698 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4699 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4700 {
4701 /* We prefer to not mention undocumented options in
4702 error messages. However, if users have managed to select
4703 power9-dform without selecting power9-vector, they
4704 already know about undocumented flags. */
4705 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4706 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4707 | OPTION_MASK_P9_DFORM_VECTOR)))
4708 error ("-mpower9-dform requires -mpower9-vector");
4709 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4710 {
4711 rs6000_isa_flags &=
4712 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4713 rs6000_isa_flags_explicit |=
4714 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4715 }
4716 else
4717 {
4718 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4719 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4720 may be explicit. */
4721 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4722 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4723 }
4724 }
4725
4726 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4727 && !TARGET_DIRECT_MOVE)
4728 {
4729 /* We prefer to not mention undocumented options in
4730 error messages. However, if users have managed to select
4731 power9-dform without selecting direct-move, they
4732 already know about undocumented flags. */
4733 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4734 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
4735 (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
4736 (TARGET_P9_DFORM_BOTH == 1)))
4737 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4738 " require -mdirect-move");
4739 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4740 {
4741 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4742 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4743 }
4744 else
4745 {
4746 rs6000_isa_flags &=
4747 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4748 rs6000_isa_flags_explicit |=
4749 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4750 }
4751 }
4752
4753 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4754 {
4755 /* We prefer to not mention undocumented options in
4756 error messages. However, if users have managed to select
4757 power9-dform without selecting upper-regs-df, they
4758 already know about undocumented flags. */
4759 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4760 error ("-mpower9-dform requires -mupper-regs-df");
4761 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4762 }
4763
4764 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4765 {
4766 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4767 error ("-mpower9-dform requires -mupper-regs-sf");
4768 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4769 }
4770
4771 /* Enable LRA by default. */
4772 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4773 rs6000_isa_flags |= OPTION_MASK_LRA;
4774
4775 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4776 but do show up with -mno-lra. Given that -mlra will become the default
4777 once PR 69847 is fixed, turn off the options with problems by default if
4778 -mno-lra was used, and warn if the user explicitly asked for the option.
4779
4780 Enable -mpower9-dform-vector by default if LRA and the other power9 options
4781 are enabled. Enable -mvsx-timode by default if LRA and VSX are enabled. */
4782 if (!TARGET_LRA)
4783 {
4784 if (TARGET_VSX_TIMODE)
4785 {
4786 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4787 warning (0, "-mvsx-timode might need -mlra");
4788
4789 else
4790 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4791 }
4792 }
4793
4794 else
4795 {
4796 if (TARGET_VSX && !TARGET_VSX_TIMODE
4797 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4798 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4799 }
4800
4801 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4802 support. If we only have ISA 2.06 support and the user did not specify
4803 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4804 but we don't enable the full vectorization support. */
4805 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4806 TARGET_ALLOW_MOVMISALIGN = 1;
4807
4808 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4809 {
4810 if (TARGET_ALLOW_MOVMISALIGN > 0
4811 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4812 error ("-mallow-movmisalign requires -mvsx");
4813
4814 TARGET_ALLOW_MOVMISALIGN = 0;
4815 }
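
/* TARGET_ALLOW_MOVMISALIGN is a tri-state: -1 means the user did not set
   it.  A hedged summary of the resulting states (illustrative only):

     -mcpu=power8, no switch      -> -1 promoted to 1 (full ISA 2.07)
     -mcpu=power7, no switch      -> stays -1 (patterns on, but no full
                                     vectorization support)
     -mallow-movmisalign -mno-vsx -> error, then forced to 0  */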
4816
4817 /* Determine when unaligned vector accesses are permitted, and when
4818 they are preferred over masked Altivec loads. Note that if
4819 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4820 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4821 not true. */
4822 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4823 {
4824 if (!TARGET_VSX)
4825 {
4826 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4827 error ("-mefficient-unaligned-vsx requires -mvsx");
4828
4829 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4830 }
4831
4832 else if (!TARGET_ALLOW_MOVMISALIGN)
4833 {
4834 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4835 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4836
4837 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4838 }
4839 }
4840
4841 /* Check whether we should allow small integers into VSX registers. We
4842 require direct move to prevent the register allocator from having to move
4843 variables through memory to do moves. SImode can be used on ISA 2.07,
4844 while HImode and QImode require ISA 3.0. */
4845 if (TARGET_VSX_SMALL_INTEGER
4846 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4847 {
4848 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4849 error ("-mvsx-small-integer requires -mpower8-vector, "
4850 "-mupper-regs-di, and -mdirect-move");
4851
4852 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4853 }
4854
4855 /* Set long double size before the IEEE 128-bit tests. */
4856 if (!global_options_set.x_rs6000_long_double_type_size)
4857 {
4858 if (main_target_opt != NULL
4859 && (main_target_opt->x_rs6000_long_double_type_size
4860 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4861 error ("target attribute or pragma changes long double size");
4862 else
4863 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4864 }
4865
4866 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4867 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4868 pick up this default. */
4869 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4870 if (!global_options_set.x_rs6000_ieeequad)
4871 rs6000_ieeequad = 1;
4872 #endif
4873
4874 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4875 systems, but don't enable the __float128 keyword. */
4876 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4877 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4878 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4879 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4880
4881 /* IEEE 128-bit floating point requires VSX support. */
4882 if (!TARGET_VSX)
4883 {
4884 if (TARGET_FLOAT128_KEYWORD)
4885 {
4886 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4887 error ("-mfloat128 requires VSX support");
4888
4889 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4890 | OPTION_MASK_FLOAT128_KEYWORD
4891 | OPTION_MASK_FLOAT128_HW);
4892 }
4893
4894 else if (TARGET_FLOAT128_TYPE)
4895 {
4896 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4897 error ("-mfloat128-type requires VSX support");
4898
4899 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4900 | OPTION_MASK_FLOAT128_KEYWORD
4901 | OPTION_MASK_FLOAT128_HW);
4902 }
4903 }
4904
4905 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4906 128-bit floating point support to be enabled. */
4907 if (!TARGET_FLOAT128_TYPE)
4908 {
4909 if (TARGET_FLOAT128_KEYWORD)
4910 {
4911 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4912 {
4913 error ("-mfloat128 requires -mfloat128-type");
4914 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4915 | OPTION_MASK_FLOAT128_KEYWORD
4916 | OPTION_MASK_FLOAT128_HW);
4917 }
4918 else
4919 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4920 }
4921
4922 if (TARGET_FLOAT128_HW)
4923 {
4924 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4925 {
4926 error ("-mfloat128-hardware requires -mfloat128-type");
4927 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4928 }
4929 else
4930 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4931 | OPTION_MASK_FLOAT128_KEYWORD
4932 | OPTION_MASK_FLOAT128_HW);
4933 }
4934 }
4935
4936 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4937 -mfloat128-hardware by default. However, don't enable the __float128
4938 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4939 -mfloat128 option as well if it was not already set. */
4940 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4941 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4942 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4943 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4944
4945 if (TARGET_FLOAT128_HW
4946 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4947 {
4948 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4949 error ("-mfloat128-hardware requires full ISA 3.0 support");
4950
4951 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4952 }
4953
4954 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4955 {
4956 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4957 error ("-mfloat128-hardware requires -m64");
4958
4959 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4960 }
4961
4962 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4963 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4964 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4965 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
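
/* A sketch of the __float128 cascade above, assuming a hypothetical
   64-bit Linux VSX configuration (illustrative, not exhaustive):

     -mcpu=power9          -> ISA_3_0_MASKS_IEEE is satisfied, so
                              OPTION_MASK_FLOAT128_HW is turned on
     -mfloat128-hardware   -> also turns on -mfloat128 (the keyword)
     -mfloat128 -mno-vsx   -> error; all FLOAT128 masks are cleared  */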
4966
4967 /* Print the options after updating the defaults. */
4968 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4969 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4970
4971 /* E500mc does "better" if we inline more aggressively. Respect the
4972 user's opinion, though. */
4973 if (rs6000_block_move_inline_limit == 0
4974 && (rs6000_cpu == PROCESSOR_PPCE500MC
4975 || rs6000_cpu == PROCESSOR_PPCE500MC64
4976 || rs6000_cpu == PROCESSOR_PPCE5500
4977 || rs6000_cpu == PROCESSOR_PPCE6500))
4978 rs6000_block_move_inline_limit = 128;
4979
4980 /* store_one_arg depends on expand_block_move to handle at least the
4981 size of reg_parm_stack_space. */
4982 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4983 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4984
4985 if (global_init_p)
4986 {
4987 /* If the appropriate debug option is enabled, replace the target hooks
4988 with debug versions that call the real version and then print
4989 debugging information. */
4990 if (TARGET_DEBUG_COST)
4991 {
4992 targetm.rtx_costs = rs6000_debug_rtx_costs;
4993 targetm.address_cost = rs6000_debug_address_cost;
4994 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4995 }
4996
4997 if (TARGET_DEBUG_ADDR)
4998 {
4999 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
5000 targetm.legitimize_address = rs6000_debug_legitimize_address;
5001 rs6000_secondary_reload_class_ptr
5002 = rs6000_debug_secondary_reload_class;
5003 rs6000_secondary_memory_needed_ptr
5004 = rs6000_debug_secondary_memory_needed;
5005 rs6000_cannot_change_mode_class_ptr
5006 = rs6000_debug_cannot_change_mode_class;
5007 rs6000_preferred_reload_class_ptr
5008 = rs6000_debug_preferred_reload_class;
5009 rs6000_legitimize_reload_address_ptr
5010 = rs6000_debug_legitimize_reload_address;
5011 rs6000_mode_dependent_address_ptr
5012 = rs6000_debug_mode_dependent_address;
5013 }
5014
5015 if (rs6000_veclibabi_name)
5016 {
5017 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
5018 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
5019 else
5020 {
5021 error ("unknown vectorization library ABI type (%s) for "
5022 "-mveclibabi= switch", rs6000_veclibabi_name);
5023 ret = false;
5024 }
5025 }
5026 }
5027
5028 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
5029 target attribute or pragma which automatically enables both options,
5030 unless the altivec ABI was set. This is set by default for 64-bit, but
5031 not for 32-bit. */
5032 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5033 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
5034 | OPTION_MASK_FLOAT128_TYPE
5035 | OPTION_MASK_FLOAT128_KEYWORD)
5036 & ~rs6000_isa_flags_explicit);
5037
5038 /* Enable Altivec ABI for AIX -maltivec. */
5039 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
5040 {
5041 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
5042 error ("target attribute or pragma changes AltiVec ABI");
5043 else
5044 rs6000_altivec_abi = 1;
5045 }
5046
5047 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
5048 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
5049 be explicitly overridden in either case. */
5050 if (TARGET_ELF)
5051 {
5052 if (!global_options_set.x_rs6000_altivec_abi
5053 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
5054 {
5055 if (main_target_opt != NULL
5056 && !main_target_opt->x_rs6000_altivec_abi)
5057 error ("target attribute or pragma changes AltiVec ABI");
5058 else
5059 rs6000_altivec_abi = 1;
5060 }
5061 }
5062
5063 /* Set the Darwin64 ABI as default for 64-bit Darwin.
5064 So far, the only darwin64 targets are also MACH-O. */
5065 if (TARGET_MACHO
5066 && DEFAULT_ABI == ABI_DARWIN
5067 && TARGET_64BIT)
5068 {
5069 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
5070 error ("target attribute or pragma changes darwin64 ABI");
5071 else
5072 {
5073 rs6000_darwin64_abi = 1;
5074 /* Default to natural alignment, for better performance. */
5075 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
5076 }
5077 }
5078
5079 /* Place FP constants in the constant pool instead of TOC
5080 if section anchors enabled. */
5081 if (flag_section_anchors
5082 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
5083 TARGET_NO_FP_IN_TOC = 1;
5084
5085 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5086 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
5087
5088 #ifdef SUBTARGET_OVERRIDE_OPTIONS
5089 SUBTARGET_OVERRIDE_OPTIONS;
5090 #endif
5091 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
5092 SUBSUBTARGET_OVERRIDE_OPTIONS;
5093 #endif
5094 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
5095 SUB3TARGET_OVERRIDE_OPTIONS;
5096 #endif
5097
5098 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
5099 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
5100
5101 /* For the E500 family of cores, reset the single/double FP flags to let us
5102 check that they remain constant across attributes or pragmas. Also,
5103 clear a possible request for string instructions, which are not
5104 supported and which we might have silently enabled above for -Os.
5105
5106 For other families, clear ISEL in case it was set implicitly.
5107 */
5108
5109 switch (rs6000_cpu)
5110 {
5111 case PROCESSOR_PPC8540:
5112 case PROCESSOR_PPC8548:
5113 case PROCESSOR_PPCE500MC:
5114 case PROCESSOR_PPCE500MC64:
5115 case PROCESSOR_PPCE5500:
5116 case PROCESSOR_PPCE6500:
5117
5118 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
5119 rs6000_double_float = TARGET_E500_DOUBLE;
5120
5121 rs6000_isa_flags &= ~OPTION_MASK_STRING;
5122
5123 break;
5124
5125 default:
5126
5127 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5128 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5129
5130 break;
5131 }
5132
5133 if (main_target_opt)
5134 {
5135 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5136 error ("target attribute or pragma changes single precision floating "
5137 "point");
5138 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5139 error ("target attribute or pragma changes double precision floating "
5140 "point");
5141 }
5142
5143 /* Detect invalid option combinations with E500. */
5144 CHECK_E500_OPTIONS;
5145
5146 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5147 && rs6000_cpu != PROCESSOR_POWER5
5148 && rs6000_cpu != PROCESSOR_POWER6
5149 && rs6000_cpu != PROCESSOR_POWER7
5150 && rs6000_cpu != PROCESSOR_POWER8
5151 && rs6000_cpu != PROCESSOR_POWER9
5152 && rs6000_cpu != PROCESSOR_PPCA2
5153 && rs6000_cpu != PROCESSOR_CELL
5154 && rs6000_cpu != PROCESSOR_PPC476);
5155 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5156 || rs6000_cpu == PROCESSOR_POWER5
5157 || rs6000_cpu == PROCESSOR_POWER7
5158 || rs6000_cpu == PROCESSOR_POWER8);
5159 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5160 || rs6000_cpu == PROCESSOR_POWER5
5161 || rs6000_cpu == PROCESSOR_POWER6
5162 || rs6000_cpu == PROCESSOR_POWER7
5163 || rs6000_cpu == PROCESSOR_POWER8
5164 || rs6000_cpu == PROCESSOR_POWER9
5165 || rs6000_cpu == PROCESSOR_PPCE500MC
5166 || rs6000_cpu == PROCESSOR_PPCE500MC64
5167 || rs6000_cpu == PROCESSOR_PPCE5500
5168 || rs6000_cpu == PROCESSOR_PPCE6500);
5169
5170 /* Allow debug switches to override the above settings. These are set to -1
5171 in powerpcspe.opt to indicate the user hasn't directly set the switch. */
5172 if (TARGET_ALWAYS_HINT >= 0)
5173 rs6000_always_hint = TARGET_ALWAYS_HINT;
5174
5175 if (TARGET_SCHED_GROUPS >= 0)
5176 rs6000_sched_groups = TARGET_SCHED_GROUPS;
5177
5178 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5179 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5180
5181 rs6000_sched_restricted_insns_priority
5182 = (rs6000_sched_groups ? 1 : 0);
5183
5184 /* Handle -msched-costly-dep option. */
5185 rs6000_sched_costly_dep
5186 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5187
5188 if (rs6000_sched_costly_dep_str)
5189 {
5190 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5191 rs6000_sched_costly_dep = no_dep_costly;
5192 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5193 rs6000_sched_costly_dep = all_deps_costly;
5194 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5195 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5196 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5197 rs6000_sched_costly_dep = store_to_load_dep_costly;
5198 else
5199 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5200 atoi (rs6000_sched_costly_dep_str));
5201 }
5202
5203 /* Handle -minsert-sched-nops option. */
5204 rs6000_sched_insert_nops
5205 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5206
5207 if (rs6000_sched_insert_nops_str)
5208 {
5209 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5210 rs6000_sched_insert_nops = sched_finish_none;
5211 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5212 rs6000_sched_insert_nops = sched_finish_pad_groups;
5213 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5214 rs6000_sched_insert_nops = sched_finish_regroup_exact;
5215 else
5216 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5217 atoi (rs6000_sched_insert_nops_str));
5218 }
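
/* Both string options above accept either a keyword or a bare number
   that is cast to the corresponding enum.  Hypothetical examples:

     -msched-costly-dep=true_store_to_load
     -msched-costly-dep=2             (the atoi fallback path)
     -minsert-sched-nops=regroup_exact
     -minsert-sched-nops=pad  */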
5219
5220 /* Handle stack protector */
5221 if (!global_options_set.x_rs6000_stack_protector_guard)
5222 #ifdef TARGET_THREAD_SSP_OFFSET
5223 rs6000_stack_protector_guard = SSP_TLS;
5224 #else
5225 rs6000_stack_protector_guard = SSP_GLOBAL;
5226 #endif
5227
5228 #ifdef TARGET_THREAD_SSP_OFFSET
5229 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5230 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5231 #endif
5232
5233 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5234 {
5235 char *endp;
5236 const char *str = rs6000_stack_protector_guard_offset_str;
5237
5238 errno = 0;
5239 long offset = strtol (str, &endp, 0);
5240 if (!*str || *endp || errno)
5241 error ("%qs is not a valid number "
5242 "in -mstack-protector-guard-offset=", str);
5243
5244 if (!IN_RANGE (offset, -0x8000, 0x7fff)
5245 || (TARGET_64BIT && (offset & 3)))
5246 error ("%qs is not a valid offset "
5247 "in -mstack-protector-guard-offset=", str);
5248
5249 rs6000_stack_protector_guard_offset = offset;
5250 }
5251
5252 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5253 {
5254 const char *str = rs6000_stack_protector_guard_reg_str;
5255 int reg = decode_reg_name (str);
5256
5257 if (!IN_RANGE (reg, 1, 31))
5258 error ("%qs is not a valid base register "
5259 "in -mstack-protector-guard-reg=", str);
5260
5261 rs6000_stack_protector_guard_reg = reg;
5262 }
5263
5264 if (rs6000_stack_protector_guard == SSP_TLS
5265 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5266 error ("-mstack-protector-guard=tls needs a valid base register");
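
/* Sketch of the stack-protector setup above, assuming the TLS guard
   (SSP_TLS) on a hypothetical 64-bit target: the canary is read relative
   to the thread pointer, r13 (r2 on 32-bit).  For example:

     -mstack-protector-guard=tls -mstack-protector-guard-offset=0x7010

   is accepted (in range and a multiple of 4), while an offset of 0x7ffe
   would be rejected on 64-bit by the (offset & 3) check above.  */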
5267
5268 if (global_init_p)
5269 {
5270 #ifdef TARGET_REGNAMES
5271 /* If the user desires alternate register names, copy in the
5272 alternate names now. */
5273 if (TARGET_REGNAMES)
5274 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5275 #endif
5276
5277 /* Set aix_struct_return last, after the ABI is determined.
5278 If -maix-struct-return or -msvr4-struct-return was explicitly
5279 used, don't override with the ABI default. */
5280 if (!global_options_set.x_aix_struct_return)
5281 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5282
5283 #if 0
5284 /* IBM XL compiler defaults to unsigned bitfields. */
5285 if (TARGET_XL_COMPAT)
5286 flag_signed_bitfields = 0;
5287 #endif
5288
5289 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5290 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5291
5292 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5293
5294 /* We can only guarantee the availability of DI pseudo-ops when
5295 assembling for 64-bit targets. */
5296 if (!TARGET_64BIT)
5297 {
5298 targetm.asm_out.aligned_op.di = NULL;
5299 targetm.asm_out.unaligned_op.di = NULL;
5300 }
5301
5302
5303 /* Set branch target alignment, if not optimizing for size. */
5304 if (!optimize_size)
5305 {
5306 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
5307 aligned 8-byte to avoid misprediction by the branch predictor. */
5308 if (rs6000_cpu == PROCESSOR_TITAN
5309 || rs6000_cpu == PROCESSOR_CELL)
5310 {
5311 if (align_functions <= 0)
5312 align_functions = 8;
5313 if (align_jumps <= 0)
5314 align_jumps = 8;
5315 if (align_loops <= 0)
5316 align_loops = 8;
5317 }
5318 if (rs6000_align_branch_targets)
5319 {
5320 if (align_functions <= 0)
5321 align_functions = 16;
5322 if (align_jumps <= 0)
5323 align_jumps = 16;
5324 if (align_loops <= 0)
5325 {
5326 can_override_loop_align = 1;
5327 align_loops = 16;
5328 }
5329 }
5330 if (align_jumps_max_skip <= 0)
5331 align_jumps_max_skip = 15;
5332 if (align_loops_max_skip <= 0)
5333 align_loops_max_skip = 15;
5334 }
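
/* Worked example of the defaults above (illustrative): with -O2 on a cpu
   for which rs6000_align_branch_targets is true (e.g. power8), functions,
   jumps and loops all default to 16-byte alignment with at most 15 bytes
   of padding; on Cell or Titan the default is 8 bytes.  An explicit
   -falign-loops wins, since only unset (<= 0) values are overridden.  */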
5335
5336 /* Arrange to save and restore machine status around nested functions. */
5337 init_machine_status = rs6000_init_machine_status;
5338
5339 /* We should always be splitting complex arguments, but we can't break
5340 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5341 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5342 targetm.calls.split_complex_arg = NULL;
5343
5344 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5345 if (DEFAULT_ABI == ABI_AIX)
5346 targetm.calls.custom_function_descriptors = 0;
5347 }
5348
5349 /* Initialize rs6000_cost with the appropriate target costs. */
5350 if (optimize_size)
5351 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5352 else
5353 switch (rs6000_cpu)
5354 {
5355 case PROCESSOR_RS64A:
5356 rs6000_cost = &rs64a_cost;
5357 break;
5358
5359 case PROCESSOR_MPCCORE:
5360 rs6000_cost = &mpccore_cost;
5361 break;
5362
5363 case PROCESSOR_PPC403:
5364 rs6000_cost = &ppc403_cost;
5365 break;
5366
5367 case PROCESSOR_PPC405:
5368 rs6000_cost = &ppc405_cost;
5369 break;
5370
5371 case PROCESSOR_PPC440:
5372 rs6000_cost = &ppc440_cost;
5373 break;
5374
5375 case PROCESSOR_PPC476:
5376 rs6000_cost = &ppc476_cost;
5377 break;
5378
5379 case PROCESSOR_PPC601:
5380 rs6000_cost = &ppc601_cost;
5381 break;
5382
5383 case PROCESSOR_PPC603:
5384 rs6000_cost = &ppc603_cost;
5385 break;
5386
5387 case PROCESSOR_PPC604:
5388 rs6000_cost = &ppc604_cost;
5389 break;
5390
5391 case PROCESSOR_PPC604e:
5392 rs6000_cost = &ppc604e_cost;
5393 break;
5394
5395 case PROCESSOR_PPC620:
5396 rs6000_cost = &ppc620_cost;
5397 break;
5398
5399 case PROCESSOR_PPC630:
5400 rs6000_cost = &ppc630_cost;
5401 break;
5402
5403 case PROCESSOR_CELL:
5404 rs6000_cost = &ppccell_cost;
5405 break;
5406
5407 case PROCESSOR_PPC750:
5408 case PROCESSOR_PPC7400:
5409 rs6000_cost = &ppc750_cost;
5410 break;
5411
5412 case PROCESSOR_PPC7450:
5413 rs6000_cost = &ppc7450_cost;
5414 break;
5415
5416 case PROCESSOR_PPC8540:
5417 case PROCESSOR_PPC8548:
5418 rs6000_cost = &ppc8540_cost;
5419 break;
5420
5421 case PROCESSOR_PPCE300C2:
5422 case PROCESSOR_PPCE300C3:
5423 rs6000_cost = &ppce300c2c3_cost;
5424 break;
5425
5426 case PROCESSOR_PPCE500MC:
5427 rs6000_cost = &ppce500mc_cost;
5428 break;
5429
5430 case PROCESSOR_PPCE500MC64:
5431 rs6000_cost = &ppce500mc64_cost;
5432 break;
5433
5434 case PROCESSOR_PPCE5500:
5435 rs6000_cost = &ppce5500_cost;
5436 break;
5437
5438 case PROCESSOR_PPCE6500:
5439 rs6000_cost = &ppce6500_cost;
5440 break;
5441
5442 case PROCESSOR_TITAN:
5443 rs6000_cost = &titan_cost;
5444 break;
5445
5446 case PROCESSOR_POWER4:
5447 case PROCESSOR_POWER5:
5448 rs6000_cost = &power4_cost;
5449 break;
5450
5451 case PROCESSOR_POWER6:
5452 rs6000_cost = &power6_cost;
5453 break;
5454
5455 case PROCESSOR_POWER7:
5456 rs6000_cost = &power7_cost;
5457 break;
5458
5459 case PROCESSOR_POWER8:
5460 rs6000_cost = &power8_cost;
5461 break;
5462
5463 case PROCESSOR_POWER9:
5464 rs6000_cost = &power9_cost;
5465 break;
5466
5467 case PROCESSOR_PPCA2:
5468 rs6000_cost = &ppca2_cost;
5469 break;
5470
5471 default:
5472 gcc_unreachable ();
5473 }
5474
5475 if (global_init_p)
5476 {
5477 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5478 rs6000_cost->simultaneous_prefetches,
5479 global_options.x_param_values,
5480 global_options_set.x_param_values);
5481 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5482 global_options.x_param_values,
5483 global_options_set.x_param_values);
5484 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5485 rs6000_cost->cache_line_size,
5486 global_options.x_param_values,
5487 global_options_set.x_param_values);
5488 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5489 global_options.x_param_values,
5490 global_options_set.x_param_values);
5491
5492 /* Increase loop peeling limits based on performance analysis. */
5493 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5494 global_options.x_param_values,
5495 global_options_set.x_param_values);
5496 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5497 global_options.x_param_values,
5498 global_options_set.x_param_values);
5499
5500 /* Use the 'model' -fsched-pressure algorithm by default. */
5501 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5502 SCHED_PRESSURE_MODEL,
5503 global_options.x_param_values,
5504 global_options_set.x_param_values);
5505
5506 /* If using typedef char *va_list, signal that
5507 __builtin_va_start (&ap, 0) can be optimized to
5508 ap = __builtin_next_arg (0). */
5509 if (DEFAULT_ABI != ABI_V4)
5510 targetm.expand_builtin_va_start = NULL;
5511 }
5512
5513 /* Set up single/double float flags.
5514 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5515 then set both flags. */
5516 if (TARGET_HARD_FLOAT && TARGET_FPRS
5517 && rs6000_single_float == 0 && rs6000_double_float == 0)
5518 rs6000_single_float = rs6000_double_float = 1;
5519
5520 /* If not explicitly specified via option, decide whether to generate indexed
5521 load/store instructions. A value of -1 indicates that the
5522 initial value of this variable has not been overwritten. During
5523 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5524 if (TARGET_AVOID_XFORM == -1)
5525 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5526 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5527 need indexed accesses and the type used is the scalar type of the element
5528 being loaded or stored. */
5529 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5530 && !TARGET_ALTIVEC);
5531
5532 /* Set the -mrecip options. */
5533 if (rs6000_recip_name)
5534 {
5535 char *p = ASTRDUP (rs6000_recip_name);
5536 char *q;
5537 unsigned int mask, i;
5538 bool invert;
5539
5540 while ((q = strtok (p, ",")) != NULL)
5541 {
5542 p = NULL;
5543 if (*q == '!')
5544 {
5545 invert = true;
5546 q++;
5547 }
5548 else
5549 invert = false;
5550
5551 if (!strcmp (q, "default"))
5552 mask = ((TARGET_RECIP_PRECISION)
5553 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5554 else
5555 {
5556 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5557 if (!strcmp (q, recip_options[i].string))
5558 {
5559 mask = recip_options[i].mask;
5560 break;
5561 }
5562
5563 if (i == ARRAY_SIZE (recip_options))
5564 {
5565 error ("unknown option for -mrecip=%s", q);
5566 invert = false;
5567 mask = 0;
5568 ret = false;
5569 }
5570 }
5571
5572 if (invert)
5573 rs6000_recip_control &= ~mask;
5574 else
5575 rs6000_recip_control |= mask;
5576 }
5577 }
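
/* Hedged examples of -mrecip parsing (hypothetical command lines; the
   accepted tokens come from the recip_options[] table defined elsewhere
   in this file):

     -mrecip=default    mask chosen by TARGET_RECIP_PRECISION
     -mrecip=all,!div   set every mask, then clear the division bits

   A '!' prefix inverts a token, clearing its mask bits.  */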
5578
5579 /* Set the builtin mask of the various options used that could affect which
5580 builtins were used. In the past we used target_flags, but we've run out
5581 of bits, and some options like SPE and PAIRED are no longer in
5582 target_flags. */
5583 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5584 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5585 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5586 rs6000_builtin_mask);
5587
5588 /* Initialize all of the registers. */
5589 rs6000_init_hard_regno_mode_ok (global_init_p);
5590
5591 /* Save the initial options in case the user uses function-specific options. */
5592 if (global_init_p)
5593 target_option_default_node = target_option_current_node
5594 = build_target_option_node (&global_options);
5595
5596 /* If not explicitly specified via option, decide whether to generate the
5597 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
5598 if (TARGET_LINK_STACK == -1)
5599 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5600
5601 return ret;
5602 }
5603
5604 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5605 define the target cpu type. */
5606
5607 static void
5608 rs6000_option_override (void)
5609 {
5610 (void) rs6000_option_override_internal (true);
5611 }
5612
5613 \f
5614 /* Implement targetm.vectorize.builtin_mask_for_load. */
5615 static tree
5616 rs6000_builtin_mask_for_load (void)
5617 {
5618 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5619 if ((TARGET_ALTIVEC && !TARGET_VSX)
5620 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5621 return altivec_builtin_mask_for_load;
5622 else
5623 return 0;
5624 }
5625
5626 /* Implement LOOP_ALIGN. */
5627 int
5628 rs6000_loop_align (rtx label)
5629 {
5630 basic_block bb;
5631 int ninsns;
5632
5633 /* Don't override loop alignment if -falign-loops was specified. */
5634 if (!can_override_loop_align)
5635 return align_loops_log;
5636
5637 bb = BLOCK_FOR_INSN (label);
5638 ninsns = num_loop_insns (bb->loop_father);
5639
5640 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5641 if (ninsns > 4 && ninsns <= 8
5642 && (rs6000_cpu == PROCESSOR_POWER4
5643 || rs6000_cpu == PROCESSOR_POWER5
5644 || rs6000_cpu == PROCESSOR_POWER6
5645 || rs6000_cpu == PROCESSOR_POWER7
5646 || rs6000_cpu == PROCESSOR_POWER8
5647 || rs6000_cpu == PROCESSOR_POWER9))
5648 return 5;
5649 else
5650 return align_loops_log;
5651 }
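
/* Worked example (illustrative): a 6-insn loop body on power8 falls in
   the (4, 8] window, so the function returns 5, i.e. 2**5 = 32-byte
   alignment, matching an icache sector; a 12-insn loop falls back to
   align_loops_log.  */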
5652
5653 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5654 static int
5655 rs6000_loop_align_max_skip (rtx_insn *label)
5656 {
5657 return (1 << rs6000_loop_align (label)) - 1;
5658 }
5659
5660 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5661 after some number of loop peel iterations. This routine does not
5662 determine how many iterations are required to reach the desired alignment. */
5663
5664 static bool
5665 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5666 {
5667 if (is_packed)
5668 return false;
5669
5670 if (TARGET_32BIT)
5671 {
5672 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5673 return true;
5674
5675 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5676 return true;
5677
5678 return false;
5679 }
5680 else
5681 {
5682 if (TARGET_MACHO)
5683 return false;
5684
5685 /* Assuming that all other types are naturally aligned. CHECKME! */
5686 return true;
5687 }
5688 }
5689
5690 /* Return true if the vector misalignment factor is supported by the
5691 target. */
5692 static bool
5693 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5694 const_tree type,
5695 int misalignment,
5696 bool is_packed)
5697 {
5698 if (TARGET_VSX)
5699 {
5700 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5701 return true;
5702
5703 /* Return false if the movmisalign pattern is not supported for this mode. */
5704 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5705 return false;
5706
5707 if (misalignment == -1)
5708 {
5709 /* Misalignment factor is unknown at compile time but we know
5710 it's word aligned. */
5711 if (rs6000_vector_alignment_reachable (type, is_packed))
5712 {
5713 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5714
5715 if (element_size == 64 || element_size == 32)
5716 return true;
5717 }
5718
5719 return false;
5720 }
5721
5722 /* VSX supports word-aligned vectors. */
5723 if (misalignment % 4 == 0)
5724 return true;
5725 }
5726 return false;
5727 }
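
/* Illustrative summary of the checks above for a VSX target without
   efficient unaligned accesses: a V4SF reference misaligned by 4 or 8
   bytes is supported (a word multiple); a misalignment of 2 bytes is
   not; an unknown misalignment (-1) is accepted only when alignment is
   reachable by peeling and the element is 32 or 64 bits wide.  */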
5728
5729 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5730 static int
5731 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5732 tree vectype, int misalign)
5733 {
5734 unsigned elements;
5735 tree elem_type;
5736
5737 switch (type_of_cost)
5738 {
5739 case scalar_stmt:
5740 case scalar_load:
5741 case scalar_store:
5742 case vector_stmt:
5743 case vector_load:
5744 case vector_store:
5745 case vec_to_scalar:
5746 case scalar_to_vec:
5747 case cond_branch_not_taken:
5748 return 1;
5749
5750 case vec_perm:
5751 if (TARGET_VSX)
5752 return 3;
5753 else
5754 return 1;
5755
5756 case vec_promote_demote:
5757 if (TARGET_VSX)
5758 return 4;
5759 else
5760 return 1;
5761
5762 case cond_branch_taken:
5763 return 3;
5764
5765 case unaligned_load:
5766 if (TARGET_P9_VECTOR)
5767 return 3;
5768
5769 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5770 return 1;
5771
5772 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5773 {
5774 elements = TYPE_VECTOR_SUBPARTS (vectype);
5775 if (elements == 2)
5776 /* Double word aligned. */
5777 return 2;
5778
5779 if (elements == 4)
5780 {
5781 switch (misalign)
5782 {
5783 case 8:
5784 /* Double word aligned. */
5785 return 2;
5786
5787 case -1:
5788 /* Unknown misalignment. */
5789 case 4:
5790 case 12:
5791 /* Word aligned. */
5792 return 22;
5793
5794 default:
5795 gcc_unreachable ();
5796 }
5797 }
5798 }
5799
5800 if (TARGET_ALTIVEC)
5801 /* Misaligned loads are not supported. */
5802 gcc_unreachable ();
5803
5804 return 2;
5805
5806 case unaligned_store:
5807 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5808 return 1;
5809
5810 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5811 {
5812 elements = TYPE_VECTOR_SUBPARTS (vectype);
5813 if (elements == 2)
5814 /* Double word aligned. */
5815 return 2;
5816
5817 if (elements == 4)
5818 {
5819 switch (misalign)
5820 {
5821 case 8:
5822 /* Double word aligned. */
5823 return 2;
5824
5825 case -1:
5826 /* Unknown misalignment. */
5827 case 4:
5828 case 12:
5829 /* Word aligned. */
5830 return 23;
5831
5832 default:
5833 gcc_unreachable ();
5834 }
5835 }
5836 }
5837
5838 if (TARGET_ALTIVEC)
5839 /* Misaligned stores are not supported. */
5840 gcc_unreachable ();
5841
5842 return 2;
5843
5844 case vec_construct:
5845 /* This is a rough approximation assuming non-constant elements
5846 constructed into a vector via element insertion. FIXME:
5847 vec_construct is not granular enough for uniformly good
5848 decisions. If the initialization is a splat, this is
5849 cheaper than we estimate. Improve this someday. */
5850 elem_type = TREE_TYPE (vectype);
5851 /* 32-bit vectors loaded into registers are stored as double
5852 precision, so we need 2 permutes, 2 converts, and 1 merge
5853 to construct a vector of short floats from them. */
5854 if (SCALAR_FLOAT_TYPE_P (elem_type)
5855 && TYPE_PRECISION (elem_type) == 32)
5856 return 5;
5857 /* On POWER9, integer vector types are built up in GPRs and then
5858 use a direct move (2 cycles). For POWER8 this is even worse,
5859 as we need two direct moves and a merge, and the direct moves
5860 are five cycles. */
5861 else if (INTEGRAL_TYPE_P (elem_type))
5862 {
5863 if (TARGET_P9_VECTOR)
5864 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5865 else
5866 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
5867 }
5868 else
5869 /* V2DFmode doesn't need a direct move. */
5870 return 2;
5871
5872 default:
5873 gcc_unreachable ();
5874 }
5875 }
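
/* Two worked cost examples from the tables above (illustrative):

     unaligned_load, V4SF, misalign 4, VSX with movmisalign -> 22
     vec_construct, V4SI on power9 -> TYPE_VECTOR_SUBPARTS (4) - 1 + 2 = 5

   These are the vectorizer's abstract statement-cost units, not
   cycles.  */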
5876
5877 /* Implement targetm.vectorize.preferred_simd_mode. */
5878
5879 static machine_mode
5880 rs6000_preferred_simd_mode (scalar_mode mode)
5881 {
5882 if (TARGET_VSX)
5883 switch (mode)
5884 {
5885 case E_DFmode:
5886 return V2DFmode;
5887 default:;
5888 }
5889 if (TARGET_ALTIVEC || TARGET_VSX)
5890 switch (mode)
5891 {
5892 case E_SFmode:
5893 return V4SFmode;
5894 case E_TImode:
5895 return V1TImode;
5896 case E_DImode:
5897 return V2DImode;
5898 case E_SImode:
5899 return V4SImode;
5900 case E_HImode:
5901 return V8HImode;
5902 case E_QImode:
5903 return V16QImode;
5904 default:;
5905 }
5906 if (TARGET_SPE)
5907 switch (mode)
5908 {
5909 case E_SFmode:
5910 return V2SFmode;
5911 case E_SImode:
5912 return V2SImode;
5913 default:;
5914 }
5915 if (TARGET_PAIRED_FLOAT
5916 && mode == SFmode)
5917 return V2SFmode;
5918 return word_mode;
5919 }
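
/* Illustrative mapping of the preferences above: with VSX, DFmode
   vectorizes as V2DF; with AltiVec or VSX, SFmode -> V4SF and SImode ->
   V4SI; SPE instead prefers the 64-bit pairs V2SF/V2SI.  Anything
   unhandled falls back to word_mode, i.e. no vector preference.  */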
5920
5921 typedef struct _rs6000_cost_data
5922 {
5923 struct loop *loop_info;
5924 unsigned cost[3];
5925 } rs6000_cost_data;
5926
5927 /* Test for likely overcommitment of vector hardware resources. If a
5928 loop iteration is relatively large, and too large a percentage of
5929 instructions in the loop are vectorized, the cost model may not
5930 adequately reflect delays from unavailable vector resources.
5931 Penalize the loop body cost for this case. */
5932
5933 static void
5934 rs6000_density_test (rs6000_cost_data *data)
5935 {
5936 const int DENSITY_PCT_THRESHOLD = 85;
5937 const int DENSITY_SIZE_THRESHOLD = 70;
5938 const int DENSITY_PENALTY = 10;
5939 struct loop *loop = data->loop_info;
5940 basic_block *bbs = get_loop_body (loop);
5941 int nbbs = loop->num_nodes;
5942 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5943 int i, density_pct;
5944
5945 for (i = 0; i < nbbs; i++)
5946 {
5947 basic_block bb = bbs[i];
5948 gimple_stmt_iterator gsi;
5949
5950 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5951 {
5952 gimple *stmt = gsi_stmt (gsi);
5953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5954
5955 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5956 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5957 not_vec_cost++;
5958 }
5959 }
5960
5961 free (bbs);
5962 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5963
5964 if (density_pct > DENSITY_PCT_THRESHOLD
5965 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5966 {
5967 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_NOTE, vect_location,
5970 "density %d%%, cost %d exceeds threshold, penalizing "
5971 "loop body cost by %d%%", density_pct,
5972 vec_cost + not_vec_cost, DENSITY_PENALTY);
5973 }
5974 }
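
/* Worked example with illustrative numbers: vec_cost = 90 and
   not_vec_cost = 10 give density_pct = 90 and a total size of 100, so
   both thresholds (85% and 70) are exceeded and the body cost becomes
   90 * 110 / 100 = 99.  */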
5975
5976 /* Implement targetm.vectorize.init_cost. */
5977
5978 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5979 instruction is needed by the vectorization. */
5980 static bool rs6000_vect_nonmem;
5981
5982 static void *
5983 rs6000_init_cost (struct loop *loop_info)
5984 {
5985 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5986 data->loop_info = loop_info;
5987 data->cost[vect_prologue] = 0;
5988 data->cost[vect_body] = 0;
5989 data->cost[vect_epilogue] = 0;
5990 rs6000_vect_nonmem = false;
5991 return data;
5992 }
5993
5994 /* Implement targetm.vectorize.add_stmt_cost. */
5995
5996 static unsigned
5997 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5998 struct _stmt_vec_info *stmt_info, int misalign,
5999 enum vect_cost_model_location where)
6000 {
6001 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6002 unsigned retval = 0;
6003
6004 if (flag_vect_cost_model)
6005 {
6006 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6007 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
6008 misalign);
6009 /* Statements in an inner loop relative to the loop being
6010 vectorized are weighted more heavily. The value here is
6011 arbitrary and could potentially be improved with analysis. */
6012 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6013 count *= 50; /* FIXME. */
6014
6015 retval = (unsigned) (count * stmt_cost);
6016 cost_data->cost[where] += retval;
6017
6018 /* Check whether we're doing something other than just a copy loop.
6019 Not all such loops may be profitably vectorized; see
6020 rs6000_finish_cost. */
6021 if ((kind == vec_to_scalar || kind == vec_perm
6022 || kind == vec_promote_demote || kind == vec_construct
6023 || kind == scalar_to_vec)
6024 || (where == vect_body && kind == vector_stmt))
6025 rs6000_vect_nonmem = true;
6026 }
6027
6028 return retval;
6029 }
6030
6031 /* Implement targetm.vectorize.finish_cost. */
6032
6033 static void
6034 rs6000_finish_cost (void *data, unsigned *prologue_cost,
6035 unsigned *body_cost, unsigned *epilogue_cost)
6036 {
6037 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
6038
6039 if (cost_data->loop_info)
6040 rs6000_density_test (cost_data);
6041
6042 /* Don't vectorize minimum-vectorization-factor, simple copy loops
6043 that require versioning for any reason. The vectorization is at
6044 best a wash inside the loop, and the versioning checks make
6045 profitability highly unlikely and potentially quite harmful. */
6046 if (cost_data->loop_info)
6047 {
6048 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
6049 if (!rs6000_vect_nonmem
6050 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
6051 && LOOP_REQUIRES_VERSIONING (vec_info))
6052 cost_data->cost[vect_body] += 10000;
6053 }
6054
6055 *prologue_cost = cost_data->cost[vect_prologue];
6056 *body_cost = cost_data->cost[vect_body];
6057 *epilogue_cost = cost_data->cost[vect_epilogue];
6058 }
6059
6060 /* Implement targetm.vectorize.destroy_cost_data. */
6061
6062 static void
6063 rs6000_destroy_cost_data (void *data)
6064 {
6065 free (data);
6066 }
6067
6068 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
6069 library with vectorized intrinsics. */
6070
6071 static tree
6072 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
6073 tree type_in)
6074 {
6075 char name[32];
6076 const char *suffix = NULL;
6077 tree fntype, new_fndecl, bdecl = NULL_TREE;
6078 int n_args = 1;
6079 const char *bname;
6080 machine_mode el_mode, in_mode;
6081 int n, in_n;
6082
6083 /* Libmass is suitable for unsafe math only, as it does not correctly support
6084 parts of IEEE, such as denormals, with the required precision. Only support
6085 it if we have VSX, to use the simd d2 or f4 functions.
6086 XXX: Add variable length support. */
6087 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
6088 return NULL_TREE;
6089
6090 el_mode = TYPE_MODE (TREE_TYPE (type_out));
6091 n = TYPE_VECTOR_SUBPARTS (type_out);
6092 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6093 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6094 if (el_mode != in_mode
6095 || n != in_n)
6096 return NULL_TREE;
6097
6098 switch (fn)
6099 {
6100 CASE_CFN_ATAN2:
6101 CASE_CFN_HYPOT:
6102 CASE_CFN_POW:
6103 n_args = 2;
6104 gcc_fallthrough ();
6105
6106 CASE_CFN_ACOS:
6107 CASE_CFN_ACOSH:
6108 CASE_CFN_ASIN:
6109 CASE_CFN_ASINH:
6110 CASE_CFN_ATAN:
6111 CASE_CFN_ATANH:
6112 CASE_CFN_CBRT:
6113 CASE_CFN_COS:
6114 CASE_CFN_COSH:
6115 CASE_CFN_ERF:
6116 CASE_CFN_ERFC:
6117 CASE_CFN_EXP2:
6118 CASE_CFN_EXP:
6119 CASE_CFN_EXPM1:
6120 CASE_CFN_LGAMMA:
6121 CASE_CFN_LOG10:
6122 CASE_CFN_LOG1P:
6123 CASE_CFN_LOG2:
6124 CASE_CFN_LOG:
6125 CASE_CFN_SIN:
6126 CASE_CFN_SINH:
6127 CASE_CFN_SQRT:
6128 CASE_CFN_TAN:
6129 CASE_CFN_TANH:
6130 if (el_mode == DFmode && n == 2)
6131 {
6132 bdecl = mathfn_built_in (double_type_node, fn);
6133 suffix = "d2"; /* pow -> powd2 */
6134 }
6135 else if (el_mode == SFmode && n == 4)
6136 {
6137 bdecl = mathfn_built_in (float_type_node, fn);
6138 suffix = "4"; /* powf -> powf4 */
6139 }
6140 else
6141 return NULL_TREE;
6142 if (!bdecl)
6143 return NULL_TREE;
6144 break;
6145
6146 default:
6147 return NULL_TREE;
6148 }
6149
6150 gcc_assert (suffix != NULL);
6151 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6152 if (!bname)
6153 return NULL_TREE;
6154
6155 strcpy (name, bname + sizeof ("__builtin_") - 1);
6156 strcat (name, suffix);
6157
6158 if (n_args == 1)
6159 fntype = build_function_type_list (type_out, type_in, NULL);
6160 else if (n_args == 2)
6161 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6162 else
6163 gcc_unreachable ();
6164
6165 /* Build a function declaration for the vectorized function. */
6166 new_fndecl = build_decl (BUILTINS_LOCATION,
6167 FUNCTION_DECL, get_identifier (name), fntype);
6168 TREE_PUBLIC (new_fndecl) = 1;
6169 DECL_EXTERNAL (new_fndecl) = 1;
6170 DECL_IS_NOVOPS (new_fndecl) = 1;
6171 TREE_READONLY (new_fndecl) = 1;
6172
6173 return new_fndecl;
6174 }
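
/* Hedged example of the mangling above: vectorizing pow for V2DF takes
   the CASE_CFN_POW path with n_args == 2, strips "__builtin_" from
   "__builtin_pow" and appends "d2", declaring the MASS routine "powd2"
   with type V2DF (V2DF, V2DF).  Whether that symbol resolves is up to
   the libmass library the user links against.  */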
6175
6176 /* Returns a function decl for a vectorized version of the builtin function
6177 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6178 if it is not available. */
6179
6180 static tree
6181 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6182 tree type_in)
6183 {
6184 machine_mode in_mode, out_mode;
6185 int in_n, out_n;
6186
6187 if (TARGET_DEBUG_BUILTIN)
6188 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6189 combined_fn_name (combined_fn (fn)),
6190 GET_MODE_NAME (TYPE_MODE (type_out)),
6191 GET_MODE_NAME (TYPE_MODE (type_in)));
6192
6193 if (TREE_CODE (type_out) != VECTOR_TYPE
6194 || TREE_CODE (type_in) != VECTOR_TYPE
6195 || !TARGET_VECTORIZE_BUILTINS)
6196 return NULL_TREE;
6197
6198 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6199 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6200 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6201 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6202
6203 switch (fn)
6204 {
6205 CASE_CFN_COPYSIGN:
6206 if (VECTOR_UNIT_VSX_P (V2DFmode)
6207 && out_mode == DFmode && out_n == 2
6208 && in_mode == DFmode && in_n == 2)
6209 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6210 if (VECTOR_UNIT_VSX_P (V4SFmode)
6211 && out_mode == SFmode && out_n == 4
6212 && in_mode == SFmode && in_n == 4)
6213 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6214 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6215 && out_mode == SFmode && out_n == 4
6216 && in_mode == SFmode && in_n == 4)
6217 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6218 break;
6219 CASE_CFN_CEIL:
6220 if (VECTOR_UNIT_VSX_P (V2DFmode)
6221 && out_mode == DFmode && out_n == 2
6222 && in_mode == DFmode && in_n == 2)
6223 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6224 if (VECTOR_UNIT_VSX_P (V4SFmode)
6225 && out_mode == SFmode && out_n == 4
6226 && in_mode == SFmode && in_n == 4)
6227 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6228 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6229 && out_mode == SFmode && out_n == 4
6230 && in_mode == SFmode && in_n == 4)
6231 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6232 break;
6233 CASE_CFN_FLOOR:
6234 if (VECTOR_UNIT_VSX_P (V2DFmode)
6235 && out_mode == DFmode && out_n == 2
6236 && in_mode == DFmode && in_n == 2)
6237 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6238 if (VECTOR_UNIT_VSX_P (V4SFmode)
6239 && out_mode == SFmode && out_n == 4
6240 && in_mode == SFmode && in_n == 4)
6241 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6242 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6243 && out_mode == SFmode && out_n == 4
6244 && in_mode == SFmode && in_n == 4)
6245 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6246 break;
6247 CASE_CFN_FMA:
6248 if (VECTOR_UNIT_VSX_P (V2DFmode)
6249 && out_mode == DFmode && out_n == 2
6250 && in_mode == DFmode && in_n == 2)
6251 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6252 if (VECTOR_UNIT_VSX_P (V4SFmode)
6253 && out_mode == SFmode && out_n == 4
6254 && in_mode == SFmode && in_n == 4)
6255 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6256 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6257 && out_mode == SFmode && out_n == 4
6258 && in_mode == SFmode && in_n == 4)
6259 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6260 break;
6261 CASE_CFN_TRUNC:
6262 if (VECTOR_UNIT_VSX_P (V2DFmode)
6263 && out_mode == DFmode && out_n == 2
6264 && in_mode == DFmode && in_n == 2)
6265 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6266 if (VECTOR_UNIT_VSX_P (V4SFmode)
6267 && out_mode == SFmode && out_n == 4
6268 && in_mode == SFmode && in_n == 4)
6269 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6270 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6271 && out_mode == SFmode && out_n == 4
6272 && in_mode == SFmode && in_n == 4)
6273 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6274 break;
6275 CASE_CFN_NEARBYINT:
6276 if (VECTOR_UNIT_VSX_P (V2DFmode)
6277 && flag_unsafe_math_optimizations
6278 && out_mode == DFmode && out_n == 2
6279 && in_mode == DFmode && in_n == 2)
6280 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6281 if (VECTOR_UNIT_VSX_P (V4SFmode)
6282 && flag_unsafe_math_optimizations
6283 && out_mode == SFmode && out_n == 4
6284 && in_mode == SFmode && in_n == 4)
6285 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6286 break;
6287 CASE_CFN_RINT:
6288 if (VECTOR_UNIT_VSX_P (V2DFmode)
6289 && !flag_trapping_math
6290 && out_mode == DFmode && out_n == 2
6291 && in_mode == DFmode && in_n == 2)
6292 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6293 if (VECTOR_UNIT_VSX_P (V4SFmode)
6294 && !flag_trapping_math
6295 && out_mode == SFmode && out_n == 4
6296 && in_mode == SFmode && in_n == 4)
6297 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6298 break;
6299 default:
6300 break;
6301 }
6302
6303 /* Generate calls to libmass if appropriate. */
6304 if (rs6000_veclib_handler)
6305 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6306
6307 return NULL_TREE;
6308 }
6309
6310 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6311
6312 static tree
6313 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6314 tree type_in)
6315 {
6316 machine_mode in_mode, out_mode;
6317 int in_n, out_n;
6318
6319 if (TARGET_DEBUG_BUILTIN)
6320 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6321 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6322 GET_MODE_NAME (TYPE_MODE (type_out)),
6323 GET_MODE_NAME (TYPE_MODE (type_in)));
6324
6325 if (TREE_CODE (type_out) != VECTOR_TYPE
6326 || TREE_CODE (type_in) != VECTOR_TYPE
6327 || !TARGET_VECTORIZE_BUILTINS)
6328 return NULL_TREE;
6329
6330 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6331 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6332 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6333 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6334
6335 enum rs6000_builtins fn
6336 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6337 switch (fn)
6338 {
6339 case RS6000_BUILTIN_RSQRTF:
6340 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6341 && out_mode == SFmode && out_n == 4
6342 && in_mode == SFmode && in_n == 4)
6343 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6344 break;
6345 case RS6000_BUILTIN_RSQRT:
6346 if (VECTOR_UNIT_VSX_P (V2DFmode)
6347 && out_mode == DFmode && out_n == 2
6348 && in_mode == DFmode && in_n == 2)
6349 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6350 break;
6351 case RS6000_BUILTIN_RECIPF:
6352 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6353 && out_mode == SFmode && out_n == 4
6354 && in_mode == SFmode && in_n == 4)
6355 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6356 break;
6357 case RS6000_BUILTIN_RECIP:
6358 if (VECTOR_UNIT_VSX_P (V2DFmode)
6359 && out_mode == DFmode && out_n == 2
6360 && in_mode == DFmode && in_n == 2)
6361 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6362 break;
6363 default:
6364 break;
6365 }
6366 return NULL_TREE;
6367 }
6368 \f
6369 /* Default CPU string for rs6000*_file_start functions. */
6370 static const char *rs6000_default_cpu;
6371
6372 /* Do anything needed at the start of the asm file. */
6373
6374 static void
6375 rs6000_file_start (void)
6376 {
6377 char buffer[80];
6378 const char *start = buffer;
6379 FILE *file = asm_out_file;
6380
6381 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6382
6383 default_file_start ();
6384
6385 if (flag_verbose_asm)
6386 {
6387 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6388
6389 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6390 {
6391 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6392 start = "";
6393 }
6394
6395 if (global_options_set.x_rs6000_cpu_index)
6396 {
6397 fprintf (file, "%s -mcpu=%s", start,
6398 processor_target_table[rs6000_cpu_index].name);
6399 start = "";
6400 }
6401
6402 if (global_options_set.x_rs6000_tune_index)
6403 {
6404 fprintf (file, "%s -mtune=%s", start,
6405 processor_target_table[rs6000_tune_index].name);
6406 start = "";
6407 }
6408
6409 if (PPC405_ERRATUM77)
6410 {
6411 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6412 start = "";
6413 }
6414
6415 #ifdef USING_ELFOS_H
6416 switch (rs6000_sdata)
6417 {
6418 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6419 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6420 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6421 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6422 }
6423
6424 if (rs6000_sdata && g_switch_value)
6425 {
6426 fprintf (file, "%s -G %d", start,
6427 g_switch_value);
6428 start = "";
6429 }
6430 #endif
6431
6432 if (*start == '\0')
6433 putc ('\n', file);
6434 }
6435
6436 #ifdef USING_ELFOS_H
6437 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6438 && !global_options_set.x_rs6000_cpu_index)
6439 {
6440 fputs ("\t.machine ", asm_out_file);
6441 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6442 fputs ("power9\n", asm_out_file);
6443 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6444 fputs ("power8\n", asm_out_file);
6445 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6446 fputs ("power7\n", asm_out_file);
6447 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6448 fputs ("power6\n", asm_out_file);
6449 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6450 fputs ("power5\n", asm_out_file);
6451 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6452 fputs ("power4\n", asm_out_file);
6453 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6454 fputs ("ppc64\n", asm_out_file);
6455 else
6456 fputs ("ppc\n", asm_out_file);
6457 }
6458 #endif
6459
6460 if (DEFAULT_ABI == ABI_ELFv2)
6461 fprintf (file, "\t.abiversion 2\n");
6462 }
6463
6464 \f
6465 /* Return nonzero if this function is known to have a null epilogue. */
6466
6467 int
6468 direct_return (void)
6469 {
6470 if (reload_completed)
6471 {
6472 rs6000_stack_t *info = rs6000_stack_info ();
6473
6474 if (info->first_gp_reg_save == 32
6475 && info->first_fp_reg_save == 64
6476 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6477 && ! info->lr_save_p
6478 && ! info->cr_save_p
6479 && info->vrsave_size == 0
6480 && ! info->push_p)
6481 return 1;
6482 }
6483
6484 return 0;
6485 }
6486
6487 /* Return the number of instructions it takes to form a constant in an
6488 integer register. */
6489
6490 int
6491 num_insns_constant_wide (HOST_WIDE_INT value)
6492 {
6493 /* signed constant loadable with addi */
6494 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6495 return 1;
6496
6497 /* constant loadable with addis */
6498 else if ((value & 0xffff) == 0
6499 && (value >> 31 == -1 || value >> 31 == 0))
6500 return 1;
6501
6502 else if (TARGET_POWERPC64)
6503 {
6504 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6505 HOST_WIDE_INT high = value >> 31;
6506
6507 if (high == 0 || high == -1)
6508 return 2;
6509
6510 high >>= 1;
6511
6512 if (low == 0)
6513 return num_insns_constant_wide (high) + 1;
6514 else if (high == 0)
6515 return num_insns_constant_wide (low) + 1;
6516 else
6517 return (num_insns_constant_wide (high)
6518 + num_insns_constant_wide (low) + 1);
6519 }
6520
6521 else
6522 return 2;
6523 }
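
/* Worked examples (illustrative): 0x7fff passes the addi test and costs
   1 insn; 0x12340000 has a zero low half and a sign-extendable high
   half, so addis alone suffices (1); 0x12345678 needs addis + ori (2).
   Full 64-bit constants recurse on the high and low halves, plus one
   insn for the shift that joins them.  */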
6524
6525 int
6526 num_insns_constant (rtx op, machine_mode mode)
6527 {
6528 HOST_WIDE_INT low, high;
6529
6530 switch (GET_CODE (op))
6531 {
6532 case CONST_INT:
6533 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6534 && rs6000_is_valid_and_mask (op, mode))
6535 return 2;
6536 else
6537 return num_insns_constant_wide (INTVAL (op));
6538
6539 case CONST_WIDE_INT:
6540 {
6541 int i;
6542 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6543 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6544 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6545 return ins;
6546 }
6547
6548 case CONST_DOUBLE:
6549 if (mode == SFmode || mode == SDmode)
6550 {
6551 long l;
6552
6553 if (DECIMAL_FLOAT_MODE_P (mode))
6554 REAL_VALUE_TO_TARGET_DECIMAL32
6555 (*CONST_DOUBLE_REAL_VALUE (op), l);
6556 else
6557 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6558 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6559 }
6560
6561 long l[2];
6562 if (DECIMAL_FLOAT_MODE_P (mode))
6563 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6564 else
6565 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6566 high = l[WORDS_BIG_ENDIAN == 0];
6567 low = l[WORDS_BIG_ENDIAN != 0];
6568
6569 if (TARGET_32BIT)
6570 return (num_insns_constant_wide (low)
6571 + num_insns_constant_wide (high));
6572 else
6573 {
6574 if ((high == 0 && low >= 0)
6575 || (high == -1 && low < 0))
6576 return num_insns_constant_wide (low);
6577
6578 else if (rs6000_is_valid_and_mask (op, mode))
6579 return 2;
6580
6581 else if (low == 0)
6582 return num_insns_constant_wide (high) + 1;
6583
6584 else
6585 return (num_insns_constant_wide (high)
6586 + num_insns_constant_wide (low) + 1);
6587 }
6588
6589 default:
6590 gcc_unreachable ();
6591 }
6592 }
6593
6594 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6595 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6596 corresponding element of the vector, but for V4SFmode and V2SFmode,
6597 the corresponding "float" is interpreted as an SImode integer. */
6598
6599 HOST_WIDE_INT
6600 const_vector_elt_as_int (rtx op, unsigned int elt)
6601 {
6602 rtx tmp;
6603
6604 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6605 gcc_assert (GET_MODE (op) != V2DImode
6606 && GET_MODE (op) != V2DFmode);
6607
6608 tmp = CONST_VECTOR_ELT (op, elt);
6609 if (GET_MODE (op) == V4SFmode
6610 || GET_MODE (op) == V2SFmode)
6611 tmp = gen_lowpart (SImode, tmp);
6612 return INTVAL (tmp);
6613 }
6614
6615 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6616 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6617 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6618 all items are set to the same value and contain COPIES replicas of the
6619 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6620 operand and the others are set to the value of the operand's msb. */
6621
6622 static bool
6623 vspltis_constant (rtx op, unsigned step, unsigned copies)
6624 {
6625 machine_mode mode = GET_MODE (op);
6626 machine_mode inner = GET_MODE_INNER (mode);
6627
6628 unsigned i;
6629 unsigned nunits;
6630 unsigned bitsize;
6631 unsigned mask;
6632
6633 HOST_WIDE_INT val;
6634 HOST_WIDE_INT splat_val;
6635 HOST_WIDE_INT msb_val;
6636
6637 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6638 return false;
6639
6640 nunits = GET_MODE_NUNITS (mode);
6641 bitsize = GET_MODE_BITSIZE (inner);
6642 mask = GET_MODE_MASK (inner);
6643
6644 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6645 splat_val = val;
6646 msb_val = val >= 0 ? 0 : -1;
6647
6648 /* Construct the value to be splatted, if possible. If not, return 0. */
6649 for (i = 2; i <= copies; i *= 2)
6650 {
6651 HOST_WIDE_INT small_val;
6652 bitsize /= 2;
6653 small_val = splat_val >> bitsize;
6654 mask >>= bitsize;
6655 if (splat_val != ((HOST_WIDE_INT)
6656 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6657 | (small_val & mask)))
6658 return false;
6659 splat_val = small_val;
6660 }
6661
6662 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6663 if (EASY_VECTOR_15 (splat_val))
6664 ;
6665
6666 /* Also check if we can splat, and then add the result to itself. Do so if
6667      the value is positive, or if the splat instruction is using OP's mode;
6668 for splat_val < 0, the splat and the add should use the same mode. */
6669 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6670 && (splat_val >= 0 || (step == 1 && copies == 1)))
6671 ;
6672
6673   /* Also check if we are loading up the most significant bit which can be
6674      done by loading up -1 and shifting the value left by -1. */
6675 else if (EASY_VECTOR_MSB (splat_val, inner))
6676 ;
6677
6678 else
6679 return false;
6680
6681 /* Check if VAL is present in every STEP-th element, and the
6682 other elements are filled with its most significant bit. */
6683 for (i = 1; i < nunits; ++i)
6684 {
6685 HOST_WIDE_INT desired_val;
6686 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6687 if ((i & (step - 1)) == 0)
6688 desired_val = val;
6689 else
6690 desired_val = msb_val;
6691
6692 if (desired_val != const_vector_elt_as_int (op, elt))
6693 return false;
6694 }
6695
6696 return true;
6697 }
6698
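/* As examples of the above: with STEP 1 and COPIES 1, the V4SImode constant
   { 5, 5, 5, 5 } is accepted directly (vspltisw 5).  With COPIES 2, the
   constant { 0x30003, 0x30003, 0x30003, 0x30003 } is accepted because
   splatting 3 into halfwords (vspltish 3) yields the same bit pattern.  With
   STEP 2 on a big-endian target, the V8HImode constant
   { 0, 5, 0, 5, 0, 5, 0, 5 } is accepted because vspltisw 5 produces exactly
   that when viewed as halfwords.  */
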
6699 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6700 instruction, filling in the bottom elements with 0 or -1.
6701
6702 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6703 for the number of zeroes to shift in, or negative for the number of 0xff
6704 bytes to shift in.
6705
6706 OP is a CONST_VECTOR. */
6707
6708 int
6709 vspltis_shifted (rtx op)
6710 {
6711 machine_mode mode = GET_MODE (op);
6712 machine_mode inner = GET_MODE_INNER (mode);
6713
6714 unsigned i, j;
6715 unsigned nunits;
6716 unsigned mask;
6717
6718 HOST_WIDE_INT val;
6719
6720 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6721 return false;
6722
6723 /* We need to create pseudo registers to do the shift, so don't recognize
6724 shift vector constants after reload. */
6725 if (!can_create_pseudo_p ())
6726 return false;
6727
6728 nunits = GET_MODE_NUNITS (mode);
6729 mask = GET_MODE_MASK (inner);
6730
6731 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6732
6733 /* Check if the value can really be the operand of a vspltis[bhw]. */
6734 if (EASY_VECTOR_15 (val))
6735 ;
6736
6737 /* Also check if we are loading up the most significant bit which can be done
6738 by loading up -1 and shifting the value left by -1. */
6739 else if (EASY_VECTOR_MSB (val, inner))
6740 ;
6741
6742 else
6743 return 0;
6744
6745   /* Check if VAL is present in every element until we find elements
6746 that are 0 or all 1 bits. */
6747 for (i = 1; i < nunits; ++i)
6748 {
6749 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6750 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6751
6752 /* If the value isn't the splat value, check for the remaining elements
6753 being 0/-1. */
6754 if (val != elt_val)
6755 {
6756 if (elt_val == 0)
6757 {
6758 for (j = i+1; j < nunits; ++j)
6759 {
6760 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6761 if (const_vector_elt_as_int (op, elt2) != 0)
6762 return 0;
6763 }
6764
6765 return (nunits - i) * GET_MODE_SIZE (inner);
6766 }
6767
6768 else if ((elt_val & mask) == mask)
6769 {
6770 for (j = i+1; j < nunits; ++j)
6771 {
6772 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6773 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6774 return 0;
6775 }
6776
6777 return -((nunits - i) * GET_MODE_SIZE (inner));
6778 }
6779
6780 else
6781 return 0;
6782 }
6783 }
6784
6785   /* If all elements are equal, we don't need to do VSLDOI. */
6786 return 0;
6787 }
6788
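/* For example, on a big-endian target the V4SImode constant { 5, 0, 0, 0 }
   is accepted here: element 0 holds the splat value 5 and the remaining
   three words are zero, so the function returns 12, i.e. vspltisw 5 followed
   by a VSLDOI shifting in 12 bytes of zeroes.  */
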
6789
6790 /* Return true if OP is of the given MODE and can be synthesized
6791 with a vspltisb, vspltish or vspltisw. */
6792
6793 bool
6794 easy_altivec_constant (rtx op, machine_mode mode)
6795 {
6796 unsigned step, copies;
6797
6798 if (mode == VOIDmode)
6799 mode = GET_MODE (op);
6800 else if (mode != GET_MODE (op))
6801 return false;
6802
6803 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6804 constants. */
6805 if (mode == V2DFmode)
6806 return zero_constant (op, mode);
6807
6808 else if (mode == V2DImode)
6809 {
6810 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6811 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6812 return false;
6813
6814 if (zero_constant (op, mode))
6815 return true;
6816
6817 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6818 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6819 return true;
6820
6821 return false;
6822 }
6823
6824 /* V1TImode is a special container for TImode. Ignore for now. */
6825 else if (mode == V1TImode)
6826 return false;
6827
6828 /* Start with a vspltisw. */
6829 step = GET_MODE_NUNITS (mode) / 4;
6830 copies = 1;
6831
6832 if (vspltis_constant (op, step, copies))
6833 return true;
6834
6835 /* Then try with a vspltish. */
6836 if (step == 1)
6837 copies <<= 1;
6838 else
6839 step >>= 1;
6840
6841 if (vspltis_constant (op, step, copies))
6842 return true;
6843
6844 /* And finally a vspltisb. */
6845 if (step == 1)
6846 copies <<= 1;
6847 else
6848 step >>= 1;
6849
6850 if (vspltis_constant (op, step, copies))
6851 return true;
6852
6853 if (vspltis_shifted (op) != 0)
6854 return true;
6855
6856 return false;
6857 }
6858
6859 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6860 result is OP. Abort if it is not possible. */
6861
6862 rtx
6863 gen_easy_altivec_constant (rtx op)
6864 {
6865 machine_mode mode = GET_MODE (op);
6866 int nunits = GET_MODE_NUNITS (mode);
6867 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6868 unsigned step = nunits / 4;
6869 unsigned copies = 1;
6870
6871 /* Start with a vspltisw. */
6872 if (vspltis_constant (op, step, copies))
6873 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6874
6875 /* Then try with a vspltish. */
6876 if (step == 1)
6877 copies <<= 1;
6878 else
6879 step >>= 1;
6880
6881 if (vspltis_constant (op, step, copies))
6882 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6883
6884 /* And finally a vspltisb. */
6885 if (step == 1)
6886 copies <<= 1;
6887 else
6888 step >>= 1;
6889
6890 if (vspltis_constant (op, step, copies))
6891 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6892
6893 gcc_unreachable ();
6894 }
6895
6896 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6897    instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6898
6899    Return the number of instructions needed (1 or 2) through the address
6900    pointed to by NUM_INSNS_PTR.
6901
6902    Return the constant that is being loaded through CONSTANT_PTR. */
6903
6904 bool
6905 xxspltib_constant_p (rtx op,
6906 machine_mode mode,
6907 int *num_insns_ptr,
6908 int *constant_ptr)
6909 {
6910 size_t nunits = GET_MODE_NUNITS (mode);
6911 size_t i;
6912 HOST_WIDE_INT value;
6913 rtx element;
6914
6915 /* Set the returned values to out of bound values. */
6916 *num_insns_ptr = -1;
6917 *constant_ptr = 256;
6918
6919 if (!TARGET_P9_VECTOR)
6920 return false;
6921
6922 if (mode == VOIDmode)
6923 mode = GET_MODE (op);
6924
6925 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6926 return false;
6927
6928 /* Handle (vec_duplicate <constant>). */
6929 if (GET_CODE (op) == VEC_DUPLICATE)
6930 {
6931 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6932 && mode != V2DImode)
6933 return false;
6934
6935 element = XEXP (op, 0);
6936 if (!CONST_INT_P (element))
6937 return false;
6938
6939 value = INTVAL (element);
6940 if (!IN_RANGE (value, -128, 127))
6941 return false;
6942 }
6943
6944 /* Handle (const_vector [...]). */
6945 else if (GET_CODE (op) == CONST_VECTOR)
6946 {
6947 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6948 && mode != V2DImode)
6949 return false;
6950
6951 element = CONST_VECTOR_ELT (op, 0);
6952 if (!CONST_INT_P (element))
6953 return false;
6954
6955 value = INTVAL (element);
6956 if (!IN_RANGE (value, -128, 127))
6957 return false;
6958
6959 for (i = 1; i < nunits; i++)
6960 {
6961 element = CONST_VECTOR_ELT (op, i);
6962 if (!CONST_INT_P (element))
6963 return false;
6964
6965 if (value != INTVAL (element))
6966 return false;
6967 }
6968 }
6969
6970 /* Handle integer constants being loaded into the upper part of the VSX
6971 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6972      can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6973 else if (CONST_INT_P (op))
6974 {
6975 if (!SCALAR_INT_MODE_P (mode))
6976 return false;
6977
6978 value = INTVAL (op);
6979 if (!IN_RANGE (value, -128, 127))
6980 return false;
6981
6982 if (!IN_RANGE (value, -1, 0))
6983 {
6984 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6985 return false;
6986
6987 if (EASY_VECTOR_15 (value))
6988 return false;
6989 }
6990 }
6991
6992 else
6993 return false;
6994
6995 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6996 sign extend. Special case 0/-1 to allow getting any VSX register instead
6997 of an Altivec register. */
6998 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6999 && EASY_VECTOR_15 (value))
7000 return false;
7001
7002 /* Return # of instructions and the constant byte for XXSPLTIB. */
7003 if (mode == V16QImode)
7004 *num_insns_ptr = 1;
7005
7006 else if (IN_RANGE (value, -1, 0))
7007 *num_insns_ptr = 1;
7008
7009 else
7010 *num_insns_ptr = 2;
7011
7012 *constant_ptr = (int) value;
7013 return true;
7014 }
7015
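/* Examples for xxspltib_constant_p: a V16QImode splat of 0x42 needs a single
   xxspltib; a V4SImode splat of 100 needs xxspltib followed by a vextsb2w
   sign extension (2 insns); and a V4SImode splat of 5 is rejected above in
   favor of the single-instruction vspltisw 5.  */
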
7016 const char *
7017 output_vec_const_move (rtx *operands)
7018 {
7019 int cst, cst2, shift;
7020 machine_mode mode;
7021 rtx dest, vec;
7022
7023 dest = operands[0];
7024 vec = operands[1];
7025 mode = GET_MODE (dest);
7026
7027 if (TARGET_VSX)
7028 {
7029 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
7030 int xxspltib_value = 256;
7031 int num_insns = -1;
7032
7033 if (zero_constant (vec, mode))
7034 {
7035 if (TARGET_P9_VECTOR)
7036 return "xxspltib %x0,0";
7037
7038 else if (dest_vmx_p)
7039 return "vspltisw %0,0";
7040
7041 else
7042 return "xxlxor %x0,%x0,%x0";
7043 }
7044
7045 if (all_ones_constant (vec, mode))
7046 {
7047 if (TARGET_P9_VECTOR)
7048 return "xxspltib %x0,255";
7049
7050 else if (dest_vmx_p)
7051 return "vspltisw %0,-1";
7052
7053 else if (TARGET_P8_VECTOR)
7054 return "xxlorc %x0,%x0,%x0";
7055
7056 else
7057 gcc_unreachable ();
7058 }
7059
7060 if (TARGET_P9_VECTOR
7061 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
7062 {
7063 if (num_insns == 1)
7064 {
7065 operands[2] = GEN_INT (xxspltib_value & 0xff);
7066 return "xxspltib %x0,%2";
7067 }
7068
7069 return "#";
7070 }
7071 }
7072
7073 if (TARGET_ALTIVEC)
7074 {
7075 rtx splat_vec;
7076
7077 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
7078 if (zero_constant (vec, mode))
7079 return "vspltisw %0,0";
7080
7081 if (all_ones_constant (vec, mode))
7082 return "vspltisw %0,-1";
7083
7084 /* Do we need to construct a value using VSLDOI? */
7085 shift = vspltis_shifted (vec);
7086 if (shift != 0)
7087 return "#";
7088
7089 splat_vec = gen_easy_altivec_constant (vec);
7090 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
7091 operands[1] = XEXP (splat_vec, 0);
7092 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
7093 return "#";
7094
7095 switch (GET_MODE (splat_vec))
7096 {
7097 case E_V4SImode:
7098 return "vspltisw %0,%1";
7099
7100 case E_V8HImode:
7101 return "vspltish %0,%1";
7102
7103 case E_V16QImode:
7104 return "vspltisb %0,%1";
7105
7106 default:
7107 gcc_unreachable ();
7108 }
7109 }
7110
7111 gcc_assert (TARGET_SPE);
7112
7113 /* Vector constant 0 is handled as a splitter of V2SI, and in the
7114 pattern of V1DI, V4HI, and V2SF.
7115
7116 FIXME: We should probably return # and add post reload
7117 splitters for these, but this way is so easy ;-). */
7118 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
7119 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
7120 operands[1] = CONST_VECTOR_ELT (vec, 0);
7121 operands[2] = CONST_VECTOR_ELT (vec, 1);
7122 if (cst == cst2)
7123 return "li %0,%1\n\tevmergelo %0,%0,%0";
7124 else if (WORDS_BIG_ENDIAN)
7125 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
7126 else
7127 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
7128 }
7129
7130 /* Initialize TARGET of vector PAIRED to VALS. */
7131
7132 void
7133 paired_expand_vector_init (rtx target, rtx vals)
7134 {
7135 machine_mode mode = GET_MODE (target);
7136 int n_elts = GET_MODE_NUNITS (mode);
7137 int n_var = 0;
7138 rtx x, new_rtx, tmp, constant_op, op1, op2;
7139 int i;
7140
7141 for (i = 0; i < n_elts; ++i)
7142 {
7143 x = XVECEXP (vals, 0, i);
7144 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7145 ++n_var;
7146 }
7147 if (n_var == 0)
7148 {
7149 /* Load from constant pool. */
7150 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7151 return;
7152 }
7153
7154 if (n_var == 2)
7155 {
7156 /* The vector is initialized only with non-constants. */
7157 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7158 XVECEXP (vals, 0, 1));
7159
7160 emit_move_insn (target, new_rtx);
7161 return;
7162 }
7163
7164 /* One field is non-constant and the other one is a constant. Load the
7165 constant from the constant pool and use ps_merge instruction to
7166 construct the whole vector. */
7167 op1 = XVECEXP (vals, 0, 0);
7168 op2 = XVECEXP (vals, 0, 1);
7169
7170 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7171
7172 tmp = gen_reg_rtx (GET_MODE (constant_op));
7173 emit_move_insn (tmp, constant_op);
7174
7175 if (CONSTANT_P (op1))
7176 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7177 else
7178 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7179
7180 emit_move_insn (target, new_rtx);
7181 }
7182
7183 void
7184 paired_expand_vector_move (rtx operands[])
7185 {
7186 rtx op0 = operands[0], op1 = operands[1];
7187
7188 emit_move_insn (op0, op1);
7189 }
7190
7191 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
7192 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7193 operands for the relation operation COND. This is a recursive
7194 function. */
7195
7196 static void
7197 paired_emit_vector_compare (enum rtx_code rcode,
7198 rtx dest, rtx op0, rtx op1,
7199 rtx cc_op0, rtx cc_op1)
7200 {
7201 rtx tmp = gen_reg_rtx (V2SFmode);
7202 rtx tmp1, max, min;
7203
7204 gcc_assert (TARGET_PAIRED_FLOAT);
7205 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7206
7207 switch (rcode)
7208 {
7209 case LT:
7210 case LTU:
7211 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7212 return;
7213 case GE:
7214 case GEU:
7215 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7216 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7217 return;
7218 case LE:
7219 case LEU:
7220 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7221 return;
7222 case GT:
7223 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7224 return;
7225 case EQ:
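	/* a == b is computed via min/max: MAX receives CC_OP0 when
	   CC_OP0 - CC_OP1 >= 0, MIN receives CC_OP0 when CC_OP1 - CC_OP0 >= 0,
	   so MIN - MAX >= 0 holds exactly when the two inputs compare
	   equal.  */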
7226 tmp1 = gen_reg_rtx (V2SFmode);
7227 max = gen_reg_rtx (V2SFmode);
7228 min = gen_reg_rtx (V2SFmode);
7230
7231 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7232 emit_insn (gen_selv2sf4
7233 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7234 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7235 emit_insn (gen_selv2sf4
7236 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7237 emit_insn (gen_subv2sf3 (tmp1, min, max));
7238 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7239 return;
7240 case NE:
7241 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7242 return;
7243 case UNLE:
7244 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7245 return;
7246 case UNLT:
7247 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7248 return;
7249 case UNGE:
7250 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7251 return;
7252 case UNGT:
7253 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7254 return;
7255 default:
7256 gcc_unreachable ();
7257 }
7258
7259 return;
7260 }
7261
7262 /* Emit vector conditional expression.
7263 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7264 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7265
7266 int
7267 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7268 rtx cond, rtx cc_op0, rtx cc_op1)
7269 {
7270 enum rtx_code rcode = GET_CODE (cond);
7271
7272 if (!TARGET_PAIRED_FLOAT)
7273 return 0;
7274
7275 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7276
7277 return 1;
7278 }
7279
7280 /* Initialize vector TARGET to VALS. */
7281
7282 void
7283 rs6000_expand_vector_init (rtx target, rtx vals)
7284 {
7285 machine_mode mode = GET_MODE (target);
7286 machine_mode inner_mode = GET_MODE_INNER (mode);
7287 int n_elts = GET_MODE_NUNITS (mode);
7288 int n_var = 0, one_var = -1;
7289 bool all_same = true, all_const_zero = true;
7290 rtx x, mem;
7291 int i;
7292
7293 for (i = 0; i < n_elts; ++i)
7294 {
7295 x = XVECEXP (vals, 0, i);
7296 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7297 ++n_var, one_var = i;
7298 else if (x != CONST0_RTX (inner_mode))
7299 all_const_zero = false;
7300
7301 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7302 all_same = false;
7303 }
7304
7305 if (n_var == 0)
7306 {
7307 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7308 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7309 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7310 {
7311 /* Zero register. */
7312 emit_move_insn (target, CONST0_RTX (mode));
7313 return;
7314 }
7315 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7316 {
7317 /* Splat immediate. */
7318 emit_insn (gen_rtx_SET (target, const_vec));
7319 return;
7320 }
7321 else
7322 {
7323 /* Load from constant pool. */
7324 emit_move_insn (target, const_vec);
7325 return;
7326 }
7327 }
7328
7329 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7330 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7331 {
7332 rtx op[2];
7333 size_t i;
7334 size_t num_elements = all_same ? 1 : 2;
7335 for (i = 0; i < num_elements; i++)
7336 {
7337 op[i] = XVECEXP (vals, 0, i);
7338 /* Just in case there is a SUBREG with a smaller mode, do a
7339 conversion. */
7340 if (GET_MODE (op[i]) != inner_mode)
7341 {
7342 rtx tmp = gen_reg_rtx (inner_mode);
7343 convert_move (tmp, op[i], 0);
7344 op[i] = tmp;
7345 }
7346 /* Allow load with splat double word. */
7347 else if (MEM_P (op[i]))
7348 {
7349 if (!all_same)
7350 op[i] = force_reg (inner_mode, op[i]);
7351 }
7352 else if (!REG_P (op[i]))
7353 op[i] = force_reg (inner_mode, op[i]);
7354 }
7355
7356 if (all_same)
7357 {
7358 if (mode == V2DFmode)
7359 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7360 else
7361 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7362 }
7363 else
7364 {
7365 if (mode == V2DFmode)
7366 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7367 else
7368 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7369 }
7370 return;
7371 }
7372
7373 /* Special case initializing vector int if we are on 64-bit systems with
7374 direct move or we have the ISA 3.0 instructions. */
7375 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7376 && TARGET_DIRECT_MOVE_64BIT)
7377 {
7378 if (all_same)
7379 {
7380 rtx element0 = XVECEXP (vals, 0, 0);
7381 if (MEM_P (element0))
7382 element0 = rs6000_address_for_fpconvert (element0);
7383 else
7384 element0 = force_reg (SImode, element0);
7385
7386 if (TARGET_P9_VECTOR)
7387 emit_insn (gen_vsx_splat_v4si (target, element0));
7388 else
7389 {
7390 rtx tmp = gen_reg_rtx (DImode);
7391 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7392 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7393 }
7394 return;
7395 }
7396 else
7397 {
7398 rtx elements[4];
7399 size_t i;
7400
7401 for (i = 0; i < 4; i++)
7402 {
7403 elements[i] = XVECEXP (vals, 0, i);
7404 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7405 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7406 }
7407
7408 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7409 elements[2], elements[3]));
7410 return;
7411 }
7412 }
7413
7414   /* With single precision floating point on VSX, we know that internally single
7415 precision is actually represented as a double, and either make 2 V2DF
7416 vectors, and convert these vectors to single precision, or do one
7417 conversion, and splat the result to the other elements. */
7418 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7419 {
7420 if (all_same)
7421 {
7422 rtx element0 = XVECEXP (vals, 0, 0);
7423
7424 if (TARGET_P9_VECTOR)
7425 {
7426 if (MEM_P (element0))
7427 element0 = rs6000_address_for_fpconvert (element0);
7428
7429 emit_insn (gen_vsx_splat_v4sf (target, element0));
7430 }
7431
7432 else
7433 {
7434 rtx freg = gen_reg_rtx (V4SFmode);
7435 rtx sreg = force_reg (SFmode, element0);
7436 rtx cvt = (TARGET_XSCVDPSPN
7437 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7438 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7439
7440 emit_insn (cvt);
7441 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7442 const0_rtx));
7443 }
7444 }
7445 else
7446 {
7447 rtx dbl_even = gen_reg_rtx (V2DFmode);
7448 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7449 rtx flt_even = gen_reg_rtx (V4SFmode);
7450 rtx flt_odd = gen_reg_rtx (V4SFmode);
7451 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7452 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7453 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7454 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7455
7456 /* Use VMRGEW if we can instead of doing a permute. */
7457 if (TARGET_P8_VECTOR)
7458 {
7459 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7460 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7461 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7462 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7463 if (BYTES_BIG_ENDIAN)
7464 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7465 else
7466 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7467 }
7468 else
7469 {
7470 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7471 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7472 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7473 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7474 rs6000_expand_extract_even (target, flt_even, flt_odd);
7475 }
7476 }
7477 return;
7478 }
7479
7480 /* Special case initializing vector short/char that are splats if we are on
7481 64-bit systems with direct move. */
7482 if (all_same && TARGET_DIRECT_MOVE_64BIT
7483 && (mode == V16QImode || mode == V8HImode))
7484 {
7485 rtx op0 = XVECEXP (vals, 0, 0);
7486 rtx di_tmp = gen_reg_rtx (DImode);
7487
7488 if (!REG_P (op0))
7489 op0 = force_reg (GET_MODE_INNER (mode), op0);
7490
7491 if (mode == V16QImode)
7492 {
7493 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7494 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7495 return;
7496 }
7497
7498 if (mode == V8HImode)
7499 {
7500 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7501 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7502 return;
7503 }
7504 }
7505
7506 /* Store value to stack temp. Load vector element. Splat. However, splat
7507 of 64-bit items is not supported on Altivec. */
7508 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7509 {
7510 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7511 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7512 XVECEXP (vals, 0, 0));
7513 x = gen_rtx_UNSPEC (VOIDmode,
7514 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7515 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7516 gen_rtvec (2,
7517 gen_rtx_SET (target, mem),
7518 x)));
7519 x = gen_rtx_VEC_SELECT (inner_mode, target,
7520 gen_rtx_PARALLEL (VOIDmode,
7521 gen_rtvec (1, const0_rtx)));
7522 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7523 return;
7524 }
7525
7526 /* One field is non-constant. Load constant then overwrite
7527 varying field. */
7528 if (n_var == 1)
7529 {
7530 rtx copy = copy_rtx (vals);
7531
7532 /* Load constant part of vector, substitute neighboring value for
7533 varying element. */
7534 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7535 rs6000_expand_vector_init (target, copy);
7536
7537 /* Insert variable. */
7538 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7539 return;
7540 }
7541
7542 /* Construct the vector in memory one field at a time
7543 and load the whole vector. */
7544 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7545 for (i = 0; i < n_elts; i++)
7546 emit_move_insn (adjust_address_nv (mem, inner_mode,
7547 i * GET_MODE_SIZE (inner_mode)),
7548 XVECEXP (vals, 0, i));
7549 emit_move_insn (target, mem);
7550 }
7551
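/* For example, initializing a V4SImode vector where only element 2 is
   variable takes the N_VAR == 1 path above: the constant vector
   { c0, c1, c3, c3 } (element 2 replaced by its neighbor) is loaded first,
   and the variable value is then inserted with rs6000_expand_vector_set.  */
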
7552 /* Set field ELT of TARGET to VAL. */
7553
7554 void
7555 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7556 {
7557 machine_mode mode = GET_MODE (target);
7558 machine_mode inner_mode = GET_MODE_INNER (mode);
7559 rtx reg = gen_reg_rtx (mode);
7560 rtx mask, mem, x;
7561 int width = GET_MODE_SIZE (inner_mode);
7562 int i;
7563
7564 val = force_reg (GET_MODE (val), val);
7565
7566 if (VECTOR_MEM_VSX_P (mode))
7567 {
7568 rtx insn = NULL_RTX;
7569 rtx elt_rtx = GEN_INT (elt);
7570
7571 if (mode == V2DFmode)
7572 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7573
7574 else if (mode == V2DImode)
7575 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7576
7577 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7578 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7579 {
7580 if (mode == V4SImode)
7581 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7582 else if (mode == V8HImode)
7583 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7584 else if (mode == V16QImode)
7585 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7586 }
7587
7588 if (insn)
7589 {
7590 emit_insn (insn);
7591 return;
7592 }
7593 }
7594
7595 /* Simplify setting single element vectors like V1TImode. */
7596 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7597 {
7598 emit_move_insn (target, gen_lowpart (mode, val));
7599 return;
7600 }
7601
7602 /* Load single variable value. */
7603 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7604 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7605 x = gen_rtx_UNSPEC (VOIDmode,
7606 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7607 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7608 gen_rtvec (2,
7609 gen_rtx_SET (reg, mem),
7610 x)));
7611
7612 /* Linear sequence. */
7613 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7614 for (i = 0; i < 16; ++i)
7615 XVECEXP (mask, 0, i) = GEN_INT (i);
7616
7617 /* Set permute mask to insert element into target. */
7618 for (i = 0; i < width; ++i)
7619 XVECEXP (mask, 0, elt*width + i)
7620 = GEN_INT (i + 0x10);
7621 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7622
7623 if (BYTES_BIG_ENDIAN)
7624 x = gen_rtx_UNSPEC (mode,
7625 gen_rtvec (3, target, reg,
7626 force_reg (V16QImode, x)),
7627 UNSPEC_VPERM);
7628 else
7629 {
7630 if (TARGET_P9_VECTOR)
7631 x = gen_rtx_UNSPEC (mode,
7632 gen_rtvec (3, target, reg,
7633 force_reg (V16QImode, x)),
7634 UNSPEC_VPERMR);
7635 else
7636 {
7637 /* Invert selector. We prefer to generate VNAND on P8 so
7638 that future fusion opportunities can kick in, but must
7639 generate VNOR elsewhere. */
7640 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7641 rtx iorx = (TARGET_P8_VECTOR
7642 ? gen_rtx_IOR (V16QImode, notx, notx)
7643 : gen_rtx_AND (V16QImode, notx, notx));
7644 rtx tmp = gen_reg_rtx (V16QImode);
7645 emit_insn (gen_rtx_SET (tmp, iorx));
7646
7647 /* Permute with operands reversed and adjusted selector. */
7648 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7649 UNSPEC_VPERM);
7650 }
7651 }
7652
7653 emit_insn (gen_rtx_SET (target, x));
7654 }
7655
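/* For example, setting element 1 of a V4SImode vector on a big-endian target
   builds the byte selector { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }:
   selector entries below 16 keep the corresponding byte of TARGET, entries
   16 and up select bytes of REG, so bytes 4..7 of the result come from the
   register holding the new element value.  */
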
7656 /* Extract field ELT from VEC into TARGET. */
7657
7658 void
7659 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7660 {
7661 machine_mode mode = GET_MODE (vec);
7662 machine_mode inner_mode = GET_MODE_INNER (mode);
7663 rtx mem;
7664
7665 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7666 {
7667 switch (mode)
7668 {
7669 default:
7670 break;
7671 case E_V1TImode:
7672 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7673 emit_move_insn (target, gen_lowpart (TImode, vec));
7674 	  return;
7675 case E_V2DFmode:
7676 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7677 return;
7678 case E_V2DImode:
7679 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7680 return;
7681 case E_V4SFmode:
7682 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7683 return;
7684 case E_V16QImode:
7685 if (TARGET_DIRECT_MOVE_64BIT)
7686 {
7687 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7688 return;
7689 }
7690 else
7691 break;
7692 case E_V8HImode:
7693 if (TARGET_DIRECT_MOVE_64BIT)
7694 {
7695 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7696 return;
7697 }
7698 else
7699 break;
7700 case E_V4SImode:
7701 if (TARGET_DIRECT_MOVE_64BIT)
7702 {
7703 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7704 return;
7705 }
7706 break;
7707 }
7708 }
7709 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7710 && TARGET_DIRECT_MOVE_64BIT)
7711 {
7712 if (GET_MODE (elt) != DImode)
7713 {
7714 rtx tmp = gen_reg_rtx (DImode);
7715 convert_move (tmp, elt, 0);
7716 elt = tmp;
7717 }
7718 else if (!REG_P (elt))
7719 elt = force_reg (DImode, elt);
7720
7721 switch (mode)
7722 {
7723 case E_V2DFmode:
7724 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7725 return;
7726
7727 case E_V2DImode:
7728 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7729 return;
7730
7731 case E_V4SFmode:
7732 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7733 return;
7734
7735 case E_V4SImode:
7736 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7737 return;
7738
7739 case E_V8HImode:
7740 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7741 return;
7742
7743 case E_V16QImode:
7744 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7745 return;
7746
7747 default:
7748 gcc_unreachable ();
7749 }
7750 }
7751
7752 gcc_assert (CONST_INT_P (elt));
7753
7754 /* Allocate mode-sized buffer. */
7755 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7756
7757 emit_move_insn (mem, vec);
7758
7759 /* Add offset to field within buffer matching vector element. */
7760 mem = adjust_address_nv (mem, inner_mode,
7761 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7762
7763 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7764 }
7765
7766 /* Helper function to return the register number of a RTX. */
7767 static inline int
7768 regno_or_subregno (rtx op)
7769 {
7770 if (REG_P (op))
7771 return REGNO (op);
7772 else if (SUBREG_P (op))
7773 return subreg_regno (op);
7774 else
7775 gcc_unreachable ();
7776 }
7777
7778 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7779 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7780    temporary (BASE_TMP) to fix up the address. Return the new memory address
7781 that is valid for reads or writes to a given register (SCALAR_REG). */
7782
7783 rtx
7784 rs6000_adjust_vec_address (rtx scalar_reg,
7785 rtx mem,
7786 rtx element,
7787 rtx base_tmp,
7788 machine_mode scalar_mode)
7789 {
7790 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7791 rtx addr = XEXP (mem, 0);
7792 rtx element_offset;
7793 rtx new_addr;
7794 bool valid_addr_p;
7795
7796 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7797 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7798
7799 /* Calculate what we need to add to the address to get the element
7800 address. */
7801 if (CONST_INT_P (element))
7802 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7803 else
7804 {
7805 int byte_shift = exact_log2 (scalar_size);
7806 gcc_assert (byte_shift >= 0);
7807
7808 if (byte_shift == 0)
7809 element_offset = element;
7810
7811 else
7812 {
7813 if (TARGET_POWERPC64)
7814 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7815 else
7816 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7817
7818 element_offset = base_tmp;
7819 }
7820 }
7821
7822 /* Create the new address pointing to the element within the vector. If we
7823 are adding 0, we don't have to change the address. */
7824 if (element_offset == const0_rtx)
7825 new_addr = addr;
7826
7827 /* A simple indirect address can be converted into a reg + offset
7828 address. */
7829 else if (REG_P (addr) || SUBREG_P (addr))
7830 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7831
7832 /* Optimize D-FORM addresses with constant offset with a constant element, to
7833 include the element offset in the address directly. */
7834 else if (GET_CODE (addr) == PLUS)
7835 {
7836 rtx op0 = XEXP (addr, 0);
7837 rtx op1 = XEXP (addr, 1);
7838 rtx insn;
7839
7840 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7841 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7842 {
7843 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7844 rtx offset_rtx = GEN_INT (offset);
7845
7846 if (IN_RANGE (offset, -32768, 32767)
7847 && (scalar_size < 8 || (offset & 0x3) == 0))
7848 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7849 else
7850 {
7851 emit_move_insn (base_tmp, offset_rtx);
7852 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7853 }
7854 }
7855 else
7856 {
7857 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7858 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7859
7860 /* Note, ADDI requires the register being added to be a base
7861 register. If the register was R0, load it up into the temporary
7862 and do the add. */
7863 if (op1_reg_p
7864 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7865 {
7866 insn = gen_add3_insn (base_tmp, op1, element_offset);
7867 gcc_assert (insn != NULL_RTX);
7868 emit_insn (insn);
7869 }
7870
7871 else if (ele_reg_p
7872 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7873 {
7874 insn = gen_add3_insn (base_tmp, element_offset, op1);
7875 gcc_assert (insn != NULL_RTX);
7876 emit_insn (insn);
7877 }
7878
7879 else
7880 {
7881 emit_move_insn (base_tmp, op1);
7882 emit_insn (gen_add2_insn (base_tmp, element_offset));
7883 }
7884
7885 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7886 }
7887 }
7888
7889 else
7890 {
7891 emit_move_insn (base_tmp, addr);
7892 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7893 }
7894
7895 /* If we have a PLUS, we need to see whether the particular register class
7896 allows for D-FORM or X-FORM addressing. */
7897 if (GET_CODE (new_addr) == PLUS)
7898 {
7899 rtx op1 = XEXP (new_addr, 1);
7900 addr_mask_type addr_mask;
7901 int scalar_regno = regno_or_subregno (scalar_reg);
7902
7903 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7904 if (INT_REGNO_P (scalar_regno))
7905 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7906
7907 else if (FP_REGNO_P (scalar_regno))
7908 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7909
7910 else if (ALTIVEC_REGNO_P (scalar_regno))
7911 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7912
7913 else
7914 gcc_unreachable ();
7915
7916 if (REG_P (op1) || SUBREG_P (op1))
7917 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7918 else
7919 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7920 }
7921
7922 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7923 valid_addr_p = true;
7924
7925 else
7926 valid_addr_p = false;
7927
7928 if (!valid_addr_p)
7929 {
7930 emit_move_insn (base_tmp, new_addr);
7931 new_addr = base_tmp;
7932 }
7933
7934 return change_address (mem, scalar_mode, new_addr);
7935 }
7936
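/* For example, extracting constant element 2 of a V4SImode vector located at
   16(r9): ELEMENT_OFFSET is 2 * 4 = 8, the PLUS case above folds it into the
   existing displacement, and the resulting address is 24(r9), which is then
   validated against the register class's offset addressing mask.  */
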
7937 /* Split a variable vec_extract operation into the component instructions. */
7938
7939 void
7940 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7941 rtx tmp_altivec)
7942 {
7943 machine_mode mode = GET_MODE (src);
7944 machine_mode scalar_mode = GET_MODE (dest);
7945 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7946 int byte_shift = exact_log2 (scalar_size);
7947
7948 gcc_assert (byte_shift >= 0);
7949
7950 /* If we are given a memory address, optimize to load just the element. We
7951 don't have to adjust the vector element number on little endian
7952 systems. */
7953 if (MEM_P (src))
7954 {
7955 gcc_assert (REG_P (tmp_gpr));
7956 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7957 tmp_gpr, scalar_mode));
7958 return;
7959 }
7960
7961 else if (REG_P (src) || SUBREG_P (src))
7962 {
7963 int bit_shift = byte_shift + 3;
7964 rtx element2;
7965 int dest_regno = regno_or_subregno (dest);
7966 int src_regno = regno_or_subregno (src);
7967 int element_regno = regno_or_subregno (element);
7968
7969 gcc_assert (REG_P (tmp_gpr));
7970
7971 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7972 a general purpose register. */
7973 if (TARGET_P9_VECTOR
7974 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7975 && INT_REGNO_P (dest_regno)
7976 && ALTIVEC_REGNO_P (src_regno)
7977 && INT_REGNO_P (element_regno))
7978 {
7979 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7980 rtx element_si = gen_rtx_REG (SImode, element_regno);
7981
7982 if (mode == V16QImode)
7983 emit_insn (VECTOR_ELT_ORDER_BIG
7984 ? gen_vextublx (dest_si, element_si, src)
7985 : gen_vextubrx (dest_si, element_si, src));
7986
7987 else if (mode == V8HImode)
7988 {
7989 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7990 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7991 emit_insn (VECTOR_ELT_ORDER_BIG
7992 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7993 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7994 }
7995
7996
7997 else
7998 {
7999 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
8000 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
8001 emit_insn (VECTOR_ELT_ORDER_BIG
8002 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
8003 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
8004 }
8005
8006 return;
8007 }
8008
8009
8010 gcc_assert (REG_P (tmp_altivec));
8011
8012 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
8013 an XOR, otherwise we need to subtract. The shift amount is so VSLO
8014 will shift the element into the upper position (adding 3 to convert a
8015 byte shift into a bit shift). */
8016 if (scalar_size == 8)
8017 {
8018 if (!VECTOR_ELT_ORDER_BIG)
8019 {
8020 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
8021 element2 = tmp_gpr;
8022 }
8023 else
8024 element2 = element;
8025
8026 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8027 bit. */
8028 emit_insn (gen_rtx_SET (tmp_gpr,
8029 gen_rtx_AND (DImode,
8030 gen_rtx_ASHIFT (DImode,
8031 element2,
8032 GEN_INT (6)),
8033 GEN_INT (64))));
8034 }
8035 else
8036 {
8037 if (!VECTOR_ELT_ORDER_BIG)
8038 {
8039 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
8040
8041 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8042 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8043 element2 = tmp_gpr;
8044 }
8045 else
8046 element2 = element;
8047
8048 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8049 }
8050
8051 /* Get the value into the lower byte of the Altivec register where VSLO
8052 expects it. */
8053 if (TARGET_P9_VECTOR)
8054 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8055 else if (can_create_pseudo_p ())
8056 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8057 else
8058 {
8059 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8060 emit_move_insn (tmp_di, tmp_gpr);
8061 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8062 }
8063
8064 /* Do the VSLO to get the value into the final location. */
8065 switch (mode)
8066 {
8067 case E_V2DFmode:
8068 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8069 return;
8070
8071 case E_V2DImode:
8072 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8073 return;
8074
8075 case E_V4SFmode:
8076 {
8077 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8078 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8079 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8080 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8081 tmp_altivec));
8082
8083 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8084 return;
8085 }
8086
8087 case E_V4SImode:
8088 case E_V8HImode:
8089 case E_V16QImode:
8090 {
8091 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8092 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8093 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8094 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8095 tmp_altivec));
8096 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8097 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
8098 GEN_INT (64 - (8 * scalar_size))));
8099 return;
8100 }
8101
8102 default:
8103 gcc_unreachable ();
8104 }
8105
8106 return;
8107 }
8108 else
8109 gcc_unreachable ();
8110 }
8111
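/* Note the little-endian index fixups above: for 8-byte elements the index
   is flipped with XOR 1, while for smaller elements it becomes
   (nunits - 1) - index via the AND/SUB pair; the index is then scaled to a
   bit count so VSLO can shift the selected element into the high
   position.  */
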
8112 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
8113 two SImode values. */
8114
8115 static void
8116 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
8117 {
8118 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
8119
8120 if (CONST_INT_P (si1) && CONST_INT_P (si2))
8121 {
8122 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
8123 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
8124
8125 emit_move_insn (dest, GEN_INT (const1 | const2));
8126 return;
8127 }
8128
8129 /* Put si1 into upper 32-bits of dest. */
8130 if (CONST_INT_P (si1))
8131 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8132 else
8133 {
8134 /* Generate RLDIC. */
8135 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8136 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8137 rtx mask_rtx = GEN_INT (mask_32bit << 32);
8138 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8139 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8140 emit_insn (gen_rtx_SET (dest, and_rtx));
8141 }
8142
8143 /* Put si2 into the temporary. */
8144 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8145 if (CONST_INT_P (si2))
8146 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8147 else
8148 emit_insn (gen_zero_extendsidi2 (tmp, si2));
8149
8150 /* Combine the two parts. */
8151 emit_insn (gen_iordi3 (dest, dest, tmp));
8152 return;
8153 }
8154
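/* For example, combining SI1 = r10 with SI2 = 7 takes three insns: a
   shift-and-mask (rldicr rD,r10,32,31) putting r10 into the upper 32 bits of
   DEST, li TMP,7 for the lower half, and an OR merging the two.  */
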
8155 /* Split a V4SI initialization. */
8156
8157 void
8158 rs6000_split_v4si_init (rtx operands[])
8159 {
8160 rtx dest = operands[0];
8161
8162 /* Destination is a GPR, build up the two DImode parts in place. */
8163 if (REG_P (dest) || SUBREG_P (dest))
8164 {
8165 int d_regno = regno_or_subregno (dest);
8166 rtx scalar1 = operands[1];
8167 rtx scalar2 = operands[2];
8168 rtx scalar3 = operands[3];
8169 rtx scalar4 = operands[4];
8170 rtx tmp1 = operands[5];
8171 rtx tmp2 = operands[6];
8172
8173 	 /* Even though we only need one temporary (plus the destination, which
8174 	    has an early clobber constraint), try to use two temporaries, one for
8175 each double word created. That way the 2nd insn scheduling pass can
8176 rearrange things so the two parts are done in parallel. */
8177 if (BYTES_BIG_ENDIAN)
8178 {
8179 rtx di_lo = gen_rtx_REG (DImode, d_regno);
8180 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8181 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8182 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8183 }
8184 else
8185 {
8186 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8187 rtx di_hi = gen_rtx_REG (DImode, d_regno);
8188 gcc_assert (!VECTOR_ELT_ORDER_BIG);
8189 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8190 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8191 }
8192 return;
8193 }
8194
8195 else
8196 gcc_unreachable ();
8197 }
8198
8199 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
8200
8201 bool
8202 invalid_e500_subreg (rtx op, machine_mode mode)
8203 {
8204 if (TARGET_E500_DOUBLE)
8205 {
8206 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
8207 subreg:TI and reg:TF. Decimal float modes are like integer
8208 modes (only low part of each register used) for this
8209 purpose. */
8210 if (GET_CODE (op) == SUBREG
8211 && (mode == SImode || mode == DImode || mode == TImode
8212 || mode == DDmode || mode == TDmode || mode == PTImode)
8213 && REG_P (SUBREG_REG (op))
8214 && (GET_MODE (SUBREG_REG (op)) == DFmode
8215 || GET_MODE (SUBREG_REG (op)) == TFmode
8216 || GET_MODE (SUBREG_REG (op)) == IFmode
8217 || GET_MODE (SUBREG_REG (op)) == KFmode))
8218 return true;
8219
8220 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
8221 reg:TI. */
8222 if (GET_CODE (op) == SUBREG
8223 && (mode == DFmode || mode == TFmode || mode == IFmode
8224 || mode == KFmode)
8225 && REG_P (SUBREG_REG (op))
8226 && (GET_MODE (SUBREG_REG (op)) == DImode
8227 || GET_MODE (SUBREG_REG (op)) == TImode
8228 || GET_MODE (SUBREG_REG (op)) == PTImode
8229 || GET_MODE (SUBREG_REG (op)) == DDmode
8230 || GET_MODE (SUBREG_REG (op)) == TDmode))
8231 return true;
8232 }
8233
8234 if (TARGET_SPE
8235 && GET_CODE (op) == SUBREG
8236 && mode == SImode
8237 && REG_P (SUBREG_REG (op))
8238 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
8239 return true;
8240
8241 return false;
8242 }
8243
8244 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8245    selects whether the alignment is ABI-mandated, optional, or
8246    both ABI and optional alignment. */
8247
8248 unsigned int
8249 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8250 {
8251 if (how != align_opt)
8252 {
8253 if (TREE_CODE (type) == VECTOR_TYPE)
8254 {
8255 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
8256 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
8257 {
8258 if (align < 64)
8259 align = 64;
8260 }
8261 else if (align < 128)
8262 align = 128;
8263 }
8264 else if (TARGET_E500_DOUBLE
8265 && TREE_CODE (type) == REAL_TYPE
8266 && TYPE_MODE (type) == DFmode)
8267 {
8268 if (align < 64)
8269 align = 64;
8270 }
8271 }
8272
8273 if (how != align_abi)
8274 {
8275 if (TREE_CODE (type) == ARRAY_TYPE
8276 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8277 {
8278 if (align < BITS_PER_WORD)
8279 align = BITS_PER_WORD;
8280 }
8281 }
8282
8283 return align;
8284 }
8285
8286 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8287
8288 bool
8289 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8290 {
8291 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8292 {
8293 if (computed != 128)
8294 {
8295 static bool warned;
8296 if (!warned && warn_psabi)
8297 {
8298 warned = true;
8299 inform (input_location,
8300 "the layout of aggregates containing vectors with"
8301 " %d-byte alignment has changed in GCC 5",
8302 computed / BITS_PER_UNIT);
8303 }
8304 }
8305 /* In current GCC there is no special case. */
8306 return false;
8307 }
8308
8309 return false;
8310 }
8311
8312 /* AIX increases natural record alignment to doubleword if the first
8313 field is an FP double while the FP fields remain word aligned. */
8314
8315 unsigned int
8316 rs6000_special_round_type_align (tree type, unsigned int computed,
8317 unsigned int specified)
8318 {
8319 unsigned int align = MAX (computed, specified);
8320 tree field = TYPE_FIELDS (type);
8321
8322   /* Skip all non-field decls.  */
8323 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8324 field = DECL_CHAIN (field);
8325
8326 if (field != NULL && field != type)
8327 {
8328 type = TREE_TYPE (field);
8329 while (TREE_CODE (type) == ARRAY_TYPE)
8330 type = TREE_TYPE (type);
8331
8332 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8333 align = MAX (align, 64);
8334 }
8335
8336 return align;
8337 }
8338
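/* For example, on AIX struct { double d; int i; } is given doubleword
   alignment because its first field is a double, even though double fields
   themselves remain only word aligned within a record.  */
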
8339 /* Darwin increases record alignment to the natural alignment of
8340 the first field. */
8341
8342 unsigned int
8343 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8344 unsigned int specified)
8345 {
8346 unsigned int align = MAX (computed, specified);
8347
8348 if (TYPE_PACKED (type))
8349 return align;
8350
8351 /* Find the first field, looking down into aggregates. */
8352 do {
8353 tree field = TYPE_FIELDS (type);
8354 /* Skip all non field decls */
8355     /* Skip all non-field decls.  */
8356 field = DECL_CHAIN (field);
8357 if (! field)
8358 break;
8359 /* A packed field does not contribute any extra alignment. */
8360 if (DECL_PACKED (field))
8361 return align;
8362 type = TREE_TYPE (field);
8363 while (TREE_CODE (type) == ARRAY_TYPE)
8364 type = TREE_TYPE (type);
8365 } while (AGGREGATE_TYPE_P (type));
8366
8367 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8368 align = MAX (align, TYPE_ALIGN (type));
8369
8370 return align;
8371 }
8372
8373 /* Return 1 for an operand in small memory on V.4/eabi. */
8374
8375 int
8376 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8377 machine_mode mode ATTRIBUTE_UNUSED)
8378 {
8379 #if TARGET_ELF
8380 rtx sym_ref;
8381
8382 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8383 return 0;
8384
8385 if (DEFAULT_ABI != ABI_V4)
8386 return 0;
8387
8388 /* Vector and float memory instructions have a limited offset on the
8389 SPE, so using a vector or float variable directly as an operand is
8390 not useful. */
8391 if (TARGET_SPE
8392 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
8393 return 0;
8394
8395 if (GET_CODE (op) == SYMBOL_REF)
8396 sym_ref = op;
8397
8398 else if (GET_CODE (op) != CONST
8399 || GET_CODE (XEXP (op, 0)) != PLUS
8400 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8401 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8402 return 0;
8403
8404 else
8405 {
8406 rtx sum = XEXP (op, 0);
8407 HOST_WIDE_INT summand;
8408
8409 /* We have to be careful here, because it is the referenced address
8410 that must be 32k from _SDA_BASE_, not just the symbol. */
8411 summand = INTVAL (XEXP (sum, 1));
8412 if (summand < 0 || summand > g_switch_value)
8413 return 0;
8414
8415 sym_ref = XEXP (sum, 0);
8416 }
8417
8418 return SYMBOL_REF_SMALL_P (sym_ref);
8419 #else
8420 return 0;
8421 #endif
8422 }
8423
8424 /* Return true if either operand is a general purpose register. */
8425
8426 bool
8427 gpr_or_gpr_p (rtx op0, rtx op1)
8428 {
8429 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8430 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8431 }
8432
8433 /* Return true if this is a move direct operation between GPR registers and
8434 floating point/VSX registers. */
8435
8436 bool
8437 direct_move_p (rtx op0, rtx op1)
8438 {
8439 int regno0, regno1;
8440
8441 if (!REG_P (op0) || !REG_P (op1))
8442 return false;
8443
8444 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8445 return false;
8446
8447 regno0 = REGNO (op0);
8448 regno1 = REGNO (op1);
8449 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8450 return false;
8451
8452 if (INT_REGNO_P (regno0))
8453 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8454
8455 else if (INT_REGNO_P (regno1))
8456 {
8457 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8458 return true;
8459
8460 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8461 return true;
8462 }
8463
8464 return false;
8465 }
8466
8467 /* Return true if the OFFSET is valid for the quad address instructions that
8468 use d-form (register + offset) addressing. */
8469
8470 static inline bool
8471 quad_address_offset_p (HOST_WIDE_INT offset)
8472 {
8473 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
8474 }
8475
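/* For example, offset 32752 (0x7ff0) is valid while 32760 (0x7ff8) is not,
   since the scaled displacement field of LQ/STQ and LXV/STXV implies
   16-byte alignment.  */
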
8476 /* Return true if the ADDR is an acceptable address for a quad memory
8477 operation of mode MODE (either LQ/STQ for general purpose registers, or
8478 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8479    LXV/STXV for vector registers under ISA 3.0).  STRICT is true if strict
8480    address checking is required, i.e. only hard registers are acceptable as
8481    base registers, as is the case during and after reload. */
8482 bool
8483 quad_address_p (rtx addr, machine_mode mode, bool strict)
8484 {
8485 rtx op0, op1;
8486
8487 if (GET_MODE_SIZE (mode) != 16)
8488 return false;
8489
8490 if (legitimate_indirect_address_p (addr, strict))
8491 return true;
8492
8493 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8494 return false;
8495
8496 if (GET_CODE (addr) != PLUS)
8497 return false;
8498
8499 op0 = XEXP (addr, 0);
8500 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8501 return false;
8502
8503 op1 = XEXP (addr, 1);
8504 if (!CONST_INT_P (op1))
8505 return false;
8506
8507 return quad_address_offset_p (INTVAL (op1));
8508 }
8509
8510 /* Return true if this is a load or store quad operation. This function does
8511 not handle the atomic quad memory instructions. */
8512
8513 bool
8514 quad_load_store_p (rtx op0, rtx op1)
8515 {
8516 bool ret;
8517
8518 if (!TARGET_QUAD_MEMORY)
8519 ret = false;
8520
8521 else if (REG_P (op0) && MEM_P (op1))
8522 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8523 && quad_memory_operand (op1, GET_MODE (op1))
8524 && !reg_overlap_mentioned_p (op0, op1));
8525
8526 else if (MEM_P (op0) && REG_P (op1))
8527 ret = (quad_memory_operand (op0, GET_MODE (op0))
8528 && quad_int_reg_operand (op1, GET_MODE (op1)));
8529
8530 else
8531 ret = false;
8532
8533 if (TARGET_DEBUG_ADDR)
8534 {
8535 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8536 ret ? "true" : "false");
8537 debug_rtx (gen_rtx_SET (op0, op1));
8538 }
8539
8540 return ret;
8541 }
8542
8543 /* Given an address, return a constant offset term if one exists. */
8544
8545 static rtx
8546 address_offset (rtx op)
8547 {
8548 if (GET_CODE (op) == PRE_INC
8549 || GET_CODE (op) == PRE_DEC)
8550 op = XEXP (op, 0);
8551 else if (GET_CODE (op) == PRE_MODIFY
8552 || GET_CODE (op) == LO_SUM)
8553 op = XEXP (op, 1);
8554
8555 if (GET_CODE (op) == CONST)
8556 op = XEXP (op, 0);
8557
8558 if (GET_CODE (op) == PLUS)
8559 op = XEXP (op, 1);
8560
8561 if (CONST_INT_P (op))
8562 return op;
8563
8564 return NULL_RTX;
8565 }
8566
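/* For example, address_offset returns (const_int 8) both for
   (plus (reg) (const_int 8)) and for
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))).  */
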
8567 /* Return true if the MEM operand is a memory operand suitable for use
8568 with a (full width, possibly multiple) gpr load/store. On
8569 powerpc64 this means the offset must be divisible by 4.
8570 Implements 'Y' constraint.
8571
8572 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8573 a constraint function we know the operand has satisfied a suitable
8574 memory predicate. Also accept some odd rtl generated by reload
8575 (see rs6000_legitimize_reload_address for various forms). It is
8576 important that reload rtl be accepted by appropriate constraints
8577 but not by the operand predicate.
8578
8579 Offsetting a lo_sum should not be allowed, except where we know by
8580 alignment that a 32k boundary is not crossed, but see the ???
8581 comment in rs6000_legitimize_reload_address. Note that by
8582 "offsetting" here we mean a further offset to access parts of the
8583 MEM. It's fine to have a lo_sum where the inner address is offset
8584 from a sym, since the same sym+offset will appear in the high part
8585 of the address calculation. */
8586
8587 bool
8588 mem_operand_gpr (rtx op, machine_mode mode)
8589 {
8590 unsigned HOST_WIDE_INT offset;
8591 int extra;
8592 rtx addr = XEXP (op, 0);
8593
8594 op = address_offset (addr);
8595 if (op == NULL_RTX)
8596 return true;
8597
8598 offset = INTVAL (op);
8599 if (TARGET_POWERPC64 && (offset & 3) != 0)
8600 return false;
8601
8602 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8603 if (extra < 0)
8604 extra = 0;
8605
8606 if (GET_CODE (addr) == LO_SUM)
8607 /* For lo_sum addresses, we must allow any offset except one that
8608 causes a wrap, so test only the low 16 bits. */
8609 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8610
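/* The XOR/subtract pair above sign-extends the low 16 bits: e.g. an
   offset of 0x1fff8 reduces to 0xfff8, which becomes -8.  The test
   below biases the signed 16-bit range [-0x8000, 0x8000) up to
   [0, 0x10000) and shrinks it by EXTRA so that the displacement for
   the last word of a multi-word access still fits.  */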
8611 return offset + 0x8000 < 0x10000u - extra;
8612 }
8613
8614 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8615 enforce an offset divisible by 4 even for 32-bit. */
8616
8617 bool
8618 mem_operand_ds_form (rtx op, machine_mode mode)
8619 {
8620 unsigned HOST_WIDE_INT offset;
8621 int extra;
8622 rtx addr = XEXP (op, 0);
8623
8624 if (!offsettable_address_p (false, mode, addr))
8625 return false;
8626
8627 op = address_offset (addr);
8628 if (op == NULL_RTX)
8629 return true;
8630
8631 offset = INTVAL (op);
8632 if ((offset & 3) != 0)
8633 return false;
8634
8635 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8636 if (extra < 0)
8637 extra = 0;
8638
8639 if (GET_CODE (addr) == LO_SUM)
8640 /* For lo_sum addresses, we must allow any offset except one that
8641 causes a wrap, so test only the low 16 bits. */
8642 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8643
8644 return offset + 0x8000 < 0x10000u - extra;
8645 }
8646 \f
8647 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8648
8649 static bool
8650 reg_offset_addressing_ok_p (machine_mode mode)
8651 {
8652 switch (mode)
8653 {
8654 case E_V16QImode:
8655 case E_V8HImode:
8656 case E_V4SFmode:
8657 case E_V4SImode:
8658 case E_V2DFmode:
8659 case E_V2DImode:
8660 case E_V1TImode:
8661 case E_TImode:
8662 case E_TFmode:
8663 case E_KFmode:
8664 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8665 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8666 a vector mode, if we want to use the VSX registers to move it around,
8667 we need to restrict ourselves to reg+reg addressing. Similarly for
8668 IEEE 128-bit floating point that is passed in a single vector
8669 register. */
8670 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8671 return mode_supports_vsx_dform_quad (mode);
8672 break;
8673
8674 case E_V4HImode:
8675 case E_V2SImode:
8676 case E_V1DImode:
8677 case E_V2SFmode:
8678 /* Paired vector modes. Only reg+reg addressing is valid. */
8679 if (TARGET_PAIRED_FLOAT)
8680 return false;
8681 break;
8682
8683 case E_SDmode:
8684 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8685 addressing for the LFIWZX and STFIWX instructions. */
8686 if (TARGET_NO_SDMODE_STACK)
8687 return false;
8688 break;
8689
8690 default:
8691 break;
8692 }
8693
8694 return true;
8695 }
8696
8697 static bool
8698 virtual_stack_registers_memory_p (rtx op)
8699 {
8700 int regnum;
8701
8702 if (GET_CODE (op) == REG)
8703 regnum = REGNO (op);
8704
8705 else if (GET_CODE (op) == PLUS
8706 && GET_CODE (XEXP (op, 0)) == REG
8707 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8708 regnum = REGNO (XEXP (op, 0));
8709
8710 else
8711 return false;
8712
8713 return (regnum >= FIRST_VIRTUAL_REGISTER
8714 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8715 }
8716
8717 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8718 is known to not straddle a 32k boundary. This function is used
8719 to determine whether -mcmodel=medium code can use TOC pointer
8720 relative addressing for OP. This means the alignment of the TOC
8721 pointer must also be taken into account, and unfortunately that is
8722 only 8 bytes. */
8723
8724 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8725 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8726 #endif
8727
8728 static bool
8729 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8730 machine_mode mode)
8731 {
8732 tree decl;
8733 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8734
8735 if (GET_CODE (op) != SYMBOL_REF)
8736 return false;
8737
8738 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8739 SYMBOL_REF. */
8740 if (mode_supports_vsx_dform_quad (mode))
8741 return false;
8742
8743 dsize = GET_MODE_SIZE (mode);
8744 decl = SYMBOL_REF_DECL (op);
8745 if (!decl)
8746 {
8747 if (dsize == 0)
8748 return false;
8749
8750 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8751 replacing memory addresses with an anchor plus offset. We
8752 could find the decl by rummaging around in the block->objects
8753 VEC for the given offset but that seems like too much work. */
8754 dalign = BITS_PER_UNIT;
8755 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8756 && SYMBOL_REF_ANCHOR_P (op)
8757 && SYMBOL_REF_BLOCK (op) != NULL)
8758 {
8759 struct object_block *block = SYMBOL_REF_BLOCK (op);
8760
8761 dalign = block->alignment;
8762 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8763 }
8764 else if (CONSTANT_POOL_ADDRESS_P (op))
8765 {
8766 /* It would be nice to have get_pool_align().. */
8767 machine_mode cmode = get_pool_mode (op);
8768
8769 dalign = GET_MODE_ALIGNMENT (cmode);
8770 }
8771 }
8772 else if (DECL_P (decl))
8773 {
8774 dalign = DECL_ALIGN (decl);
8775
8776 if (dsize == 0)
8777 {
8778 /* Allow BLKmode when the entire object is known to not
8779 cross a 32k boundary. */
8780 if (!DECL_SIZE_UNIT (decl))
8781 return false;
8782
8783 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8784 return false;
8785
8786 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8787 if (dsize > 32768)
8788 return false;
8789
8790 dalign /= BITS_PER_UNIT;
8791 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8792 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8793 return dalign >= dsize;
8794 }
8795 }
8796 else
8797 gcc_unreachable ();
8798
8799 /* Find how many bits of the alignment we know for this access. */
8800 dalign /= BITS_PER_UNIT;
8801 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8802 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8803 mask = dalign - 1;
8804 lsb = offset & -offset;
8805 mask &= lsb - 1;
8806 dalign = mask + 1;
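/* The known alignment of op + offset is thus the smaller of the
   symbol's (capped) alignment and the lowest set bit of the offset:
   e.g. dalign = 8 with offset = 4 gives lsb = 4, so dalign drops
   to 4.  */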
8807
8808 return dalign >= dsize;
8809 }
8810
8811 static bool
8812 constant_pool_expr_p (rtx op)
8813 {
8814 rtx base, offset;
8815
8816 split_const (op, &base, &offset);
8817 return (GET_CODE (base) == SYMBOL_REF
8818 && CONSTANT_POOL_ADDRESS_P (base)
8819 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8820 }
8821
8822 static const_rtx tocrel_base, tocrel_offset;
8823
8824 /* Return true if OP is a toc pointer relative address (the output
8825 of create_TOC_reference). If STRICT, do not match non-split
8826 -mcmodel=large/medium toc pointer relative addresses. */
8827
8828 bool
8829 toc_relative_expr_p (const_rtx op, bool strict)
8830 {
8831 if (!TARGET_TOC)
8832 return false;
8833
8834 if (TARGET_CMODEL != CMODEL_SMALL)
8835 {
8836 /* When strict ensure we have everything tidy. */
8837 if (strict
8838 && !(GET_CODE (op) == LO_SUM
8839 && REG_P (XEXP (op, 0))
8840 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8841 return false;
8842
8843 /* When not strict, allow non-split TOC addresses and also allow
8844 (lo_sum (high ..)) TOC addresses created during reload. */
8845 if (GET_CODE (op) == LO_SUM)
8846 op = XEXP (op, 1);
8847 }
8848
8849 tocrel_base = op;
8850 tocrel_offset = const0_rtx;
8851 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8852 {
8853 tocrel_base = XEXP (op, 0);
8854 tocrel_offset = XEXP (op, 1);
8855 }
8856
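/* A successful match typically looks like
   (unspec [(symbol_ref ...) ...] UNSPEC_TOCREL), possibly wrapped in
   (plus ... (const_int ...)); the pieces are left in tocrel_base and
   tocrel_offset for use by our callers.  */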
8857 return (GET_CODE (tocrel_base) == UNSPEC
8858 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8859 }
8860
8861 /* Return true if X is a constant pool address, and also for cmodel=medium
8862 if X is a toc-relative address known to be offsettable within MODE. */
8863
8864 bool
8865 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8866 bool strict)
8867 {
8868 return (toc_relative_expr_p (x, strict)
8869 && (TARGET_CMODEL != CMODEL_MEDIUM
8870 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8871 || mode == QImode
8872 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8873 INTVAL (tocrel_offset), mode)));
8874 }
8875
8876 static bool
8877 legitimate_small_data_p (machine_mode mode, rtx x)
8878 {
8879 return (DEFAULT_ABI == ABI_V4
8880 && !flag_pic && !TARGET_TOC
8881 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8882 && small_data_operand (x, mode));
8883 }
8884
8885 /* SPE offset addressing is limited to 5 bits' worth of double words. */
8886 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
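/* That is, the offset must be a multiple of 8 in [0, 248]: only
   bits 3-7, a 5-bit field scaled by the 8-byte double word, may be
   set.  */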
8887
8888 bool
8889 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8890 bool strict, bool worst_case)
8891 {
8892 unsigned HOST_WIDE_INT offset;
8893 unsigned int extra;
8894
8895 if (GET_CODE (x) != PLUS)
8896 return false;
8897 if (!REG_P (XEXP (x, 0)))
8898 return false;
8899 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8900 return false;
8901 if (mode_supports_vsx_dform_quad (mode))
8902 return quad_address_p (x, mode, strict);
8903 if (!reg_offset_addressing_ok_p (mode))
8904 return virtual_stack_registers_memory_p (x);
8905 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8906 return true;
8907 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8908 return false;
8909
8910 offset = INTVAL (XEXP (x, 1));
8911 extra = 0;
8912 switch (mode)
8913 {
8914 case E_V4HImode:
8915 case E_V2SImode:
8916 case E_V1DImode:
8917 case E_V2SFmode:
8918 /* SPE vector modes. */
8919 return SPE_CONST_OFFSET_OK (offset);
8920
8921 case E_DFmode:
8922 case E_DDmode:
8923 case E_DImode:
8924 /* On e500v2, we may have:
8925
8926 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
8927
8928 Which gets addressed with evldd instructions. */
8929 if (TARGET_E500_DOUBLE)
8930 return SPE_CONST_OFFSET_OK (offset);
8931
8932 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8933 addressing. */
8934 if (VECTOR_MEM_VSX_P (mode))
8935 return false;
8936
8937 if (!worst_case)
8938 break;
8939 if (!TARGET_POWERPC64)
8940 extra = 4;
8941 else if (offset & 3)
8942 return false;
8943 break;
8944
8945 case E_TFmode:
8946 case E_IFmode:
8947 case E_KFmode:
8948 case E_TDmode:
8949 case E_TImode:
8950 case E_PTImode:
8951 if (TARGET_E500_DOUBLE)
8952 return (SPE_CONST_OFFSET_OK (offset)
8953 && SPE_CONST_OFFSET_OK (offset + 8));
8954
8955 extra = 8;
8956 if (!worst_case)
8957 break;
8958 if (!TARGET_POWERPC64)
8959 extra = 12;
8960 else if (offset & 3)
8961 return false;
8962 break;
8963
8964 default:
8965 break;
8966 }
8967
8968 offset += 0x8000;
8969 return offset < 0x10000 - extra;
8970 }
8971
8972 bool
8973 legitimate_indexed_address_p (rtx x, int strict)
8974 {
8975 rtx op0, op1;
8976
8977 if (GET_CODE (x) != PLUS)
8978 return false;
8979
8980 op0 = XEXP (x, 0);
8981 op1 = XEXP (x, 1);
8982
8983 /* Recognize the rtl generated by reload which we know will later be
8984 replaced with proper base and index regs. */
8985 if (!strict
8986 && reload_in_progress
8987 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8988 && REG_P (op1))
8989 return true;
8990
8991 return (REG_P (op0) && REG_P (op1)
8992 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8993 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8994 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8995 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8996 }
8997
8998 bool
8999 avoiding_indexed_address_p (machine_mode mode)
9000 {
9001 /* Avoid indexed addressing for modes that have non-indexed
9002 load/store instruction forms. */
9003 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9004 }
9005
9006 bool
9007 legitimate_indirect_address_p (rtx x, int strict)
9008 {
9009 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
9010 }
9011
9012 bool
9013 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9014 {
9015 if (!TARGET_MACHO || !flag_pic
9016 || mode != SImode || GET_CODE (x) != MEM)
9017 return false;
9018 x = XEXP (x, 0);
9019
9020 if (GET_CODE (x) != LO_SUM)
9021 return false;
9022 if (GET_CODE (XEXP (x, 0)) != REG)
9023 return false;
9024 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9025 return false;
9026 x = XEXP (x, 1);
9027
9028 return CONSTANT_P (x);
9029 }
9030
9031 static bool
9032 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9033 {
9034 if (GET_CODE (x) != LO_SUM)
9035 return false;
9036 if (GET_CODE (XEXP (x, 0)) != REG)
9037 return false;
9038 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9039 return false;
9040 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9041 if (mode_supports_vsx_dform_quad (mode))
9042 return false;
9043 /* Restrict addressing for DI because of our SUBREG hackery. */
9044 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9045 return false;
9046 x = XEXP (x, 1);
9047
9048 if (TARGET_ELF || TARGET_MACHO)
9049 {
9050 bool large_toc_ok;
9051
9052 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9053 return false;
9054 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9055 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9056 recognizes some LO_SUM addresses as valid although this
9057 function says the opposite. In most cases LRA's various
9058 transformations can generate correct code for address reloads;
9059 only some LO_SUM cases cannot be handled that way. So we need
9060 code analogous to that in rs6000_legitimize_reload_address for
9061 LO_SUM here, saying that some addresses are still valid. */
9062 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9063 && small_toc_ref (x, VOIDmode));
9064 if (TARGET_TOC && ! large_toc_ok)
9065 return false;
9066 if (GET_MODE_NUNITS (mode) != 1)
9067 return false;
9068 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9069 && !(/* ??? Assume floating point reg based on mode? */
9070 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
9071 && (mode == DFmode || mode == DDmode)))
9072 return false;
9073
9074 return CONSTANT_P (x) || large_toc_ok;
9075 }
9076
9077 return false;
9078 }
9079
9080
9081 /* Try machine-dependent ways of modifying an illegitimate address
9082 to be legitimate. If we find one, return the new, valid address.
9083 This is used from only one place: `memory_address' in explow.c.
9084
9085 OLDX is the address as it was before break_out_memory_refs was
9086 called. In some cases it is useful to look at this to decide what
9087 needs to be done.
9088
9089 It is always safe for this function to do nothing. It exists to
9090 recognize opportunities to optimize the output.
9091
9092 On RS/6000, first check for the sum of a register with a constant
9093 integer that is out of range. If so, generate code to add the
9094 constant with the low-order 16 bits masked to the register and force
9095 this result into another register (this can be done with `cau').
9096 Then generate an address of REG+(CONST&0xffff), allowing for the
9097 possibility of bit 16 being a one.
9098
9099 Then check for the sum of a register and something not constant, try to
9100 load the non-constant part into a register and return the sum. */
9101
9102 static rtx
9103 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9104 machine_mode mode)
9105 {
9106 unsigned int extra;
9107
9108 if (!reg_offset_addressing_ok_p (mode)
9109 || mode_supports_vsx_dform_quad (mode))
9110 {
9111 if (virtual_stack_registers_memory_p (x))
9112 return x;
9113
9114 /* In theory we should not be seeing addresses of the form reg+0,
9115 but just in case it is generated, optimize it away. */
9116 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9117 return force_reg (Pmode, XEXP (x, 0));
9118
9119 /* For TImode with load/store quad, restrict addresses to just a single
9120 pointer, so it works with both GPRs and VSX registers. */
9121 /* Make sure both operands are registers. */
9122 else if (GET_CODE (x) == PLUS
9123 && (mode != TImode || !TARGET_VSX_TIMODE))
9124 return gen_rtx_PLUS (Pmode,
9125 force_reg (Pmode, XEXP (x, 0)),
9126 force_reg (Pmode, XEXP (x, 1)));
9127 else
9128 return force_reg (Pmode, x);
9129 }
9130 if (GET_CODE (x) == SYMBOL_REF)
9131 {
9132 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9133 if (model != 0)
9134 return rs6000_legitimize_tls_address (x, model);
9135 }
9136
9137 extra = 0;
9138 switch (mode)
9139 {
9140 case E_TFmode:
9141 case E_TDmode:
9142 case E_TImode:
9143 case E_PTImode:
9144 case E_IFmode:
9145 case E_KFmode:
9146 /* As in legitimate_offset_address_p we do not assume
9147 worst-case. The mode here is just a hint as to the registers
9148 used. A TImode is usually in gprs, but may actually be in
9149 fprs. Leave worst-case scenario for reload to handle via
9150 insn constraints. PTImode is only GPRs. */
9151 extra = 8;
9152 break;
9153 default:
9154 break;
9155 }
9156
9157 if (GET_CODE (x) == PLUS
9158 && GET_CODE (XEXP (x, 0)) == REG
9159 && GET_CODE (XEXP (x, 1)) == CONST_INT
9160 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9161 >= 0x10000 - extra)
9162 && !(SPE_VECTOR_MODE (mode)
9163 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
9164 {
9165 HOST_WIDE_INT high_int, low_int;
9166 rtx sum;
9167 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9168 if (low_int >= 0x8000 - extra)
9169 low_int = 0;
9170 high_int = INTVAL (XEXP (x, 1)) - low_int;
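/* E.g. reg + 0x12345 splits into high_int = 0x10000, which is added
   to the base register below (typically with addis), and
   low_int = 0x2345, left as the displacement; a low part that would
   collide with EXTRA is folded entirely into high_int.  */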
9171 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9172 GEN_INT (high_int)), 0);
9173 return plus_constant (Pmode, sum, low_int);
9174 }
9175 else if (GET_CODE (x) == PLUS
9176 && GET_CODE (XEXP (x, 0)) == REG
9177 && GET_CODE (XEXP (x, 1)) != CONST_INT
9178 && GET_MODE_NUNITS (mode) == 1
9179 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9180 || (/* ??? Assume floating point reg based on mode? */
9181 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9182 && (mode == DFmode || mode == DDmode)))
9183 && !avoiding_indexed_address_p (mode))
9184 {
9185 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9186 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9187 }
9188 else if (SPE_VECTOR_MODE (mode)
9189 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
9190 {
9191 if (mode == DImode)
9192 return x;
9193 /* We accept [reg + reg] and [reg + OFFSET]. */
9194
9195 if (GET_CODE (x) == PLUS)
9196 {
9197 rtx op1 = XEXP (x, 0);
9198 rtx op2 = XEXP (x, 1);
9199 rtx y;
9200
9201 op1 = force_reg (Pmode, op1);
9202
9203 if (GET_CODE (op2) != REG
9204 && (GET_CODE (op2) != CONST_INT
9205 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
9206 || (GET_MODE_SIZE (mode) > 8
9207 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
9208 op2 = force_reg (Pmode, op2);
9209
9210 /* We can't always do [reg + reg] for these, because [reg +
9211 reg + offset] is not a legitimate addressing mode. */
9212 y = gen_rtx_PLUS (Pmode, op1, op2);
9213
9214 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9215 return force_reg (Pmode, y);
9216 else
9217 return y;
9218 }
9219
9220 return force_reg (Pmode, x);
9221 }
9222 else if ((TARGET_ELF
9223 #if TARGET_MACHO
9224 || !MACHO_DYNAMIC_NO_PIC_P
9225 #endif
9226 )
9227 && TARGET_32BIT
9228 && TARGET_NO_TOC
9229 && ! flag_pic
9230 && GET_CODE (x) != CONST_INT
9231 && GET_CODE (x) != CONST_WIDE_INT
9232 && GET_CODE (x) != CONST_DOUBLE
9233 && CONSTANT_P (x)
9234 && GET_MODE_NUNITS (mode) == 1
9235 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9236 || (/* ??? Assume floating point reg based on mode? */
9237 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
9238 && (mode == DFmode || mode == DDmode))))
9239 {
9240 rtx reg = gen_reg_rtx (Pmode);
9241 if (TARGET_ELF)
9242 emit_insn (gen_elf_high (reg, x));
9243 else
9244 emit_insn (gen_macho_high (reg, x));
9245 return gen_rtx_LO_SUM (Pmode, reg, x);
9246 }
9247 else if (TARGET_TOC
9248 && GET_CODE (x) == SYMBOL_REF
9249 && constant_pool_expr_p (x)
9250 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9251 return create_TOC_reference (x, NULL_RTX);
9252 else
9253 return x;
9254 }
9255
9256 /* Debug version of rs6000_legitimize_address. */
9257 static rtx
9258 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9259 {
9260 rtx ret;
9261 rtx_insn *insns;
9262
9263 start_sequence ();
9264 ret = rs6000_legitimize_address (x, oldx, mode);
9265 insns = get_insns ();
9266 end_sequence ();
9267
9268 if (ret != x)
9269 {
9270 fprintf (stderr,
9271 "\nrs6000_legitimize_address: mode %s, old code %s, "
9272 "new code %s, modified\n",
9273 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9274 GET_RTX_NAME (GET_CODE (ret)));
9275
9276 fprintf (stderr, "Original address:\n");
9277 debug_rtx (x);
9278
9279 fprintf (stderr, "oldx:\n");
9280 debug_rtx (oldx);
9281
9282 fprintf (stderr, "New address:\n");
9283 debug_rtx (ret);
9284
9285 if (insns)
9286 {
9287 fprintf (stderr, "Insns added:\n");
9288 debug_rtx_list (insns, 20);
9289 }
9290 }
9291 else
9292 {
9293 fprintf (stderr,
9294 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9295 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9296
9297 debug_rtx (x);
9298 }
9299
9300 if (insns)
9301 emit_insn (insns);
9302
9303 return ret;
9304 }
9305
9306 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9307 We need to emit DTP-relative relocations. */
9308
9309 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9310 static void
9311 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9312 {
9313 switch (size)
9314 {
9315 case 4:
9316 fputs ("\t.long\t", file);
9317 break;
9318 case 8:
9319 fputs (DOUBLE_INT_ASM_OP, file);
9320 break;
9321 default:
9322 gcc_unreachable ();
9323 }
9324 output_addr_const (file, x);
9325 if (TARGET_ELF)
9326 fputs ("@dtprel+0x8000", file);
9327 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9328 {
9329 switch (SYMBOL_REF_TLS_MODEL (x))
9330 {
9331 case 0:
9332 break;
9333 case TLS_MODEL_LOCAL_EXEC:
9334 fputs ("@le", file);
9335 break;
9336 case TLS_MODEL_INITIAL_EXEC:
9337 fputs ("@ie", file);
9338 break;
9339 case TLS_MODEL_GLOBAL_DYNAMIC:
9340 case TLS_MODEL_LOCAL_DYNAMIC:
9341 fputs ("@m", file);
9342 break;
9343 default:
9344 gcc_unreachable ();
9345 }
9346 }
9347 }
9348
9349 /* Return true if X is a symbol that refers to real (rather than emulated)
9350 TLS. */
9351
9352 static bool
9353 rs6000_real_tls_symbol_ref_p (rtx x)
9354 {
9355 return (GET_CODE (x) == SYMBOL_REF
9356 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9357 }
9358
9359 /* In the name of slightly smaller debug output, and to cater to
9360 general assembler lossage, recognize various UNSPEC sequences
9361 and turn them back into a direct symbol reference. */
9362
9363 static rtx
9364 rs6000_delegitimize_address (rtx orig_x)
9365 {
9366 rtx x, y, offset;
9367
9368 orig_x = delegitimize_mem_from_attrs (orig_x);
9369 x = orig_x;
9370 if (MEM_P (x))
9371 x = XEXP (x, 0);
9372
9373 y = x;
9374 if (TARGET_CMODEL != CMODEL_SMALL
9375 && GET_CODE (y) == LO_SUM)
9376 y = XEXP (y, 1);
9377
9378 offset = NULL_RTX;
9379 if (GET_CODE (y) == PLUS
9380 && GET_MODE (y) == Pmode
9381 && CONST_INT_P (XEXP (y, 1)))
9382 {
9383 offset = XEXP (y, 1);
9384 y = XEXP (y, 0);
9385 }
9386
9387 if (GET_CODE (y) == UNSPEC
9388 && XINT (y, 1) == UNSPEC_TOCREL)
9389 {
9390 y = XVECEXP (y, 0, 0);
9391
9392 #ifdef HAVE_AS_TLS
9393 /* Do not associate thread-local symbols with the original
9394 constant pool symbol. */
9395 if (TARGET_XCOFF
9396 && GET_CODE (y) == SYMBOL_REF
9397 && CONSTANT_POOL_ADDRESS_P (y)
9398 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9399 return orig_x;
9400 #endif
9401
9402 if (offset != NULL_RTX)
9403 y = gen_rtx_PLUS (Pmode, y, offset);
9404 if (!MEM_P (orig_x))
9405 return y;
9406 else
9407 return replace_equiv_address_nv (orig_x, y);
9408 }
9409
9410 if (TARGET_MACHO
9411 && GET_CODE (orig_x) == LO_SUM
9412 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9413 {
9414 y = XEXP (XEXP (orig_x, 1), 0);
9415 if (GET_CODE (y) == UNSPEC
9416 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9417 return XVECEXP (y, 0, 0);
9418 }
9419
9420 return orig_x;
9421 }
9422
9423 /* Return true if X shouldn't be emitted into the debug info.
9424 The linker doesn't like .toc section references from
9425 .debug_* sections, so reject .toc section symbols. */
9426
9427 static bool
9428 rs6000_const_not_ok_for_debug_p (rtx x)
9429 {
9430 if (GET_CODE (x) == SYMBOL_REF
9431 && CONSTANT_POOL_ADDRESS_P (x))
9432 {
9433 rtx c = get_pool_constant (x);
9434 machine_mode cmode = get_pool_mode (x);
9435 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9436 return true;
9437 }
9438
9439 return false;
9440 }
9441
9442
9443 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9444
9445 static bool
9446 rs6000_legitimate_combined_insn (rtx_insn *insn)
9447 {
9448 int icode = INSN_CODE (insn);
9449
9450 /* Reject creating doloop insns. Combine should not be allowed
9451 to create these for a number of reasons:
9452 1) In a nested loop, if combine creates one of these in an
9453 outer loop and the register allocator happens to allocate ctr
9454 to the outer loop insn, then the inner loop can't use ctr.
9455 Inner loops ought to be more highly optimized.
9456 2) Combine often wants to create one of these from what was
9457 originally a three insn sequence, first combining the three
9458 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9459 allocated ctr, the splitter takes us back to the three insn
9460 sequence. It's better to stop combine at the two insn
9461 sequence.
9462 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9463 insns, the register allocator sometimes uses floating point
9464 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9465 jump insn and output reloads are not implemented for jumps,
9466 the ctrsi/ctrdi splitters need to handle all possible cases.
9467 That's a pain, and it gets to be seriously difficult when a
9468 splitter that runs after reload needs memory to transfer from
9469 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9470 for the difficult case. It's better to not create problems
9471 in the first place. */
9472 if (icode != CODE_FOR_nothing
9473 && (icode == CODE_FOR_ctrsi_internal1
9474 || icode == CODE_FOR_ctrdi_internal1
9475 || icode == CODE_FOR_ctrsi_internal2
9476 || icode == CODE_FOR_ctrdi_internal2
9477 || icode == CODE_FOR_ctrsi_internal3
9478 || icode == CODE_FOR_ctrdi_internal3
9479 || icode == CODE_FOR_ctrsi_internal4
9480 || icode == CODE_FOR_ctrdi_internal4))
9481 return false;
9482
9483 return true;
9484 }
9485
9486 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9487
9488 static GTY(()) rtx rs6000_tls_symbol;
9489 static rtx
9490 rs6000_tls_get_addr (void)
9491 {
9492 if (!rs6000_tls_symbol)
9493 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9494
9495 return rs6000_tls_symbol;
9496 }
9497
9498 /* Construct the SYMBOL_REF for TLS GOT references. */
9499
9500 static GTY(()) rtx rs6000_got_symbol;
9501 static rtx
9502 rs6000_got_sym (void)
9503 {
9504 if (!rs6000_got_symbol)
9505 {
9506 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9507 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9508 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9509 }
9510
9511 return rs6000_got_symbol;
9512 }
9513
9514 /* AIX Thread-Local Address support. */
9515
9516 static rtx
9517 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9518 {
9519 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9520 const char *name;
9521 char *tlsname;
9522
9523 name = XSTR (addr, 0);
9524 /* Append TLS CSECT qualifier, unless the symbol already is qualified
9525 or the symbol will be in the TLS private data section. */
9526 if (name[strlen (name) - 1] != ']'
9527 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9528 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9529 {
9530 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9531 strcpy (tlsname, name);
9532 strcat (tlsname,
9533 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9534 tlsaddr = copy_rtx (addr);
9535 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9536 }
9537 else
9538 tlsaddr = addr;
9539
9540 /* Place addr into TOC constant pool. */
9541 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9542
9543 /* Output the TOC entry and create the MEM referencing the value. */
9544 if (constant_pool_expr_p (XEXP (sym, 0))
9545 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9546 {
9547 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9548 mem = gen_const_mem (Pmode, tocref);
9549 set_mem_alias_set (mem, get_TOC_alias_set ());
9550 }
9551 else
9552 return sym;
9553
9554 /* Use global-dynamic for local-dynamic. */
9555 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9556 || model == TLS_MODEL_LOCAL_DYNAMIC)
9557 {
9558 /* Create new TOC reference for @m symbol. */
9559 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9560 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9561 strcpy (tlsname, "*LCM");
9562 strcat (tlsname, name + 3);
9563 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9564 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9565 tocref = create_TOC_reference (modaddr, NULL_RTX);
9566 rtx modmem = gen_const_mem (Pmode, tocref);
9567 set_mem_alias_set (modmem, get_TOC_alias_set ());
9568
9569 rtx modreg = gen_reg_rtx (Pmode);
9570 emit_insn (gen_rtx_SET (modreg, modmem));
9571
9572 tmpreg = gen_reg_rtx (Pmode);
9573 emit_insn (gen_rtx_SET (tmpreg, mem));
9574
9575 dest = gen_reg_rtx (Pmode);
9576 if (TARGET_32BIT)
9577 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9578 else
9579 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9580 return dest;
9581 }
9582 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9583 else if (TARGET_32BIT)
9584 {
9585 tlsreg = gen_reg_rtx (SImode);
9586 emit_insn (gen_tls_get_tpointer (tlsreg));
9587 }
9588 else
9589 tlsreg = gen_rtx_REG (DImode, 13);
9590
9591 /* Load the TOC value into temporary register. */
9592 tmpreg = gen_reg_rtx (Pmode);
9593 emit_insn (gen_rtx_SET (tmpreg, mem));
9594 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9595 gen_rtx_MINUS (Pmode, addr, tlsreg));
9596
9597 /* Add TOC symbol value to TLS pointer. */
9598 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9599
9600 return dest;
9601 }
9602
9603 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9604 this (thread-local) address. */
9605
9606 static rtx
9607 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9608 {
9609 rtx dest, insn;
9610
9611 if (TARGET_XCOFF)
9612 return rs6000_legitimize_tls_address_aix (addr, model);
9613
9614 dest = gen_reg_rtx (Pmode);
9615 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9616 {
9617 rtx tlsreg;
9618
9619 if (TARGET_64BIT)
9620 {
9621 tlsreg = gen_rtx_REG (Pmode, 13);
9622 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9623 }
9624 else
9625 {
9626 tlsreg = gen_rtx_REG (Pmode, 2);
9627 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9628 }
9629 emit_insn (insn);
9630 }
9631 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9632 {
9633 rtx tlsreg, tmp;
9634
9635 tmp = gen_reg_rtx (Pmode);
9636 if (TARGET_64BIT)
9637 {
9638 tlsreg = gen_rtx_REG (Pmode, 13);
9639 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9640 }
9641 else
9642 {
9643 tlsreg = gen_rtx_REG (Pmode, 2);
9644 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9645 }
9646 emit_insn (insn);
9647 if (TARGET_64BIT)
9648 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9649 else
9650 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9651 emit_insn (insn);
9652 }
9653 else
9654 {
9655 rtx r3, got, tga, tmp1, tmp2, call_insn;
9656
9657 /* We currently use relocations like @got@tlsgd for tls, which
9658 means the linker will handle allocation of tls entries, placing
9659 them in the .got section. So use a pointer to the .got section,
9660 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9661 or to secondary GOT sections used by 32-bit -fPIC. */
9662 if (TARGET_64BIT)
9663 got = gen_rtx_REG (Pmode, 2);
9664 else
9665 {
9666 if (flag_pic == 1)
9667 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9668 else
9669 {
9670 rtx gsym = rs6000_got_sym ();
9671 got = gen_reg_rtx (Pmode);
9672 if (flag_pic == 0)
9673 rs6000_emit_move (got, gsym, Pmode);
9674 else
9675 {
9676 rtx mem, lab;
9677
9678 tmp1 = gen_reg_rtx (Pmode);
9679 tmp2 = gen_reg_rtx (Pmode);
9680 mem = gen_const_mem (Pmode, tmp1);
9681 lab = gen_label_rtx ();
9682 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9683 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9684 if (TARGET_LINK_STACK)
9685 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9686 emit_move_insn (tmp2, mem);
9687 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9688 set_unique_reg_note (last, REG_EQUAL, gsym);
9689 }
9690 }
9691 }
9692
9693 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9694 {
9695 tga = rs6000_tls_get_addr ();
9696 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9697 const0_rtx, Pmode);
9698
9699 r3 = gen_rtx_REG (Pmode, 3);
9700 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9701 {
9702 if (TARGET_64BIT)
9703 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9704 else
9705 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9706 }
9707 else if (DEFAULT_ABI == ABI_V4)
9708 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9709 else
9710 gcc_unreachable ();
9711 call_insn = last_call_insn ();
9712 PATTERN (call_insn) = insn;
9713 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9714 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9715 pic_offset_table_rtx);
9716 }
9717 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9718 {
9719 tga = rs6000_tls_get_addr ();
9720 tmp1 = gen_reg_rtx (Pmode);
9721 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9722 const0_rtx, Pmode);
9723
9724 r3 = gen_rtx_REG (Pmode, 3);
9725 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9726 {
9727 if (TARGET_64BIT)
9728 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9729 else
9730 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9731 }
9732 else if (DEFAULT_ABI == ABI_V4)
9733 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9734 else
9735 gcc_unreachable ();
9736 call_insn = last_call_insn ();
9737 PATTERN (call_insn) = insn;
9738 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9739 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9740 pic_offset_table_rtx);
9741
9742 if (rs6000_tls_size == 16)
9743 {
9744 if (TARGET_64BIT)
9745 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9746 else
9747 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9748 }
9749 else if (rs6000_tls_size == 32)
9750 {
9751 tmp2 = gen_reg_rtx (Pmode);
9752 if (TARGET_64BIT)
9753 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9754 else
9755 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9756 emit_insn (insn);
9757 if (TARGET_64BIT)
9758 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9759 else
9760 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9761 }
9762 else
9763 {
9764 tmp2 = gen_reg_rtx (Pmode);
9765 if (TARGET_64BIT)
9766 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9767 else
9768 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9769 emit_insn (insn);
9770 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9771 }
9772 emit_insn (insn);
9773 }
9774 else
9775 {
9776 /* IE, or 64-bit offset LE. */
9777 tmp2 = gen_reg_rtx (Pmode);
9778 if (TARGET_64BIT)
9779 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9780 else
9781 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9782 emit_insn (insn);
9783 if (TARGET_64BIT)
9784 insn = gen_tls_tls_64 (dest, tmp2, addr);
9785 else
9786 insn = gen_tls_tls_32 (dest, tmp2, addr);
9787 emit_insn (insn);
9788 }
9789 }
9790
9791 return dest;
9792 }
9793
9794 /* Only create the global variable for the stack protect guard if we are using
9795 the global flavor of that guard. */
9796 static tree
9797 rs6000_init_stack_protect_guard (void)
9798 {
9799 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9800 return default_stack_protect_guard ();
9801
9802 return NULL_TREE;
9803 }
9804
9805 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9806
9807 static bool
9808 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9809 {
9810 if (GET_CODE (x) == HIGH
9811 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9812 return true;
9813
9814 /* A TLS symbol in the TOC cannot contain a sum. */
9815 if (GET_CODE (x) == CONST
9816 && GET_CODE (XEXP (x, 0)) == PLUS
9817 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9818 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9819 return true;
9820
9821 /* Do not place an ELF TLS symbol in the constant pool. */
9822 return TARGET_ELF && tls_referenced_p (x);
9823 }
9824
9825 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9826 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9827 can be addressed relative to the toc pointer. */
9828
9829 static bool
9830 use_toc_relative_ref (rtx sym, machine_mode mode)
9831 {
9832 return ((constant_pool_expr_p (sym)
9833 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9834 get_pool_mode (sym)))
9835 || (TARGET_CMODEL == CMODEL_MEDIUM
9836 && SYMBOL_REF_LOCAL_P (sym)
9837 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9838 }
9839
9840 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9841 replace the input X, or the original X if no replacement is called for.
9842 The output parameter *WIN is 1 if the calling macro should goto WIN,
9843 0 if it should not.
9844
9845 For RS/6000, we wish to handle large displacements off a base
9846 register by splitting the addend across an addi/addis pair and the mem insn.
9847 This cuts the number of extra insns needed from 3 to 1.
9848
9849 On Darwin, we use this to generate code for floating point constants.
9850 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9851 The Darwin code is inside #if TARGET_MACHO because only then are the
9852 machopic_* functions defined. */
9853 static rtx
9854 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9855 int opnum, int type,
9856 int ind_levels ATTRIBUTE_UNUSED, int *win)
9857 {
9858 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9859 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9860
9861 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9862 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9863 if (reg_offset_p
9864 && opnum == 1
9865 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9866 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9867 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9868 && TARGET_P9_VECTOR)
9869 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9870 && TARGET_P9_VECTOR)))
9871 reg_offset_p = false;
9872
9873 /* We must recognize output that we have already generated ourselves. */
9874 if (GET_CODE (x) == PLUS
9875 && GET_CODE (XEXP (x, 0)) == PLUS
9876 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9877 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9878 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9879 {
9880 if (TARGET_DEBUG_ADDR)
9881 {
9882 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9883 debug_rtx (x);
9884 }
9885 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9886 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9887 opnum, (enum reload_type) type);
9888 *win = 1;
9889 return x;
9890 }
9891
9892 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9893 if (GET_CODE (x) == LO_SUM
9894 && GET_CODE (XEXP (x, 0)) == HIGH)
9895 {
9896 if (TARGET_DEBUG_ADDR)
9897 {
9898 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9899 debug_rtx (x);
9900 }
9901 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9902 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9903 opnum, (enum reload_type) type);
9904 *win = 1;
9905 return x;
9906 }
9907
9908 #if TARGET_MACHO
9909 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9910 && GET_CODE (x) == LO_SUM
9911 && GET_CODE (XEXP (x, 0)) == PLUS
9912 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9913 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9914 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9915 && machopic_operand_p (XEXP (x, 1)))
9916 {
9917 /* Result of previous invocation of this function on Darwin
9918 floating point constant. */
9919 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9920 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9921 opnum, (enum reload_type) type);
9922 *win = 1;
9923 return x;
9924 }
9925 #endif
9926
9927 if (TARGET_CMODEL != CMODEL_SMALL
9928 && reg_offset_p
9929 && !quad_offset_p
9930 && small_toc_ref (x, VOIDmode))
9931 {
9932 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9933 x = gen_rtx_LO_SUM (Pmode, hi, x);
9934 if (TARGET_DEBUG_ADDR)
9935 {
9936 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9937 debug_rtx (x);
9938 }
9939 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9940 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9941 opnum, (enum reload_type) type);
9942 *win = 1;
9943 return x;
9944 }
9945
9946 if (GET_CODE (x) == PLUS
9947 && REG_P (XEXP (x, 0))
9948 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9949 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9950 && CONST_INT_P (XEXP (x, 1))
9951 && reg_offset_p
9952 && !SPE_VECTOR_MODE (mode)
9953 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
9954 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9955 {
9956 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9957 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9958 HOST_WIDE_INT high
9959 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
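/* E.g. val = 0x12345 gives low = 0x2345 and high = 0x10000; the high
   part is reloaded into a base register below while low stays in the
   mem.  */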
9960
9961 /* Check for 32-bit overflow or quad addresses with one of the
9962 four least significant bits set. */
9963 if (high + low != val
9964 || (quad_offset_p && (low & 0xf)))
9965 {
9966 *win = 0;
9967 return x;
9968 }
9969
9970 /* Reload the high part into a base reg; leave the low part
9971 in the mem directly. */
9972
9973 x = gen_rtx_PLUS (GET_MODE (x),
9974 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9975 GEN_INT (high)),
9976 GEN_INT (low));
9977
9978 if (TARGET_DEBUG_ADDR)
9979 {
9980 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9981 debug_rtx (x);
9982 }
9983 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9984 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9985 opnum, (enum reload_type) type);
9986 *win = 1;
9987 return x;
9988 }
9989
9990 if (GET_CODE (x) == SYMBOL_REF
9991 && reg_offset_p
9992 && !quad_offset_p
9993 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9994 && !SPE_VECTOR_MODE (mode)
9995 #if TARGET_MACHO
9996 && DEFAULT_ABI == ABI_DARWIN
9997 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9998 && machopic_symbol_defined_p (x)
9999 #else
10000 && DEFAULT_ABI == ABI_V4
10001 && !flag_pic
10002 #endif
10003 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
10004 The same goes for DImode without 64-bit gprs and DFmode and DDmode
10005 without fprs.
10006 ??? Assume floating point reg based on mode? This assumption is
10007 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
10008 where reload ends up doing a DFmode load of a constant from
10009 mem using two gprs. Unfortunately, at this point reload
10010 hasn't yet selected regs so poking around in reload data
10011 won't help and even if we could figure out the regs reliably,
10012 we'd still want to allow this transformation when the mem is
10013 naturally aligned. Since we say the address is good here, we
10014 can't disable offsets from LO_SUMs in mem_operand_gpr.
10015 FIXME: Allow offset from lo_sum for other modes too, when
10016 mem is sufficiently aligned.
10017
10018 Also disallow this if the type can go in VMX/Altivec registers, since
10019 those registers do not have d-form (reg+offset) address modes. */
10020 && !reg_addr[mode].scalar_in_vmx_p
10021 && mode != TFmode
10022 && mode != TDmode
10023 && mode != IFmode
10024 && mode != KFmode
10025 && (mode != TImode || !TARGET_VSX_TIMODE)
10026 && mode != PTImode
10027 && (mode != DImode || TARGET_POWERPC64)
10028 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
10029 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
10030 {
10031 #if TARGET_MACHO
10032 if (flag_pic)
10033 {
10034 rtx offset = machopic_gen_offset (x);
10035 x = gen_rtx_LO_SUM (GET_MODE (x),
10036 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
10037 gen_rtx_HIGH (Pmode, offset)), offset);
10038 }
10039 else
10040 #endif
10041 x = gen_rtx_LO_SUM (GET_MODE (x),
10042 gen_rtx_HIGH (Pmode, x), x);
10043
10044 if (TARGET_DEBUG_ADDR)
10045 {
10046 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
10047 debug_rtx (x);
10048 }
10049 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10050 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10051 opnum, (enum reload_type) type);
10052 *win = 1;
10053 return x;
10054 }
10055
10056 /* Reload an offset address wrapped by an AND that represents the
10057 masking of the lower bits. Strip the outer AND and let reload
10058 convert the offset address into an indirect address. For VSX,
10059 force reload to create the address with an AND in a separate
10060 register, because we can't guarantee an altivec register will
10061 be used. */
10062 if (VECTOR_MEM_ALTIVEC_P (mode)
10063 && GET_CODE (x) == AND
10064 && GET_CODE (XEXP (x, 0)) == PLUS
10065 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
10066 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
10067 && GET_CODE (XEXP (x, 1)) == CONST_INT
10068 && INTVAL (XEXP (x, 1)) == -16)
10069 {
10070 x = XEXP (x, 0);
10071 *win = 1;
10072 return x;
10073 }
10074
10075 if (TARGET_TOC
10076 && reg_offset_p
10077 && !quad_offset_p
10078 && GET_CODE (x) == SYMBOL_REF
10079 && use_toc_relative_ref (x, mode))
10080 {
10081 x = create_TOC_reference (x, NULL_RTX);
10082 if (TARGET_CMODEL != CMODEL_SMALL)
10083 {
10084 if (TARGET_DEBUG_ADDR)
10085 {
10086 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
10087 debug_rtx (x);
10088 }
10089 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
10090 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10091 opnum, (enum reload_type) type);
10092 }
10093 *win = 1;
10094 return x;
10095 }
10096 *win = 0;
10097 return x;
10098 }
10099
10100 /* Debug version of rs6000_legitimize_reload_address. */
10101 static rtx
10102 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
10103 int opnum, int type,
10104 int ind_levels, int *win)
10105 {
10106 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
10107 ind_levels, win);
10108 fprintf (stderr,
10109 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
10110 "type = %d, ind_levels = %d, win = %d, original addr:\n",
10111 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
10112 debug_rtx (x);
10113
10114 if (x == ret)
10115 fprintf (stderr, "Same address returned\n");
10116 else if (!ret)
10117 fprintf (stderr, "NULL returned\n");
10118 else
10119 {
10120 fprintf (stderr, "New address:\n");
10121 debug_rtx (ret);
10122 }
10123
10124 return ret;
10125 }
10126
10127 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
10128 that is a valid memory address for an instruction.
10129 The MODE argument is the machine mode for the MEM expression
10130 that wants to use this address.
10131
10132 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
10133 refers to a constant pool entry of an address (or the sum of it
10134 plus a constant), a short (16-bit signed) constant plus a register,
10135 the sum of two registers, or a register indirect, possibly with an
10136 auto-increment. For DFmode, DDmode and DImode with a constant plus
10137 register, we must ensure that both words are addressable or PowerPC64
10138 with offset word aligned.
10139
10140 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
10141 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
10142 because adjacent memory cells are accessed by adding word-sized offsets
10143 during assembly output. */
10144 static bool
10145 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
10146 {
10147 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
10148 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
10149
10150 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
10151 if (VECTOR_MEM_ALTIVEC_P (mode)
10152 && GET_CODE (x) == AND
10153 && GET_CODE (XEXP (x, 1)) == CONST_INT
10154 && INTVAL (XEXP (x, 1)) == -16)
10155 x = XEXP (x, 0);
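/* The AND with -16 mirrors lvx/stvx, which ignore the low four bits
   of the effective address, so validity is judged on the inner
   PLUS.  */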
10156
10157 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
10158 return 0;
10159 if (legitimate_indirect_address_p (x, reg_ok_strict))
10160 return 1;
10161 if (TARGET_UPDATE
10162 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
10163 && mode_supports_pre_incdec_p (mode)
10164 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
10165 return 1;
10166 /* Handle restricted vector d-form offsets in ISA 3.0. */
10167 if (quad_offset_p)
10168 {
10169 if (quad_address_p (x, mode, reg_ok_strict))
10170 return 1;
10171 }
10172 else if (virtual_stack_registers_memory_p (x))
10173 return 1;
10174
10175 else if (reg_offset_p)
10176 {
10177 if (legitimate_small_data_p (mode, x))
10178 return 1;
10179 if (legitimate_constant_pool_address_p (x, mode,
10180 reg_ok_strict || lra_in_progress))
10181 return 1;
10182 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
10183 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
10184 return 1;
10185 }
10186
10187 /* For TImode, if we have TImode in VSX registers, only allow register
10188 indirect addresses. This will allow the values to go in either GPRs
10189 or VSX registers without reloading. The vector types would tend to
10190 go into VSX registers, so we allow REG+REG, while TImode seems
10191 somewhat split, in that some uses are GPR based, and some VSX based. */
10192 /* FIXME: We could loosen this by changing the following to
10193 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
10194 but currently we cannot allow REG+REG addressing for TImode. See
10195 PR72827 for complete details on how this ends up hoodwinking DSE. */
10196 if (mode == TImode && TARGET_VSX_TIMODE)
10197 return 0;
10198 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
10199 if (! reg_ok_strict
10200 && reg_offset_p
10201 && GET_CODE (x) == PLUS
10202 && GET_CODE (XEXP (x, 0)) == REG
10203 && (XEXP (x, 0) == virtual_stack_vars_rtx
10204 || XEXP (x, 0) == arg_pointer_rtx)
10205 && GET_CODE (XEXP (x, 1)) == CONST_INT)
10206 return 1;
10207 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10208 return 1;
10209 if (!FLOAT128_2REG_P (mode)
10210 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
10211 || TARGET_POWERPC64
10212 || (mode != DFmode && mode != DDmode)
10213 || (TARGET_E500_DOUBLE && mode != DDmode))
10214 && (TARGET_POWERPC64 || mode != DImode)
10215 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10216 && mode != PTImode
10217 && !avoiding_indexed_address_p (mode)
10218 && legitimate_indexed_address_p (x, reg_ok_strict))
10219 return 1;
10220 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10221 && mode_supports_pre_modify_p (mode)
10222 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10223 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10224 reg_ok_strict, false)
10225 || (!avoiding_indexed_address_p (mode)
10226 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10227 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10228 return 1;
10229 if (reg_offset_p && !quad_offset_p
10230 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10231 return 1;
10232 return 0;
10233 }
10234
10235 /* Debug version of rs6000_legitimate_address_p. */
10236 static bool
10237 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10238 bool reg_ok_strict)
10239 {
10240 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10241 fprintf (stderr,
10242 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10243 "strict = %d, reload = %s, code = %s\n",
10244 ret ? "true" : "false",
10245 GET_MODE_NAME (mode),
10246 reg_ok_strict,
10247 (reload_completed
10248 ? "after"
10249 : (reload_in_progress ? "progress" : "before")),
10250 GET_RTX_NAME (GET_CODE (x)));
10251 debug_rtx (x);
10252
10253 return ret;
10254 }
10255
10256 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10257
10258 static bool
10259 rs6000_mode_dependent_address_p (const_rtx addr,
10260 addr_space_t as ATTRIBUTE_UNUSED)
10261 {
10262 return rs6000_mode_dependent_address_ptr (addr);
10263 }
10264
10265 /* Go to LABEL if ADDR (a legitimate address expression)
10266 has an effect that depends on the machine mode it is used for.
10267
10268 On the RS/6000 this is true of all integral offsets (since AltiVec
10269 and VSX modes don't allow them) and of pre-increment or decrement addresses.
10270
10271 ??? Except that due to conceptual problems in offsettable_address_p
10272 we can't really report the problems of integral offsets. So leave
10273 this assuming that the adjustable offset must be valid for the
10274 sub-words of a TFmode operand, which is what we had before. */
10275
10276 static bool
10277 rs6000_mode_dependent_address (const_rtx addr)
10278 {
10279 switch (GET_CODE (addr))
10280 {
10281 case PLUS:
10282 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10283 is considered a legitimate address before reload, so there
10284 are no offset restrictions in that case. Note that this
10285 condition is safe in strict mode because any address involving
10286 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10287 been rejected as illegitimate. */
10288 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10289 && XEXP (addr, 0) != arg_pointer_rtx
10290 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
10291 {
10292 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
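/* E.g. on powerpc64 an offset of 32760 is mode dependent: it meets
   the 0x10000 - 8 bound exactly, and the final 8-byte piece of a
   16-byte value would need displacement 32768, which does not fit
   in 16 signed bits.  */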
10293 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
10294 }
10295 break;
10296
10297 case LO_SUM:
10298 /* Anything in the constant pool is sufficiently aligned that
10299 all bytes have the same high part address. */
10300 return !legitimate_constant_pool_address_p (addr, QImode, false);
10301
10302 /* Auto-increment cases are now treated generically in recog.c. */
10303 case PRE_MODIFY:
10304 return TARGET_UPDATE;
10305
10306 /* AND is only allowed in Altivec loads. */
10307 case AND:
10308 return true;
10309
10310 default:
10311 break;
10312 }
10313
10314 return false;
10315 }
10316
10317 /* Debug version of rs6000_mode_dependent_address. */
10318 static bool
10319 rs6000_debug_mode_dependent_address (const_rtx addr)
10320 {
10321 bool ret = rs6000_mode_dependent_address (addr);
10322
10323 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10324 ret ? "true" : "false");
10325 debug_rtx (addr);
10326
10327 return ret;
10328 }
10329
10330 /* Implement FIND_BASE_TERM. */
10331
10332 rtx
10333 rs6000_find_base_term (rtx op)
10334 {
10335 rtx base;
10336
10337 base = op;
10338 if (GET_CODE (base) == CONST)
10339 base = XEXP (base, 0);
10340 if (GET_CODE (base) == PLUS)
10341 base = XEXP (base, 0);
10342 if (GET_CODE (base) == UNSPEC)
10343 switch (XINT (base, 1))
10344 {
10345 case UNSPEC_TOCREL:
10346 case UNSPEC_MACHOPIC_OFFSET:
10347 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10348 for aliasing purposes. */
10349 return XVECEXP (base, 0, 0);
10350 }
10351
10352 return op;
10353 }
10354
10355 /* More elaborate version of recog's offsettable_memref_p predicate
10356 that works around the ??? note of rs6000_mode_dependent_address.
10357 In particular it accepts
10358
10359 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10360
10361 in 32-bit mode, which the recog predicate rejects. */
10362
10363 static bool
10364 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10365 {
10366 bool worst_case;
10367
10368 if (!MEM_P (op))
10369 return false;
10370
10371 /* First mimic offsettable_memref_p. */
10372 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10373 return true;
10374
10375 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10376 the latter predicate knows nothing about the mode of the memory
10377 reference and, therefore, assumes that it is the largest supported
10378 mode (TFmode). As a consequence, legitimate offsettable memory
10379 references are rejected. rs6000_legitimate_offset_address_p contains
10380 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10381 at least with a little bit of help here given that we know the
10382 actual registers used. */
10383 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10384 || GET_MODE_SIZE (reg_mode) == 4);
10385 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10386 true, worst_case);
10387 }
10388
10389 /* Determine the reassociation width to be used in reassociate_bb.
10390 This takes into account how many parallel operations we
10391 can actually do of a given type, and also the latency.
10392 P8:
10393 int add/sub 6/cycle
10394 mul 2/cycle
10395 vect add/sub/mul 2/cycle
10396 fp add/sub/mul 2/cycle
10397 dfp 1/cycle
10398 */
10399
10400 static int
10401 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10402 machine_mode mode)
10403 {
10404 switch (rs6000_cpu)
10405 {
10406 case PROCESSOR_POWER8:
10407 case PROCESSOR_POWER9:
10408 if (DECIMAL_FLOAT_MODE_P (mode))
10409 return 1;
10410 if (VECTOR_MODE_P (mode))
10411 return 4;
10412 if (INTEGRAL_MODE_P (mode))
10413 return opc == MULT_EXPR ? 4 : 6;
10414 if (FLOAT_MODE_P (mode))
10415 return 4;
10416 break;
10417 default:
10418 break;
10419 }
10420 return 1;
10421 }
10422
10423 /* Change register usage conditional on target flags. */
10424 static void
10425 rs6000_conditional_register_usage (void)
10426 {
10427 int i;
10428
10429 if (TARGET_DEBUG_TARGET)
10430 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10431
10432 /* Set MQ register fixed (already call_used) so that it will not be
10433 allocated. */
10434 fixed_regs[64] = 1;
10435
10436 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10437 if (TARGET_64BIT)
10438 fixed_regs[13] = call_used_regs[13]
10439 = call_really_used_regs[13] = 1;
10440
10441 /* Conditionally disable FPRs. */
10442 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
10443 for (i = 32; i < 64; i++)
10444 fixed_regs[i] = call_used_regs[i]
10445 = call_really_used_regs[i] = 1;
10446
10447 /* The TOC register is not killed across calls in a way that is
10448 visible to the compiler. */
10449 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10450 call_really_used_regs[2] = 0;
10451
10452 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10453 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10454
10455 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10456 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10457 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10458 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10459
10460 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10461 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10462 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10463 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10464
10465 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10466 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10467 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10468
10469 if (TARGET_SPE)
10470 {
10471 global_regs[SPEFSCR_REGNO] = 1;
10472 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
10473 registers in prologues and epilogues. We no longer use r14
10474 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
10475 pool for link-compatibility with older versions of GCC. Once
10476 "old" code has died out, we can return r14 to the allocation
10477 pool. */
10478 fixed_regs[14]
10479 = call_used_regs[14]
10480 = call_really_used_regs[14] = 1;
10481 }
10482
10483 if (!TARGET_ALTIVEC && !TARGET_VSX)
10484 {
10485 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10486 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10487 call_really_used_regs[VRSAVE_REGNO] = 1;
10488 }
10489
10490 if (TARGET_ALTIVEC || TARGET_VSX)
10491 global_regs[VSCR_REGNO] = 1;
10492
10493 if (TARGET_ALTIVEC_ABI)
10494 {
10495 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10496 call_used_regs[i] = call_really_used_regs[i] = 1;
10497
10498 /* AIX reserves VR20:31 in non-extended ABI mode. */
10499 if (TARGET_XCOFF)
10500 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10501 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10502 }
10503 }
10504
10505 \f
10506 /* Output insns to set DEST equal to the constant SOURCE as a series of
10507 lis, ori and shl instructions and return TRUE. */
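/* For illustration only: on the SImode path below, a constant such as
   0x12345678 is synthesized roughly as

       lis  rT,0x1234        # rT = 0x12340000
       ori  rD,rT,0x5678     # rD = 0x12345678

   i.e. one SET of the high 16 bits followed by an IOR of the low 16.
   The actual insns are chosen by whichever patterns match the emitted
   RTL, so this sketch is indicative, not authoritative.  */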
10508
10509 bool
10510 rs6000_emit_set_const (rtx dest, rtx source)
10511 {
10512 machine_mode mode = GET_MODE (dest);
10513 rtx temp, set;
10514 rtx_insn *insn;
10515 HOST_WIDE_INT c;
10516
10517 gcc_checking_assert (CONST_INT_P (source));
10518 c = INTVAL (source);
10519 switch (mode)
10520 {
10521 case E_QImode:
10522 case E_HImode:
10523 emit_insn (gen_rtx_SET (dest, source));
10524 return true;
10525
10526 case E_SImode:
10527 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10528
10529 emit_insn (gen_rtx_SET (copy_rtx (temp),
10530 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10531 emit_insn (gen_rtx_SET (dest,
10532 gen_rtx_IOR (SImode, copy_rtx (temp),
10533 GEN_INT (c & 0xffff))));
10534 break;
10535
10536 case E_DImode:
10537 if (!TARGET_POWERPC64)
10538 {
10539 rtx hi, lo;
10540
10541 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10542 DImode);
10543 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10544 DImode);
10545 emit_move_insn (hi, GEN_INT (c >> 32));
10546 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10547 emit_move_insn (lo, GEN_INT (c));
10548 }
10549 else
10550 rs6000_emit_set_long_const (dest, c);
10551 break;
10552
10553 default:
10554 gcc_unreachable ();
10555 }
10556
10557 insn = get_last_insn ();
10558 set = single_set (insn);
10559 if (! CONSTANT_P (SET_SRC (set)))
10560 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10561
10562 return true;
10563 }
10564
10565 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10566 Output insns to set DEST equal to the constant C as a series of
10567 lis, ori and shl instructions. */
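/* Illustrative sketch: for c = 0x1122334455667788 the constant splits
   into ud4 = 0x1122, ud3 = 0x3344, ud2 = 0x5566 and ud1 = 0x7788, and
   the general (final else) case below emits RTL that typically becomes

       lis   rT,0x1122
       ori   rT,rT,0x3344
       sldi  rT,rT,32
       oris  rT,rT,0x5566
       ori   rD,rT,0x7788

   The earlier cases shorten this when the upper halfwords are all
   zeros or all ones.  Instruction names are indicative only.  */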
10568
10569 static void
10570 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10571 {
10572 rtx temp;
10573 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10574
10575 ud1 = c & 0xffff;
10576 c = c >> 16;
10577 ud2 = c & 0xffff;
10578 c = c >> 16;
10579 ud3 = c & 0xffff;
10580 c = c >> 16;
10581 ud4 = c & 0xffff;
10582
10583 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10584 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10585 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10586
10587 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10588 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10589 {
10590 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10591
10592 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10593 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10594 if (ud1 != 0)
10595 emit_move_insn (dest,
10596 gen_rtx_IOR (DImode, copy_rtx (temp),
10597 GEN_INT (ud1)));
10598 }
10599 else if (ud3 == 0 && ud4 == 0)
10600 {
10601 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10602
10603 gcc_assert (ud2 & 0x8000);
10604 emit_move_insn (copy_rtx (temp),
10605 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10606 if (ud1 != 0)
10607 emit_move_insn (copy_rtx (temp),
10608 gen_rtx_IOR (DImode, copy_rtx (temp),
10609 GEN_INT (ud1)));
10610 emit_move_insn (dest,
10611 gen_rtx_ZERO_EXTEND (DImode,
10612 gen_lowpart (SImode,
10613 copy_rtx (temp))));
10614 }
10615 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10616 || (ud4 == 0 && ! (ud3 & 0x8000)))
10617 {
10618 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10619
10620 emit_move_insn (copy_rtx (temp),
10621 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10622 if (ud2 != 0)
10623 emit_move_insn (copy_rtx (temp),
10624 gen_rtx_IOR (DImode, copy_rtx (temp),
10625 GEN_INT (ud2)));
10626 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10627 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10628 GEN_INT (16)));
10629 if (ud1 != 0)
10630 emit_move_insn (dest,
10631 gen_rtx_IOR (DImode, copy_rtx (temp),
10632 GEN_INT (ud1)));
10633 }
10634 else
10635 {
10636 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10637
10638 emit_move_insn (copy_rtx (temp),
10639 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10640 if (ud3 != 0)
10641 emit_move_insn (copy_rtx (temp),
10642 gen_rtx_IOR (DImode, copy_rtx (temp),
10643 GEN_INT (ud3)));
10644
10645 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10646 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10647 GEN_INT (32)));
10648 if (ud2 != 0)
10649 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10650 gen_rtx_IOR (DImode, copy_rtx (temp),
10651 GEN_INT (ud2 << 16)));
10652 if (ud1 != 0)
10653 emit_move_insn (dest,
10654 gen_rtx_IOR (DImode, copy_rtx (temp),
10655 GEN_INT (ud1)));
10656 }
10657 }
10658
10659 /* Helper for the following. Get rid of [r+r] memory refs
10660 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
10661
10662 static void
10663 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10664 {
10665 if (reload_in_progress)
10666 return;
10667
10668 if (GET_CODE (operands[0]) == MEM
10669 && GET_CODE (XEXP (operands[0], 0)) != REG
10670 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10671 GET_MODE (operands[0]), false))
10672 operands[0]
10673 = replace_equiv_address (operands[0],
10674 copy_addr_to_reg (XEXP (operands[0], 0)));
10675
10676 if (GET_CODE (operands[1]) == MEM
10677 && GET_CODE (XEXP (operands[1], 0)) != REG
10678 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10679 GET_MODE (operands[1]), false))
10680 operands[1]
10681 = replace_equiv_address (operands[1],
10682 copy_addr_to_reg (XEXP (operands[1], 0)));
10683 }
10684
10685 /* Generate a vector of constants to permute MODE for a little-endian
10686 storage operation by swapping the two halves of a vector. */
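/* For example (illustrative): for V4SImode the result is the element
   selector { 2, 3, 0, 1 }, and for V2DImode it is { 1, 0 }; in each
   case the two doubleword halves of the vector trade places.  */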
10687 static rtvec
10688 rs6000_const_vec (machine_mode mode)
10689 {
10690 int i, subparts;
10691 rtvec v;
10692
10693 switch (mode)
10694 {
10695 case E_V1TImode:
10696 subparts = 1;
10697 break;
10698 case E_V2DFmode:
10699 case E_V2DImode:
10700 subparts = 2;
10701 break;
10702 case E_V4SFmode:
10703 case E_V4SImode:
10704 subparts = 4;
10705 break;
10706 case E_V8HImode:
10707 subparts = 8;
10708 break;
10709 case E_V16QImode:
10710 subparts = 16;
10711 break;
10712 default:
10713 gcc_unreachable ();
10714 }
10715
10716 v = rtvec_alloc (subparts);
10717
10718 for (i = 0; i < subparts / 2; ++i)
10719 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10720 for (i = subparts / 2; i < subparts; ++i)
10721 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10722
10723 return v;
10724 }
10725
10726 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10727 for a VSX load or store operation. */
10728 rtx
10729 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10730 {
10731 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
10732 128-bit integers if they are allowed in VSX registers. */
10733 if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10734 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10735 else
10736 {
10737 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10738 return gen_rtx_VEC_SELECT (mode, source, par);
10739 }
10740 }
10741
10742 /* Emit a little-endian load from vector memory location SOURCE to VSX
10743 register DEST in mode MODE. The load is done with two permuting
10744 insns that represent an lxvd2x and an xxpermdi. */
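/* A sketch of the RTL shape for a V2DImode load, where each
   vec_select swaps the two doublewords (see rs6000_const_vec):

       (set (reg:V2DI tmp)  (vec_select:V2DI (mem:V2DI addr)
                                             (parallel [1 0])))
       (set (reg:V2DI dest) (vec_select:V2DI (reg:V2DI tmp)
                                             (parallel [1 0])))

   The two swaps compose, so DEST ends up with its elements in the
   expected order despite the doubleword-swapped little-endian load.
   This is illustrative of the generated RTL, not a literal dump.  */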
10745 void
10746 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10747 {
10748 rtx tmp, permute_mem, permute_reg;
10749
10750 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10751 V1TImode). */
10752 if (mode == TImode || mode == V1TImode)
10753 {
10754 mode = V2DImode;
10755 dest = gen_lowpart (V2DImode, dest);
10756 source = adjust_address (source, V2DImode, 0);
10757 }
10758
10759 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10760 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10761 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10762 emit_insn (gen_rtx_SET (tmp, permute_mem));
10763 emit_insn (gen_rtx_SET (dest, permute_reg));
10764 }
10765
10766 /* Emit a little-endian store to vector memory location DEST from VSX
10767 register SOURCE in mode MODE. The store is done with two permuting
10768 insns that represent an xxpermdi and an stxvd2x. */
10769 void
10770 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10771 {
10772 rtx tmp, permute_src, permute_tmp;
10773
10774 /* This should never be called during or after reload, because it does
10775 not re-permute the source register. It is intended only for use
10776 during expand. */
10777 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10778
10779 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10780 V1TImode). */
10781 if (mode == TImode || mode == V1TImode)
10782 {
10783 mode = V2DImode;
10784 dest = adjust_address (dest, V2DImode, 0);
10785 source = gen_lowpart (V2DImode, source);
10786 }
10787
10788 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10789 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10790 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10791 emit_insn (gen_rtx_SET (tmp, permute_src));
10792 emit_insn (gen_rtx_SET (dest, permute_tmp));
10793 }
10794
10795 /* Emit a sequence representing a little-endian VSX load or store,
10796 moving data from SOURCE to DEST in mode MODE. This is done
10797 separately from rs6000_emit_move to ensure it is called only
10798 during expand. LE VSX loads and stores introduced later are
10799 handled with a split. The expand-time RTL generation allows
10800 us to optimize away redundant pairs of register-permutes. */
10801 void
10802 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10803 {
10804 gcc_assert (!BYTES_BIG_ENDIAN
10805 && VECTOR_MEM_VSX_P (mode)
10806 && !TARGET_P9_VECTOR
10807 && !gpr_or_gpr_p (dest, source)
10808 && (MEM_P (source) ^ MEM_P (dest)));
10809
10810 if (MEM_P (source))
10811 {
10812 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10813 rs6000_emit_le_vsx_load (dest, source, mode);
10814 }
10815 else
10816 {
10817 if (!REG_P (source))
10818 source = force_reg (mode, source);
10819 rs6000_emit_le_vsx_store (dest, source, mode);
10820 }
10821 }
10822
10823 /* Return whether an SFmode or SImode move can be done without converting one
10824 mode to another. This arises when we have:
10825
10826 (SUBREG:SF (REG:SI ...))
10827 (SUBREG:SI (REG:SF ...))
10828
10829 and one of the values is in a floating point/vector register, where SFmode
10830 scalars are stored in DFmode format. */
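/* Illustrative cases, assuming SFmode values live in VSX registers in
   DFmode format: a move like

       (set (reg:SI r) (subreg:SI (reg:SF f) 0))

   is rejected (returns false) because the raw bits must be converted
   first, while

       (set (subreg:SI (reg:SF a) 0) (subreg:SI (reg:SF b) 0))

   is allowed, since both sides use the same underlying mode.  */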
10831
10832 bool
10833 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10834 {
10835 if (TARGET_ALLOW_SF_SUBREG)
10836 return true;
10837
10838 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10839 return true;
10840
10841 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10842 return true;
10843
10844 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10845 if (SUBREG_P (dest))
10846 {
10847 rtx dest_subreg = SUBREG_REG (dest);
10848 rtx src_subreg = SUBREG_REG (src);
10849 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10850 }
10851
10852 return false;
10853 }
10854
10855
10856 /* Helper function to change moves with:
10857
10858 (SUBREG:SF (REG:SI)) and
10859 (SUBREG:SI (REG:SF))
10860
10861 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10862 values are stored as DFmode values in the VSX registers. We need to convert
10863 the bits before we can use a direct move or operate on the bits in the
10864 vector register as an integer type.
10865
10866 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
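/* Sketch (assuming TARGET_DIRECT_MOVE_64BIT and expand time): a move

       (set (reg:SI r) (subreg:SI (reg:SF f) 0))

   is rewritten here into the movsi_from_sf pattern, which converts the
   DFmode-format SF bits into SImode format before performing the move;
   the SFmode-from-SImode direction is handled analogously.  */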
10867
10868 static bool
10869 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10870 {
10871 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10872 && !lra_in_progress
10873 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10874 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10875 {
10876 rtx inner_source = SUBREG_REG (source);
10877 machine_mode inner_mode = GET_MODE (inner_source);
10878
10879 if (mode == SImode && inner_mode == SFmode)
10880 {
10881 emit_insn (gen_movsi_from_sf (dest, inner_source));
10882 return true;
10883 }
10884
10885 if (mode == SFmode && inner_mode == SImode)
10886 {
10887 emit_insn (gen_movsf_from_si (dest, inner_source));
10888 return true;
10889 }
10890 }
10891
10892 return false;
10893 }
10894
10895 /* Emit a move from SOURCE to DEST in mode MODE. */
10896 void
10897 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10898 {
10899 rtx operands[2];
10900 operands[0] = dest;
10901 operands[1] = source;
10902
10903 if (TARGET_DEBUG_ADDR)
10904 {
10905 fprintf (stderr,
10906 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10907 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10908 GET_MODE_NAME (mode),
10909 reload_in_progress,
10910 reload_completed,
10911 can_create_pseudo_p ());
10912 debug_rtx (dest);
10913 fprintf (stderr, "source:\n");
10914 debug_rtx (source);
10915 }
10916
10917 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
10918 if (CONST_WIDE_INT_P (operands[1])
10919 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10920 {
10921 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10922 gcc_unreachable ();
10923 }
10924
10925 /* See if we need to special case SImode/SFmode SUBREG moves. */
10926 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10927 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10928 return;
10929
10930 /* Check if GCC is setting up a block move that will end up using FP
10931 registers as temporaries. We must make sure this is acceptable. */
10932 if (GET_CODE (operands[0]) == MEM
10933 && GET_CODE (operands[1]) == MEM
10934 && mode == DImode
10935 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10936 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10937 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10938 ? 32 : MEM_ALIGN (operands[0])))
10939 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10940 ? 32
10941 : MEM_ALIGN (operands[1]))))
10942 && ! MEM_VOLATILE_P (operands [0])
10943 && ! MEM_VOLATILE_P (operands [1]))
10944 {
10945 emit_move_insn (adjust_address (operands[0], SImode, 0),
10946 adjust_address (operands[1], SImode, 0));
10947 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10948 adjust_address (copy_rtx (operands[1]), SImode, 4));
10949 return;
10950 }
10951
10952 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10953 && !gpc_reg_operand (operands[1], mode))
10954 operands[1] = force_reg (mode, operands[1]);
10955
10956 /* Recognize the case where operand[1] is a reference to thread-local
10957 data and load its address to a register. */
10958 if (tls_referenced_p (operands[1]))
10959 {
10960 enum tls_model model;
10961 rtx tmp = operands[1];
10962 rtx addend = NULL;
10963
10964 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10965 {
10966 addend = XEXP (XEXP (tmp, 0), 1);
10967 tmp = XEXP (XEXP (tmp, 0), 0);
10968 }
10969
10970 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10971 model = SYMBOL_REF_TLS_MODEL (tmp);
10972 gcc_assert (model != 0);
10973
10974 tmp = rs6000_legitimize_tls_address (tmp, model);
10975 if (addend)
10976 {
10977 tmp = gen_rtx_PLUS (mode, tmp, addend);
10978 tmp = force_operand (tmp, operands[0]);
10979 }
10980 operands[1] = tmp;
10981 }
10982
10983 /* Handle the case where reload calls us with an invalid address. */
10984 if (reload_in_progress && mode == Pmode
10985 && (! general_operand (operands[1], mode)
10986 || ! nonimmediate_operand (operands[0], mode)))
10987 goto emit_set;
10988
10989 /* 128-bit constant floating-point values on Darwin should really be loaded
10990 as two parts. However, this premature splitting is a problem when DFmode
10991 values can go into Altivec registers. */
10992 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10993 && GET_CODE (operands[1]) == CONST_DOUBLE)
10994 {
10995 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10996 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10997 DFmode);
10998 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10999 GET_MODE_SIZE (DFmode)),
11000 simplify_gen_subreg (DFmode, operands[1], mode,
11001 GET_MODE_SIZE (DFmode)),
11002 DFmode);
11003 return;
11004 }
11005
11006 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
11007 cfun->machine->sdmode_stack_slot =
11008 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
11009
11010
11011 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11012 p1:SD) if p1 is not of floating point class and p0 is spilled as
11013 we can have no analogous movsd_store for this. */
11014 if (lra_in_progress && mode == DDmode
11015 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11016 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11017 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
11018 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11019 {
11020 enum reg_class cl;
11021 int regno = REGNO (SUBREG_REG (operands[1]));
11022
11023 if (regno >= FIRST_PSEUDO_REGISTER)
11024 {
11025 cl = reg_preferred_class (regno);
11026 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11027 }
11028 if (regno >= 0 && ! FP_REGNO_P (regno))
11029 {
11030 mode = SDmode;
11031 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11032 operands[1] = SUBREG_REG (operands[1]);
11033 }
11034 }
11035 if (lra_in_progress
11036 && mode == SDmode
11037 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
11038 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11039 && (REG_P (operands[1])
11040 || (GET_CODE (operands[1]) == SUBREG
11041 && REG_P (SUBREG_REG (operands[1])))))
11042 {
11043 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
11044 ? SUBREG_REG (operands[1]) : operands[1]);
11045 enum reg_class cl;
11046
11047 if (regno >= FIRST_PSEUDO_REGISTER)
11048 {
11049 cl = reg_preferred_class (regno);
11050 gcc_assert (cl != NO_REGS);
11051 regno = ira_class_hard_regs[cl][0];
11052 }
11053 if (FP_REGNO_P (regno))
11054 {
11055 if (GET_MODE (operands[0]) != DDmode)
11056 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11057 emit_insn (gen_movsd_store (operands[0], operands[1]));
11058 }
11059 else if (INT_REGNO_P (regno))
11060 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11061 else
11062 gcc_unreachable ();
11063 return;
11064 }
11065 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11066 p1:DD)) if p0 is not of floating point class and p1 is spilled as
11067 we can have no analogous movsd_load for this. */
11068 if (lra_in_progress && mode == DDmode
11069 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
11070 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11071 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11072 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11073 {
11074 enum reg_class cl;
11075 int regno = REGNO (SUBREG_REG (operands[0]));
11076
11077 if (regno >= FIRST_PSEUDO_REGISTER)
11078 {
11079 cl = reg_preferred_class (regno);
11080 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11081 }
11082 if (regno >= 0 && ! FP_REGNO_P (regno))
11083 {
11084 mode = SDmode;
11085 operands[0] = SUBREG_REG (operands[0]);
11086 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11087 }
11088 }
11089 if (lra_in_progress
11090 && mode == SDmode
11091 && (REG_P (operands[0])
11092 || (GET_CODE (operands[0]) == SUBREG
11093 && REG_P (SUBREG_REG (operands[0]))))
11094 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
11095 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11096 {
11097 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
11098 ? SUBREG_REG (operands[0]) : operands[0]);
11099 enum reg_class cl;
11100
11101 if (regno >= FIRST_PSEUDO_REGISTER)
11102 {
11103 cl = reg_preferred_class (regno);
11104 gcc_assert (cl != NO_REGS);
11105 regno = ira_class_hard_regs[cl][0];
11106 }
11107 if (FP_REGNO_P (regno))
11108 {
11109 if (GET_MODE (operands[1]) != DDmode)
11110 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11111 emit_insn (gen_movsd_load (operands[0], operands[1]));
11112 }
11113 else if (INT_REGNO_P (regno))
11114 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11115 else
11116 gcc_unreachable ();
11117 return;
11118 }
11119
11120 if (reload_in_progress
11121 && mode == SDmode
11122 && cfun->machine->sdmode_stack_slot != NULL_RTX
11123 && MEM_P (operands[0])
11124 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
11125 && REG_P (operands[1]))
11126 {
11127 if (FP_REGNO_P (REGNO (operands[1])))
11128 {
11129 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
11130 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11131 emit_insn (gen_movsd_store (mem, operands[1]));
11132 }
11133 else if (INT_REGNO_P (REGNO (operands[1])))
11134 {
11135 rtx mem = operands[0];
11136 if (BYTES_BIG_ENDIAN)
11137 mem = adjust_address_nv (mem, mode, 4);
11138 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11139 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
11140 }
11141 else
11142 gcc_unreachable ();
11143 return;
11144 }
11145 if (reload_in_progress
11146 && mode == SDmode
11147 && REG_P (operands[0])
11148 && MEM_P (operands[1])
11149 && cfun->machine->sdmode_stack_slot != NULL_RTX
11150 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
11151 {
11152 if (FP_REGNO_P (REGNO (operands[0])))
11153 {
11154 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
11155 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11156 emit_insn (gen_movsd_load (operands[0], mem));
11157 }
11158 else if (INT_REGNO_P (REGNO (operands[0])))
11159 {
11160 rtx mem = operands[1];
11161 if (BYTES_BIG_ENDIAN)
11162 mem = adjust_address_nv (mem, mode, 4);
11163 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
11164 emit_insn (gen_movsd_hardfloat (operands[0], mem));
11165 }
11166 else
11167 gcc_unreachable ();
11168 return;
11169 }
11170
11171 /* FIXME: In the long term, this switch statement should go away
11172 and be replaced by a sequence of tests based on things like
11173 mode == Pmode. */
11174 switch (mode)
11175 {
11176 case E_HImode:
11177 case E_QImode:
11178 if (CONSTANT_P (operands[1])
11179 && GET_CODE (operands[1]) != CONST_INT)
11180 operands[1] = force_const_mem (mode, operands[1]);
11181 break;
11182
11183 case E_TFmode:
11184 case E_TDmode:
11185 case E_IFmode:
11186 case E_KFmode:
11187 if (FLOAT128_2REG_P (mode))
11188 rs6000_eliminate_indexed_memrefs (operands);
11189 /* fall through */
11190
11191 case E_DFmode:
11192 case E_DDmode:
11193 case E_SFmode:
11194 case E_SDmode:
11195 if (CONSTANT_P (operands[1])
11196 && ! easy_fp_constant (operands[1], mode))
11197 operands[1] = force_const_mem (mode, operands[1]);
11198 break;
11199
11200 case E_V16QImode:
11201 case E_V8HImode:
11202 case E_V4SFmode:
11203 case E_V4SImode:
11204 case E_V4HImode:
11205 case E_V2SFmode:
11206 case E_V2SImode:
11207 case E_V1DImode:
11208 case E_V2DFmode:
11209 case E_V2DImode:
11210 case E_V1TImode:
11211 if (CONSTANT_P (operands[1])
11212 && !easy_vector_constant (operands[1], mode))
11213 operands[1] = force_const_mem (mode, operands[1]);
11214 break;
11215
11216 case E_SImode:
11217 case E_DImode:
11218 /* Use default pattern for address of ELF small data. */
11219 if (TARGET_ELF
11220 && mode == Pmode
11221 && DEFAULT_ABI == ABI_V4
11222 && (GET_CODE (operands[1]) == SYMBOL_REF
11223 || GET_CODE (operands[1]) == CONST)
11224 && small_data_operand (operands[1], mode))
11225 {
11226 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11227 return;
11228 }
11229
11230 if (DEFAULT_ABI == ABI_V4
11231 && mode == Pmode && mode == SImode
11232 && flag_pic == 1 && got_operand (operands[1], mode))
11233 {
11234 emit_insn (gen_movsi_got (operands[0], operands[1]));
11235 return;
11236 }
11237
11238 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11239 && TARGET_NO_TOC
11240 && ! flag_pic
11241 && mode == Pmode
11242 && CONSTANT_P (operands[1])
11243 && GET_CODE (operands[1]) != HIGH
11244 && GET_CODE (operands[1]) != CONST_INT)
11245 {
11246 rtx target = (!can_create_pseudo_p ()
11247 ? operands[0]
11248 : gen_reg_rtx (mode));
11249
11250 /* If this is a function address on -mcall-aixdesc,
11251 convert it to the address of the descriptor. */
11252 if (DEFAULT_ABI == ABI_AIX
11253 && GET_CODE (operands[1]) == SYMBOL_REF
11254 && XSTR (operands[1], 0)[0] == '.')
11255 {
11256 const char *name = XSTR (operands[1], 0);
11257 rtx new_ref;
11258 while (*name == '.')
11259 name++;
11260 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11261 CONSTANT_POOL_ADDRESS_P (new_ref)
11262 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11263 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11264 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11265 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11266 operands[1] = new_ref;
11267 }
11268
11269 if (DEFAULT_ABI == ABI_DARWIN)
11270 {
11271 #if TARGET_MACHO
11272 if (MACHO_DYNAMIC_NO_PIC_P)
11273 {
11274 /* Take care of any required data indirection. */
11275 operands[1] = rs6000_machopic_legitimize_pic_address (
11276 operands[1], mode, operands[0]);
11277 if (operands[0] != operands[1])
11278 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11279 return;
11280 }
11281 #endif
11282 emit_insn (gen_macho_high (target, operands[1]));
11283 emit_insn (gen_macho_low (operands[0], target, operands[1]));
11284 return;
11285 }
11286
11287 emit_insn (gen_elf_high (target, operands[1]));
11288 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11289 return;
11290 }
11291
11292 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11293 and we have put it in the TOC, we just need to make a TOC-relative
11294 reference to it. */
11295 if (TARGET_TOC
11296 && GET_CODE (operands[1]) == SYMBOL_REF
11297 && use_toc_relative_ref (operands[1], mode))
11298 operands[1] = create_TOC_reference (operands[1], operands[0]);
11299 else if (mode == Pmode
11300 && CONSTANT_P (operands[1])
11301 && GET_CODE (operands[1]) != HIGH
11302 && ((GET_CODE (operands[1]) != CONST_INT
11303 && ! easy_fp_constant (operands[1], mode))
11304 || (GET_CODE (operands[1]) == CONST_INT
11305 && (num_insns_constant (operands[1], mode)
11306 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11307 || (GET_CODE (operands[0]) == REG
11308 && FP_REGNO_P (REGNO (operands[0]))))
11309 && !toc_relative_expr_p (operands[1], false)
11310 && (TARGET_CMODEL == CMODEL_SMALL
11311 || can_create_pseudo_p ()
11312 || (REG_P (operands[0])
11313 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11314 {
11315
11316 #if TARGET_MACHO
11317 /* Darwin uses a special PIC legitimizer. */
11318 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11319 {
11320 operands[1] =
11321 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11322 operands[0]);
11323 if (operands[0] != operands[1])
11324 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11325 return;
11326 }
11327 #endif
11328
11329 /* If we are to limit the number of things we put in the TOC and
11330 this is a symbol plus a constant we can add in one insn,
11331 just put the symbol in the TOC and add the constant. Don't do
11332 this if reload is in progress. */
11333 if (GET_CODE (operands[1]) == CONST
11334 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11335 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11336 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11337 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11338 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11339 && ! side_effects_p (operands[0]))
11340 {
11341 rtx sym =
11342 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11343 rtx other = XEXP (XEXP (operands[1], 0), 1);
11344
11345 sym = force_reg (mode, sym);
11346 emit_insn (gen_add3_insn (operands[0], sym, other));
11347 return;
11348 }
11349
11350 operands[1] = force_const_mem (mode, operands[1]);
11351
11352 if (TARGET_TOC
11353 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11354 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11355 {
11356 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11357 operands[0]);
11358 operands[1] = gen_const_mem (mode, tocref);
11359 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11360 }
11361 }
11362 break;
11363
11364 case E_TImode:
11365 if (!VECTOR_MEM_VSX_P (TImode))
11366 rs6000_eliminate_indexed_memrefs (operands);
11367 break;
11368
11369 case E_PTImode:
11370 rs6000_eliminate_indexed_memrefs (operands);
11371 break;
11372
11373 default:
11374 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11375 }
11376
11377 /* Above, we may have called force_const_mem which may have returned
11378 an invalid address. If we can, fix this up; otherwise, reload will
11379 have to deal with it. */
11380 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11381 operands[1] = validize_mem (operands[1]);
11382
11383 emit_set:
11384 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11385 }
11386
11387 /* Return true if a structure, union or array containing FIELD should be
11388 accessed using `BLKMODE'.
11389
11390 For the SPE, simd types are V2SI, and gcc can be tempted to put the
11391 entire thing in a DI and use subregs to access the internals.
11392 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
11393 back-end. Because a single GPR can hold a V2SI, but not a DI, the
11394 best thing to do is set structs to BLKmode and avoid Severe Tire
11395 Damage.
11396
11397 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
11398 fit into one register, whereas DI still needs two. */
11399
11400 static bool
11401 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
11402 {
11403 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
11404 || (TARGET_E500_DOUBLE && mode == DFmode));
11405 }
11406 \f
11407 /* Nonzero if we can use a floating-point register to pass this arg. */
11408 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11409 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11410 && (CUM)->fregno <= FP_ARG_MAX_REG \
11411 && TARGET_HARD_FLOAT && TARGET_FPRS)
11412
11413 /* Nonzero if we can use an AltiVec register to pass this arg. */
11414 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11415 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11416 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11417 && TARGET_ALTIVEC_ABI \
11418 && (NAMED))
11419
11420 /* Walk down the type tree of TYPE counting consecutive base elements.
11421 If *MODEP is VOIDmode, then set it to the first valid floating point
11422 or vector type. If a non-floating point or vector type is found, or
11423 if a floating point or vector type that doesn't match a non-VOIDmode
11424 *MODEP is found, then return -1, otherwise return the count in the
11425 sub-tree. */
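/* Illustrative results (assuming DFmode and 128-bit vector types are
   valid candidates here):

     struct { double x, y; }        -> *MODEP = DFmode, returns 2
     double a[4]                    -> *MODEP = DFmode, returns 4
     _Complex double                -> *MODEP = DFmode, returns 2
     struct { double d; float f; } -> returns -1 (mixed element modes)
*/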
11426
11427 static int
11428 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11429 {
11430 machine_mode mode;
11431 HOST_WIDE_INT size;
11432
11433 switch (TREE_CODE (type))
11434 {
11435 case REAL_TYPE:
11436 mode = TYPE_MODE (type);
11437 if (!SCALAR_FLOAT_MODE_P (mode))
11438 return -1;
11439
11440 if (*modep == VOIDmode)
11441 *modep = mode;
11442
11443 if (*modep == mode)
11444 return 1;
11445
11446 break;
11447
11448 case COMPLEX_TYPE:
11449 mode = TYPE_MODE (TREE_TYPE (type));
11450 if (!SCALAR_FLOAT_MODE_P (mode))
11451 return -1;
11452
11453 if (*modep == VOIDmode)
11454 *modep = mode;
11455
11456 if (*modep == mode)
11457 return 2;
11458
11459 break;
11460
11461 case VECTOR_TYPE:
11462 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11463 return -1;
11464
11465 /* Use V4SImode as representative of all 128-bit vector types. */
11466 size = int_size_in_bytes (type);
11467 switch (size)
11468 {
11469 case 16:
11470 mode = V4SImode;
11471 break;
11472 default:
11473 return -1;
11474 }
11475
11476 if (*modep == VOIDmode)
11477 *modep = mode;
11478
11479 /* Vector modes are considered to be opaque: two vectors are
11480 equivalent for the purposes of being homogeneous aggregates
11481 if they are the same size. */
11482 if (*modep == mode)
11483 return 1;
11484
11485 break;
11486
11487 case ARRAY_TYPE:
11488 {
11489 int count;
11490 tree index = TYPE_DOMAIN (type);
11491
11492 /* Can't handle incomplete types nor sizes that are not
11493 fixed. */
11494 if (!COMPLETE_TYPE_P (type)
11495 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11496 return -1;
11497
11498 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11499 if (count == -1
11500 || !index
11501 || !TYPE_MAX_VALUE (index)
11502 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11503 || !TYPE_MIN_VALUE (index)
11504 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11505 || count < 0)
11506 return -1;
11507
11508 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11509 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11510
11511 /* There must be no padding. */
11512 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11513 return -1;
11514
11515 return count;
11516 }
11517
11518 case RECORD_TYPE:
11519 {
11520 int count = 0;
11521 int sub_count;
11522 tree field;
11523
11524 /* Can't handle incomplete types nor sizes that are not
11525 fixed. */
11526 if (!COMPLETE_TYPE_P (type)
11527 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11528 return -1;
11529
11530 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11531 {
11532 if (TREE_CODE (field) != FIELD_DECL)
11533 continue;
11534
11535 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11536 if (sub_count < 0)
11537 return -1;
11538 count += sub_count;
11539 }
11540
11541 /* There must be no padding. */
11542 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11543 return -1;
11544
11545 return count;
11546 }
11547
11548 case UNION_TYPE:
11549 case QUAL_UNION_TYPE:
11550 {
11551 /* These aren't very interesting except in a degenerate case. */
11552 int count = 0;
11553 int sub_count;
11554 tree field;
11555
11556 /* Can't handle incomplete types nor sizes that are not
11557 fixed. */
11558 if (!COMPLETE_TYPE_P (type)
11559 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11560 return -1;
11561
11562 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11563 {
11564 if (TREE_CODE (field) != FIELD_DECL)
11565 continue;
11566
11567 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11568 if (sub_count < 0)
11569 return -1;
11570 count = count > sub_count ? count : sub_count;
11571 }
11572
11573 /* There must be no padding. */
11574 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11575 return -1;
11576
11577 return count;
11578 }
11579
11580 default:
11581 break;
11582 }
11583
11584 return -1;
11585 }
11586
11587 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11588 float or vector aggregate that shall be passed in FP/vector registers
11589 according to the ELFv2 ABI, return the homogeneous element mode in
11590 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11591
11592 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
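/* For example (a sketch, assuming the ELFv2 limit AGGR_ARG_NUM_REG
   allows it): for struct { double a, b, c; } this sets *ELT_MODE to
   DFmode and *N_ELTS to 3 and returns true, so the aggregate is
   passed in three consecutive FP registers.  */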
11593
11594 static bool
11595 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11596 machine_mode *elt_mode,
11597 int *n_elts)
11598 {
11599 /* Note that we do not accept complex types at the top level as
11600 homogeneous aggregates; these types are handled via the
11601 targetm.calls.split_complex_arg mechanism. Complex types
11602 can be elements of homogeneous aggregates, however. */
11603 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11604 {
11605 machine_mode field_mode = VOIDmode;
11606 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11607
11608 if (field_count > 0)
11609 {
11610 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11611 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11612
11613 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11614 up to AGGR_ARG_NUM_REG registers. */
11615 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11616 {
11617 if (elt_mode)
11618 *elt_mode = field_mode;
11619 if (n_elts)
11620 *n_elts = field_count;
11621 return true;
11622 }
11623 }
11624 }
11625
11626 if (elt_mode)
11627 *elt_mode = mode;
11628 if (n_elts)
11629 *n_elts = 1;
11630 return false;
11631 }
11632
11633 /* Return a nonzero value to say to return the function value in
11634 memory, just as large structures are always returned. TYPE will be
11635 the data type of the value, and FNTYPE will be the type of the
11636 function doing the returning, or @code{NULL} for libcalls.
11637
11638 The AIX ABI for the RS/6000 specifies that all structures are
11639 returned in memory. The Darwin ABI does the same.
11640
11641 For the Darwin 64 Bit ABI, a function result can be returned in
11642 registers or in memory, depending on the size of the return data
11643 type. If it is returned in registers, the value occupies the same
11644 registers as it would if it were the first and only function
11645 argument. Otherwise, the function places its result in memory at
11646 the location pointed to by GPR3.
11647
11648 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11649 but a draft put them in memory, and GCC used to implement the draft
11650 instead of the final standard. Therefore, aix_struct_return
11651 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11652 compatibility can change DRAFT_V4_STRUCT_RET to override the
11653 default, and -m switches get the final word. See
11654 rs6000_option_override_internal for more details.
11655
11656 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11657 long double support is enabled. These values are returned in memory.
11658
11659 int_size_in_bytes returns -1 for variable size objects, which go in
11660 memory always. The cast to unsigned makes -1 > 8. */
11661
11662 static bool
11663 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11664 {
11665 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11666 if (TARGET_MACHO
11667 && rs6000_darwin64_abi
11668 && TREE_CODE (type) == RECORD_TYPE
11669 && int_size_in_bytes (type) > 0)
11670 {
11671 CUMULATIVE_ARGS valcum;
11672 rtx valret;
11673
11674 valcum.words = 0;
11675 valcum.fregno = FP_ARG_MIN_REG;
11676 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11677 /* Do a trial code generation as if this were going to be passed
11678 as an argument; if any part goes in memory, we return NULL. */
11679 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11680 if (valret)
11681 return false;
11682 /* Otherwise fall through to more conventional ABI rules. */
11683 }
11684
11685 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11686 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11687 NULL, NULL))
11688 return false;
11689
11690 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11691 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11692 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11693 return false;
11694
11695 if (AGGREGATE_TYPE_P (type)
11696 && (aix_struct_return
11697 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11698 return true;
11699
11700 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11701 modes only exist for GCC vector types if -maltivec. */
11702 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11703 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11704 return false;
11705
11706 /* Return synthetic vectors in memory. */
11707 if (TREE_CODE (type) == VECTOR_TYPE
11708 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11709 {
11710 static bool warned_for_return_big_vectors = false;
11711 if (!warned_for_return_big_vectors)
11712 {
11713 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11714 "non-standard ABI extension with no compatibility guarantee");
11715 warned_for_return_big_vectors = true;
11716 }
11717 return true;
11718 }
11719
11720 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11721 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11722 return true;
11723
11724 return false;
11725 }
11726
11727 /* Specify whether values returned in registers should be at the most
11728 significant end of a register. We want aggregates returned by
11729 value to match the way aggregates are passed to functions. */
11730
11731 static bool
11732 rs6000_return_in_msb (const_tree valtype)
11733 {
11734 return (DEFAULT_ABI == ABI_ELFv2
11735 && BYTES_BIG_ENDIAN
11736 && AGGREGATE_TYPE_P (valtype)
11737 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11738 }
11739
11740 #ifdef HAVE_AS_GNU_ATTRIBUTE
11741 /* Return TRUE if a call to function FNDECL may be one that
11742 potentially affects the function calling ABI of the object file. */
11743
11744 static bool
11745 call_ABI_of_interest (tree fndecl)
11746 {
11747 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11748 {
11749 struct cgraph_node *c_node;
11750
11751 /* Libcalls are always interesting. */
11752 if (fndecl == NULL_TREE)
11753 return true;
11754
11755 /* Any call to an external function is interesting. */
11756 if (DECL_EXTERNAL (fndecl))
11757 return true;
11758
11759 /* Interesting functions that we are emitting in this object file. */
11760 c_node = cgraph_node::get (fndecl);
11761 c_node = c_node->ultimate_alias_target ();
11762 return !c_node->only_called_directly_p ();
11763 }
11764 return false;
11765 }
11766 #endif
11767
11768 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11769 for a call to a function whose data type is FNTYPE.
11770 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11771
11772 For incoming args we set the number of arguments in the prototype large
11773 so that we never return a PARALLEL. */
11774
11775 void
11776 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11777 rtx libname ATTRIBUTE_UNUSED, int incoming,
11778 int libcall, int n_named_args,
11779 tree fndecl ATTRIBUTE_UNUSED,
11780 machine_mode return_mode ATTRIBUTE_UNUSED)
11781 {
11782 static CUMULATIVE_ARGS zero_cumulative;
11783
11784 *cum = zero_cumulative;
11785 cum->words = 0;
11786 cum->fregno = FP_ARG_MIN_REG;
11787 cum->vregno = ALTIVEC_ARG_MIN_REG;
11788 cum->prototype = (fntype && prototype_p (fntype));
11789 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11790 ? CALL_LIBCALL : CALL_NORMAL);
11791 cum->sysv_gregno = GP_ARG_MIN_REG;
11792 cum->stdarg = stdarg_p (fntype);
11793 cum->libcall = libcall;
11794
11795 cum->nargs_prototype = 0;
11796 if (incoming || cum->prototype)
11797 cum->nargs_prototype = n_named_args;
11798
11799 /* Check for a longcall attribute. */
11800 if ((!fntype && rs6000_default_long_calls)
11801 || (fntype
11802 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11803 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11804 cum->call_cookie |= CALL_LONG;
11805
11806 if (TARGET_DEBUG_ARG)
11807 {
11808 fprintf (stderr, "\ninit_cumulative_args:");
11809 if (fntype)
11810 {
11811 tree ret_type = TREE_TYPE (fntype);
11812 fprintf (stderr, " ret code = %s,",
11813 get_tree_code_name (TREE_CODE (ret_type)));
11814 }
11815
11816 if (cum->call_cookie & CALL_LONG)
11817 fprintf (stderr, " longcall,");
11818
11819 fprintf (stderr, " proto = %d, nargs = %d\n",
11820 cum->prototype, cum->nargs_prototype);
11821 }
11822
11823 #ifdef HAVE_AS_GNU_ATTRIBUTE
11824 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11825 {
11826 cum->escapes = call_ABI_of_interest (fndecl);
11827 if (cum->escapes)
11828 {
11829 tree return_type;
11830
11831 if (fntype)
11832 {
11833 return_type = TREE_TYPE (fntype);
11834 return_mode = TYPE_MODE (return_type);
11835 }
11836 else
11837 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11838
11839 if (return_type != NULL)
11840 {
11841 if (TREE_CODE (return_type) == RECORD_TYPE
11842 && TYPE_TRANSPARENT_AGGR (return_type))
11843 {
11844 return_type = TREE_TYPE (first_field (return_type));
11845 return_mode = TYPE_MODE (return_type);
11846 }
11847 if (AGGREGATE_TYPE_P (return_type)
11848 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11849 <= 8))
11850 rs6000_returns_struct = true;
11851 }
11852 if (SCALAR_FLOAT_MODE_P (return_mode))
11853 {
11854 rs6000_passes_float = true;
11855 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11856 && (FLOAT128_IBM_P (return_mode)
11857 || FLOAT128_IEEE_P (return_mode)
11858 || (return_type != NULL
11859 && (TYPE_MAIN_VARIANT (return_type)
11860 == long_double_type_node))))
11861 rs6000_passes_long_double = true;
11862 }
11863 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11864 || SPE_VECTOR_MODE (return_mode))
11865 rs6000_passes_vector = true;
11866 }
11867 }
11868 #endif
11869
11870 if (fntype
11871 && !TARGET_ALTIVEC
11872 && TARGET_ALTIVEC_ABI
11873 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11874 {
11875 error ("cannot return value in vector register because"
11876 " altivec instructions are disabled, use -maltivec"
11877 " to enable them");
11878 }
11879 }
11880 \f
11881 /* The mode the ABI uses for a word. This is not the same as word_mode
11882 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11883
11884 static scalar_int_mode
11885 rs6000_abi_word_mode (void)
11886 {
11887 return TARGET_32BIT ? SImode : DImode;
11888 }
11889
11890 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11891 static char *
11892 rs6000_offload_options (void)
11893 {
11894 if (TARGET_64BIT)
11895 return xstrdup ("-foffload-abi=lp64");
11896 else
11897 return xstrdup ("-foffload-abi=ilp32");
11898 }
11899
11900 /* On rs6000, function arguments are promoted, as are function return
11901 values. */
11902
11903 static machine_mode
11904 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11905 machine_mode mode,
11906 int *punsignedp ATTRIBUTE_UNUSED,
11907 const_tree, int)
11908 {
11909 PROMOTE_MODE (mode, *punsignedp, type);
11910
11911 return mode;
11912 }
11913
11914 /* Return true if TYPE must be passed on the stack and not in registers. */
11915
11916 static bool
11917 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11918 {
11919 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11920 return must_pass_in_stack_var_size (mode, type);
11921 else
11922 return must_pass_in_stack_var_size_or_pad (mode, type);
11923 }
11924
11925 static inline bool
11926 is_complex_IBM_long_double (machine_mode mode)
11927 {
11928 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11929 }
11930
11931 /* Whether ABI_V4 passes MODE args to a function in floating point
11932 registers. */
11933
11934 static bool
11935 abi_v4_pass_in_fpr (machine_mode mode)
11936 {
11937 if (!TARGET_FPRS || !TARGET_HARD_FLOAT)
11938 return false;
11939 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11940 return true;
11941 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11942 return true;
11943 /* ABI_V4 passes complex IBM long double in 8 gprs.
11944 Stupid, but we can't change the ABI now. */
11945 if (is_complex_IBM_long_double (mode))
11946 return false;
11947 if (FLOAT128_2REG_P (mode))
11948 return true;
11949 if (DECIMAL_FLOAT_MODE_P (mode))
11950 return true;
11951 return false;
11952 }
11953
11954 /* If defined, a C expression which determines whether, and in which
11955 direction, to pad out an argument with extra space. The value
11956 should be of type `enum direction': either `upward' to pad above
11957 the argument, `downward' to pad below, or `none' to inhibit
11958 padding.
11959
11960 For the AIX ABI structs are always stored left shifted in their
11961 argument slot. */
11962
11963 enum direction
11964 function_arg_padding (machine_mode mode, const_tree type)
11965 {
11966 #ifndef AGGREGATE_PADDING_FIXED
11967 #define AGGREGATE_PADDING_FIXED 0
11968 #endif
11969 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11970 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11971 #endif
11972
11973 if (!AGGREGATE_PADDING_FIXED)
11974 {
11975 /* GCC used to pass structures of the same size as integer types as
11976 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11977 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11978 passed padded downward, except that -mstrict-align further
11979 muddied the water in that multi-component structures of 2 and 4
11980 bytes in size were passed padded upward.
11981
11982 The following arranges for best compatibility with previous
11983 versions of gcc, but removes the -mstrict-align dependency. */
11984 if (BYTES_BIG_ENDIAN)
11985 {
11986 HOST_WIDE_INT size = 0;
11987
11988 if (mode == BLKmode)
11989 {
11990 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11991 size = int_size_in_bytes (type);
11992 }
11993 else
11994 size = GET_MODE_SIZE (mode);
11995
11996 if (size == 1 || size == 2 || size == 4)
11997 return downward;
11998 }
11999 return upward;
12000 }
12001
12002 if (AGGREGATES_PAD_UPWARD_ALWAYS)
12003 {
12004 if (type != 0 && AGGREGATE_TYPE_P (type))
12005 return upward;
12006 }
12007
12008 /* Fall back to the default. */
12009 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
12010 }
12011
12012 /* If defined, a C expression that gives the alignment boundary, in bits,
12013 of an argument with the specified mode and type. If it is not defined,
12014 PARM_BOUNDARY is used for all arguments.
12015
12016 V.4 wants long longs and doubles to be double word aligned. Just
12017 testing the mode size is a boneheaded way to do this as it means
12018 that other types such as complex int are also double word aligned.
12019 However, we're stuck with this because changing the ABI might break
12020 existing library interfaces.
12021
12022 Doubleword align SPE vectors.
12023 Quadword align Altivec/VSX vectors.
12024 Quadword align large synthetic vector types. */
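/* A few illustrative outcomes of the rules below: under ABI_V4 a
   DImode or DFmode argument (size 8) gets 64-bit alignment, an
   Altivec V4SImode argument gets 128-bit alignment, and a plain int
   falls through to PARM_BOUNDARY.  */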
12025
12026 static unsigned int
12027 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
12028 {
12029 machine_mode elt_mode;
12030 int n_elts;
12031
12032 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12033
12034 if (DEFAULT_ABI == ABI_V4
12035 && (GET_MODE_SIZE (mode) == 8
12036 || (TARGET_HARD_FLOAT
12037 && TARGET_FPRS
12038 && !is_complex_IBM_long_double (mode)
12039 && FLOAT128_2REG_P (mode))))
12040 return 64;
12041 else if (FLOAT128_VECTOR_P (mode))
12042 return 128;
12043 else if (SPE_VECTOR_MODE (mode)
12044 || (type && TREE_CODE (type) == VECTOR_TYPE
12045 && int_size_in_bytes (type) >= 8
12046 && int_size_in_bytes (type) < 16))
12047 return 64;
12048 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12049 || (type && TREE_CODE (type) == VECTOR_TYPE
12050 && int_size_in_bytes (type) >= 16))
12051 return 128;
12052
12053 /* Aggregate types that need > 8 byte alignment are quadword-aligned
12054 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
12055 -mcompat-align-parm is used. */
12056 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
12057 || DEFAULT_ABI == ABI_ELFv2)
12058 && type && TYPE_ALIGN (type) > 64)
12059 {
12060 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
12061 or homogeneous float/vector aggregates here. We already handled
12062 vector aggregates above, but still need to check for float here. */
12063 bool aggregate_p = (AGGREGATE_TYPE_P (type)
12064 && !SCALAR_FLOAT_MODE_P (elt_mode));
12065
12066 /* We used to check for BLKmode instead of the above aggregate type
12067 check. Warn when this results in any difference to the ABI. */
12068 if (aggregate_p != (mode == BLKmode))
12069 {
12070 static bool warned;
12071 if (!warned && warn_psabi)
12072 {
12073 warned = true;
12074 inform (input_location,
12075 "the ABI of passing aggregates with %d-byte alignment"
12076 " has changed in GCC 5",
12077 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
12078 }
12079 }
12080
12081 if (aggregate_p)
12082 return 128;
12083 }
12084
12085 /* Similar for the Darwin64 ABI. Note that for historical reasons we
12086 implement the "aggregate type" check as a BLKmode check here; this
12087 means certain aggregate types are in fact not aligned. */
12088 if (TARGET_MACHO && rs6000_darwin64_abi
12089 && mode == BLKmode
12090 && type && TYPE_ALIGN (type) > 64)
12091 return 128;
12092
12093 return PARM_BOUNDARY;
12094 }
12095
12096 /* The offset in words to the start of the parameter save area. */
12097
12098 static unsigned int
12099 rs6000_parm_offset (void)
12100 {
12101 return (DEFAULT_ABI == ABI_V4 ? 2
12102 : DEFAULT_ABI == ABI_ELFv2 ? 4
12103 : 6);
12104 }
12105
12106 /* For a function parm of MODE and TYPE, return the starting word in
12107 the parameter area. NWORDS of the parameter area are already used. */
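/* Worked example (assuming 64-bit ELFv2, where PARM_BOUNDARY is 64
   and rs6000_parm_offset () is 4): for a quadword-aligned argument
   the boundary is 128, so ALIGN below is 128/64 - 1 = 1; with
   NWORDS = 3 the start is 3 + (-(4 + 3) & 1) = 3 + 1 = 4, skipping
   one word of padding.  */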
12108
12109 static unsigned int
12110 rs6000_parm_start (machine_mode mode, const_tree type,
12111 unsigned int nwords)
12112 {
12113 unsigned int align;
12114
12115 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
12116 return nwords + (-(rs6000_parm_offset () + nwords) & align);
12117 }
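/* Worked example (editorial): under the 64-bit AIX ABI, rs6000_parm_offset
   is 6 and PARM_BOUNDARY is 64, so a 128-bit-aligned argument has
   ALIGN == 128 / 64 - 1 == 1.  With NWORDS == 0, -(6 + 0) & 1 == 0 and
   the argument starts at word 0; with NWORDS == 1, -(6 + 1) & 1 == 1,
   so one padding word is skipped and the argument starts at word 2.  */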
12118
12119 /* Compute the size (in words) of a function argument. */
12120
12121 static unsigned long
12122 rs6000_arg_size (machine_mode mode, const_tree type)
12123 {
12124 unsigned long size;
12125
12126 if (mode != BLKmode)
12127 size = GET_MODE_SIZE (mode);
12128 else
12129 size = int_size_in_bytes (type);
12130
12131 if (TARGET_32BIT)
12132 return (size + 3) >> 2;
12133 else
12134 return (size + 7) >> 3;
12135 }
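/* For example (editorial): a 10-byte BLKmode struct takes
   (10 + 3) >> 2 == 3 words when TARGET_32BIT and (10 + 7) >> 3 == 2
   doublewords otherwise.  */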
12136 \f
12137 /* Use this to flush pending int fields. */
12138
12139 static void
12140 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
12141 HOST_WIDE_INT bitpos, int final)
12142 {
12143 unsigned int startbit, endbit;
12144 int intregs, intoffset;
12145 machine_mode mode;
12146
12147 /* Handle the situations where a float is taking up the first half
12148 of the GPR, and the other half is empty (typically due to
12149 alignment restrictions). We can detect this by an 8-byte-aligned
12150 int field, or by seeing that this is the final flush for this
12151 argument. Count the word and continue on. */
12152 if (cum->floats_in_gpr == 1
12153 && (cum->intoffset % 64 == 0
12154 || (cum->intoffset == -1 && final)))
12155 {
12156 cum->words++;
12157 cum->floats_in_gpr = 0;
12158 }
12159
12160 if (cum->intoffset == -1)
12161 return;
12162
12163 intoffset = cum->intoffset;
12164 cum->intoffset = -1;
12165 cum->floats_in_gpr = 0;
12166
12167 if (intoffset % BITS_PER_WORD != 0)
12168 {
12169 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12170 MODE_INT, 0);
12171 if (mode == BLKmode)
12172 {
12173 /* We couldn't find an appropriate mode, which happens,
12174 e.g., in packed structs when there are 3 bytes to load.
12175 Move intoffset back to the beginning of the word in this
12176 case. */
12177 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12178 }
12179 }
12180
12181 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12182 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12183 intregs = (endbit - startbit) / BITS_PER_WORD;
12184 cum->words += intregs;
12185 /* cum->words is signed; make sure it reaches the word containing ENDBIT. */
12186 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
12187 {
12188 int pad = (endbit/BITS_PER_WORD) - cum->words;
12189 cum->words += pad;
12190 }
12191 }
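/* Example (editorial): if int fields start at bit 0 and the flush is
   triggered at BITPOS == 96 on a 64-bit target, STARTBIT == 0 and
   ENDBIT == 128, so two GPR words are counted.  */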
12192
12193 /* The darwin64 ABI calls for us to recurse down through structs,
12194 looking for elements passed in registers. Unfortunately, we have
12195 to track int register count here also because of misalignments
12196 in powerpc alignment mode. */
12197
12198 static void
12199 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
12200 const_tree type,
12201 HOST_WIDE_INT startbitpos)
12202 {
12203 tree f;
12204
12205 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12206 if (TREE_CODE (f) == FIELD_DECL)
12207 {
12208 HOST_WIDE_INT bitpos = startbitpos;
12209 tree ftype = TREE_TYPE (f);
12210 machine_mode mode;
12211 if (ftype == error_mark_node)
12212 continue;
12213 mode = TYPE_MODE (ftype);
12214
12215 if (DECL_SIZE (f) != 0
12216 && tree_fits_uhwi_p (bit_position (f)))
12217 bitpos += int_bit_position (f);
12218
12219 /* ??? FIXME: else assume zero offset. */
12220
12221 if (TREE_CODE (ftype) == RECORD_TYPE)
12222 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
12223 else if (USE_FP_FOR_ARG_P (cum, mode))
12224 {
12225 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
12226 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12227 cum->fregno += n_fpregs;
12228 /* Single-precision floats present a special problem for
12229 us, because they are smaller than an 8-byte GPR, and so
12230 the structure-packing rules combined with the standard
12231 varargs behavior mean that we want to pack float/float
12232 and float/int combinations into a single register's
12233 space. This is complicated by the arg advance flushing,
12234 which works on arbitrarily large groups of int-type
12235 fields. */
12236 if (mode == SFmode)
12237 {
12238 if (cum->floats_in_gpr == 1)
12239 {
12240 /* Two floats in a word; count the word and reset
12241 the float count. */
12242 cum->words++;
12243 cum->floats_in_gpr = 0;
12244 }
12245 else if (bitpos % 64 == 0)
12246 {
12247 /* A float at the beginning of an 8-byte word;
12248 count it and put off adjusting cum->words until
12249 we see if an arg advance flush is going to do it
12250 for us. */
12251 cum->floats_in_gpr++;
12252 }
12253 else
12254 {
12255 /* The float is at the end of a word, preceded
12256 by integer fields, so the arg advance flush
12257 just above has already set cum->words and
12258 everything is taken care of. */
12259 }
12260 }
12261 else
12262 cum->words += n_fpregs;
12263 }
12264 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12265 {
12266 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12267 cum->vregno++;
12268 cum->words += 2;
12269 }
12270 else if (cum->intoffset == -1)
12271 cum->intoffset = bitpos;
12272 }
12273 }
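/* Example (editorial): for struct { float a; float b; } the first float
   starts at bit 0, so floats_in_gpr becomes 1; when the second float is
   seen, the floats_in_gpr == 1 case above counts one shared GPR word,
   while each float still consumes its own FPR.  */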
12274
12275 /* Check for an item that needs to be considered specially under the
12276 Darwin 64-bit ABI: record types whose mode is BLKmode, or records
12277 that are exactly 8 bytes in size. */
12278 static int
12279 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12280 {
12281 return rs6000_darwin64_abi
12282 && ((mode == BLKmode
12283 && TREE_CODE (type) == RECORD_TYPE
12284 && int_size_in_bytes (type) > 0)
12285 || (type && TREE_CODE (type) == RECORD_TYPE
12286 && int_size_in_bytes (type) == 8)) ? 1 : 0;
12287 }
12288
12289 /* Update the data in CUM to advance over an argument
12290 of mode MODE and data type TYPE.
12291 (TYPE is null for libcalls where that information may not be available.)
12292
12293 Note that for args passed by reference, function_arg will be called
12294 with MODE and TYPE set to that of the pointer to the arg, not the arg
12295 itself. */
12296
12297 static void
12298 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
12299 const_tree type, bool named, int depth)
12300 {
12301 machine_mode elt_mode;
12302 int n_elts;
12303
12304 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12305
12306 /* Only tick off an argument if we're not recursing. */
12307 if (depth == 0)
12308 cum->nargs_prototype--;
12309
12310 #ifdef HAVE_AS_GNU_ATTRIBUTE
12311 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
12312 && cum->escapes)
12313 {
12314 if (SCALAR_FLOAT_MODE_P (mode))
12315 {
12316 rs6000_passes_float = true;
12317 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
12318 && (FLOAT128_IBM_P (mode)
12319 || FLOAT128_IEEE_P (mode)
12320 || (type != NULL
12321 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
12322 rs6000_passes_long_double = true;
12323 }
12324 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
12325 || (SPE_VECTOR_MODE (mode)
12326 && !cum->stdarg
12327 && cum->sysv_gregno <= GP_ARG_MAX_REG))
12328 rs6000_passes_vector = true;
12329 }
12330 #endif
12331
12332 if (TARGET_ALTIVEC_ABI
12333 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12334 || (type && TREE_CODE (type) == VECTOR_TYPE
12335 && int_size_in_bytes (type) == 16)))
12336 {
12337 bool stack = false;
12338
12339 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12340 {
12341 cum->vregno += n_elts;
12342
12343 if (!TARGET_ALTIVEC)
12344 error ("cannot pass argument in vector register because"
12345 " altivec instructions are disabled, use -maltivec"
12346 " to enable them");
12347
12348 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12349 even if it is going to be passed in a vector register.
12350 Darwin does the same for variable-argument functions. */
12351 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12352 && TARGET_64BIT)
12353 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
12354 stack = true;
12355 }
12356 else
12357 stack = true;
12358
12359 if (stack)
12360 {
12361 int align;
12362
12363 /* Vector parameters must be 16-byte aligned. In 32-bit
12364 mode this means we need to take into account the offset
12365 to the parameter save area. In 64-bit mode, they just
12366 have to start on an even word, since the parameter save
12367 area is 16-byte aligned. */
12368 if (TARGET_32BIT)
12369 align = -(rs6000_parm_offset () + cum->words) & 3;
12370 else
12371 align = cum->words & 1;
12372 cum->words += align + rs6000_arg_size (mode, type);
12373
12374 if (TARGET_DEBUG_ARG)
12375 {
12376 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12377 cum->words, align);
12378 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12379 cum->nargs_prototype, cum->prototype,
12380 GET_MODE_NAME (mode));
12381 }
12382 }
12383 }
12384 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
12385 && !cum->stdarg
12386 && cum->sysv_gregno <= GP_ARG_MAX_REG)
12387 cum->sysv_gregno++;
12388
12389 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12390 {
12391 int size = int_size_in_bytes (type);
12392 /* Variable sized types have size == -1 and are
12393 treated as if consisting entirely of ints.
12394 Pad to a 16-byte boundary if needed. */
12395 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12396 && (cum->words % 2) != 0)
12397 cum->words++;
12398 /* For varargs, we can just go up by the size of the struct. */
12399 if (!named)
12400 cum->words += (size + 7) / 8;
12401 else
12402 {
12403 /* It is tempting to say int register count just goes up by
12404 sizeof(type)/8, but this is wrong in a case such as
12405 { int; double; int; } [powerpc alignment]. We have to
12406 grovel through the fields for these too. */
12407 cum->intoffset = 0;
12408 cum->floats_in_gpr = 0;
12409 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12410 rs6000_darwin64_record_arg_advance_flush (cum,
12411 size * BITS_PER_UNIT, 1);
12412 }
12413 if (TARGET_DEBUG_ARG)
12414 {
12415 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d, ",
12416 cum->words, TYPE_ALIGN (type), size);
12417 fprintf (stderr,
12418 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12419 cum->nargs_prototype, cum->prototype,
12420 GET_MODE_NAME (mode));
12421 }
12422 }
12423 else if (DEFAULT_ABI == ABI_V4)
12424 {
12425 if (abi_v4_pass_in_fpr (mode))
12426 {
12427 /* _Decimal128 must use an even/odd register pair. This assumes
12428 that the register number is odd when fregno is odd. */
12429 if (mode == TDmode && (cum->fregno % 2) == 1)
12430 cum->fregno++;
12431
12432 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12433 <= FP_ARG_V4_MAX_REG)
12434 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12435 else
12436 {
12437 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12438 if (mode == DFmode || FLOAT128_IBM_P (mode)
12439 || mode == DDmode || mode == TDmode)
12440 cum->words += cum->words & 1;
12441 cum->words += rs6000_arg_size (mode, type);
12442 }
12443 }
12444 else
12445 {
12446 int n_words = rs6000_arg_size (mode, type);
12447 int gregno = cum->sysv_gregno;
12448
12449 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
12450 (r7,r8) or (r9,r10), as is any other 2-word item such
12451 as complex int, due to a historical mistake. */
12452 if (n_words == 2)
12453 gregno += (1 - gregno) & 1;
12454
12455 /* Multi-reg args are not split between registers and stack. */
12456 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12457 {
12458 /* Long long and SPE vectors are aligned on the stack.
12459 So are other 2-word items such as complex int due to
12460 a historical mistake. */
12461 if (n_words == 2)
12462 cum->words += cum->words & 1;
12463 cum->words += n_words;
12464 }
12465
12466 /* Note: we keep accumulating gregno even after we have started
12467 spilling to the stack; a gregno beyond GP_ARG_MAX_REG is how
12468 expand_builtin_saveregs learns that spilling has begun. */
12469 cum->sysv_gregno = gregno + n_words;
12470 }
12471
12472 if (TARGET_DEBUG_ARG)
12473 {
12474 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12475 cum->words, cum->fregno);
12476 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12477 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12478 fprintf (stderr, "mode = %4s, named = %d\n",
12479 GET_MODE_NAME (mode), named);
12480 }
12481 }
12482 else
12483 {
12484 int n_words = rs6000_arg_size (mode, type);
12485 int start_words = cum->words;
12486 int align_words = rs6000_parm_start (mode, type, start_words);
12487
12488 cum->words = align_words + n_words;
12489
12490 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
12491 {
12492 /* _Decimal128 must be passed in an even/odd float register pair.
12493 This assumes that the register number is odd when fregno is
12494 odd. */
12495 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12496 cum->fregno++;
12497 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12498 }
12499
12500 if (TARGET_DEBUG_ARG)
12501 {
12502 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12503 cum->words, cum->fregno);
12504 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12505 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12506 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12507 named, align_words - start_words, depth);
12508 }
12509 }
12510 }
12511
12512 static void
12513 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12514 const_tree type, bool named)
12515 {
12516 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12517 0);
12518 }
12519
12520 static rtx
12521 spe_build_register_parallel (machine_mode mode, int gregno)
12522 {
12523 rtx r1, r3, r5, r7;
12524
12525 switch (mode)
12526 {
12527 case E_DFmode:
12528 r1 = gen_rtx_REG (DImode, gregno);
12529 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12530 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
12531
12532 case E_DCmode:
12533 case E_TFmode:
12534 r1 = gen_rtx_REG (DImode, gregno);
12535 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12536 r3 = gen_rtx_REG (DImode, gregno + 2);
12537 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12538 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
12539
12540 case E_TCmode:
12541 r1 = gen_rtx_REG (DImode, gregno);
12542 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
12543 r3 = gen_rtx_REG (DImode, gregno + 2);
12544 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
12545 r5 = gen_rtx_REG (DImode, gregno + 4);
12546 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
12547 r7 = gen_rtx_REG (DImode, gregno + 6);
12548 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
12549 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
12550
12551 default:
12552 gcc_unreachable ();
12553 }
12554 }
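/* Illustration (editorial): for a DCmode value starting at r5, the code
   above builds two DImode halves at byte offsets 0 and 8:
     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])
   The register numbers step by two because each 64-bit half occupies a
   pair of 32-bit GPRs in the calling convention.  */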
12555
12556 /* Determine where to put a SIMD argument on the SPE. */
12557 static rtx
12558 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
12559 const_tree type)
12560 {
12561 int gregno = cum->sysv_gregno;
12562
12563 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
12564 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
12565 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
12566 || mode == DCmode || mode == TCmode))
12567 {
12568 int n_words = rs6000_arg_size (mode, type);
12569
12570 /* Doubles go in an odd/even register pair (r5/r6, etc). */
12571 if (mode == DFmode)
12572 gregno += (1 - gregno) & 1;
12573
12574 /* Multi-reg args are not split between registers and stack. */
12575 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12576 return NULL_RTX;
12577
12578 return spe_build_register_parallel (mode, gregno);
12579 }
12580 if (cum->stdarg)
12581 {
12582 int n_words = rs6000_arg_size (mode, type);
12583
12584 /* SPE vectors are put in odd registers. */
12585 if (n_words == 2 && (gregno & 1) == 0)
12586 gregno += 1;
12587
12588 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
12589 {
12590 rtx r1, r2;
12591 machine_mode m = SImode;
12592
12593 r1 = gen_rtx_REG (m, gregno);
12594 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
12595 r2 = gen_rtx_REG (m, gregno + 1);
12596 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
12597 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
12598 }
12599 else
12600 return NULL_RTX;
12601 }
12602 else
12603 {
12604 if (gregno <= GP_ARG_MAX_REG)
12605 return gen_rtx_REG (mode, gregno);
12606 else
12607 return NULL_RTX;
12608 }
12609 }
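/* Example (editorial): a 2-word SPE vector passed to a stdarg function
   with sysv_gregno == 4 is first bumped to the odd register r5 and then
   described as (parallel [(reg:SI 5) at byte 0, (reg:SI 6) at byte 4]).  */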
12610
12611 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12612 structure between cum->intoffset and bitpos to integer registers. */
12613
12614 static void
12615 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12616 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12617 {
12618 machine_mode mode;
12619 unsigned int regno;
12620 unsigned int startbit, endbit;
12621 int this_regno, intregs, intoffset;
12622 rtx reg;
12623
12624 if (cum->intoffset == -1)
12625 return;
12626
12627 intoffset = cum->intoffset;
12628 cum->intoffset = -1;
12629
12630 /* If this is the trailing part of a word, try to only load that
12631 much into the register. Otherwise load the whole register. Note
12632 that in the latter case we may pick up unwanted bits. It's not a
12633 problem at the moment, but we may wish to revisit this. */
12634
12635 if (intoffset % BITS_PER_WORD != 0)
12636 {
12637 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12638 MODE_INT, 0);
12639 if (mode == BLKmode)
12640 {
12641 /* We couldn't find an appropriate mode, which happens,
12642 e.g., in packed structs when there are 3 bytes to load.
12643 Move intoffset back to the beginning of the word in this
12644 case. */
12645 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12646 mode = word_mode;
12647 }
12648 }
12649 else
12650 mode = word_mode;
12651
12652 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12653 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12654 intregs = (endbit - startbit) / BITS_PER_WORD;
12655 this_regno = cum->words + intoffset / BITS_PER_WORD;
12656
12657 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12658 cum->use_stack = 1;
12659
12660 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12661 if (intregs <= 0)
12662 return;
12663
12664 intoffset /= BITS_PER_UNIT;
12665 do
12666 {
12667 regno = GP_ARG_MIN_REG + this_regno;
12668 reg = gen_rtx_REG (mode, regno);
12669 rvec[(*k)++] =
12670 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12671
12672 this_regno += 1;
12673 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12674 mode = word_mode;
12675 intregs -= 1;
12676 }
12677 while (intregs > 0);
12678 }
12679
12680 /* Recursive workhorse for rs6000_darwin64_record_arg, below. */
12681
12682 static void
12683 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12684 HOST_WIDE_INT startbitpos, rtx rvec[],
12685 int *k)
12686 {
12687 tree f;
12688
12689 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12690 if (TREE_CODE (f) == FIELD_DECL)
12691 {
12692 HOST_WIDE_INT bitpos = startbitpos;
12693 tree ftype = TREE_TYPE (f);
12694 machine_mode mode;
12695 if (ftype == error_mark_node)
12696 continue;
12697 mode = TYPE_MODE (ftype);
12698
12699 if (DECL_SIZE (f) != 0
12700 && tree_fits_uhwi_p (bit_position (f)))
12701 bitpos += int_bit_position (f);
12702
12703 /* ??? FIXME: else assume zero offset. */
12704
12705 if (TREE_CODE (ftype) == RECORD_TYPE)
12706 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12707 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12708 {
12709 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12710 #if 0
12711 switch (mode)
12712 {
12713 case E_SCmode: mode = SFmode; break;
12714 case E_DCmode: mode = DFmode; break;
12715 case E_TCmode: mode = TFmode; break;
12716 default: break;
12717 }
12718 #endif
12719 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12720 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12721 {
12722 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12723 && (mode == TFmode || mode == TDmode));
12724 /* Long double or _Decimal128 split over regs and memory. */
12725 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12726 cum->use_stack = 1;
12727 }
12728 rvec[(*k)++]
12729 = gen_rtx_EXPR_LIST (VOIDmode,
12730 gen_rtx_REG (mode, cum->fregno++),
12731 GEN_INT (bitpos / BITS_PER_UNIT));
12732 if (FLOAT128_2REG_P (mode))
12733 cum->fregno++;
12734 }
12735 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12736 {
12737 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12738 rvec[(*k)++]
12739 = gen_rtx_EXPR_LIST (VOIDmode,
12740 gen_rtx_REG (mode, cum->vregno++),
12741 GEN_INT (bitpos / BITS_PER_UNIT));
12742 }
12743 else if (cum->intoffset == -1)
12744 cum->intoffset = bitpos;
12745 }
12746 }
12747
12748 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12749 the register(s) to be used for each field and subfield of a struct
12750 being passed by value, along with the offset of where the
12751 register's value may be found in the block. FP fields go in FP
12752 registers, vector fields go in vector registers, and everything
12753 else goes in int registers, packed as in memory.
12754
12755 This code is also used for function return values. RETVAL indicates
12756 whether this is the case.
12757
12758 Much of this is taken from the SPARC V9 port, which has a similar
12759 calling convention. */
12760
12761 static rtx
12762 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12763 bool named, bool retval)
12764 {
12765 rtx rvec[FIRST_PSEUDO_REGISTER];
12766 int k = 1, kbase = 1;
12767 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12768 /* This is a copy; modifications are not visible to our caller. */
12769 CUMULATIVE_ARGS copy_cum = *orig_cum;
12770 CUMULATIVE_ARGS *cum = &copy_cum;
12771
12772 /* Pad to a 16-byte boundary if needed. */
12773 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12774 && (cum->words % 2) != 0)
12775 cum->words++;
12776
12777 cum->intoffset = 0;
12778 cum->use_stack = 0;
12779 cum->named = named;
12780
12781 /* Put entries into rvec[] for individual FP and vector fields, and
12782 for the chunks of memory that go in int regs. Note we start at
12783 element 1; 0 is reserved for an indication of using memory, and
12784 may or may not be filled in below. */
12785 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12786 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12787
12788 /* If any part of the struct went on the stack put all of it there.
12789 This hack is because the generic code for
12790 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12791 parts of the struct are not at the beginning. */
12792 if (cum->use_stack)
12793 {
12794 if (retval)
12795 return NULL_RTX; /* doesn't go in registers at all */
12796 kbase = 0;
12797 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12798 }
12799 if (k > 1 || cum->use_stack)
12800 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12801 else
12802 return NULL_RTX;
12803 }
12804
12805 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12806
12807 static rtx
12808 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12809 int align_words)
12810 {
12811 int n_units;
12812 int i, k;
12813 rtx rvec[GP_ARG_NUM_REG + 1];
12814
12815 if (align_words >= GP_ARG_NUM_REG)
12816 return NULL_RTX;
12817
12818 n_units = rs6000_arg_size (mode, type);
12819
12820 /* Optimize the simple case where the arg fits in one gpr, except in
12821 the case of BLKmode due to assign_parms assuming that registers are
12822 BITS_PER_WORD wide. */
12823 if (n_units == 0
12824 || (n_units == 1 && mode != BLKmode))
12825 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12826
12827 k = 0;
12828 if (align_words + n_units > GP_ARG_NUM_REG)
12829 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12830 using a magic NULL_RTX component.
12831 This is not strictly correct. Only some of the arg belongs in
12832 memory, not all of it. However, the normal scheme using
12833 function_arg_partial_nregs can result in unusual subregs, e.g.
12834 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12835 store the whole arg to memory is often more efficient than code
12836 to store pieces, and we know that space is available in the right
12837 place for the whole arg. */
12838 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12839
12840 i = 0;
12841 do
12842 {
12843 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12844 rtx off = GEN_INT (i++ * 4);
12845 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12846 }
12847 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12848
12849 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12850 }
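/* Example (editorial): a DFmode argument with ALIGN_WORDS == 7 needs two
   SImode words here, but only r10 is left, so the result is
     (parallel:DF [(nil) at 0, (reg:SI 10) at 0])
   where the leading NULL_RTX element is the "also in memory" marker
   described above.  */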
12851
12852 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12853 but must also be copied into the parameter save area starting at
12854 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12855 to the GPRs and/or memory. Return the number of elements used. */
12856
12857 static int
12858 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12859 int align_words, rtx *rvec)
12860 {
12861 int k = 0;
12862
12863 if (align_words < GP_ARG_NUM_REG)
12864 {
12865 int n_words = rs6000_arg_size (mode, type);
12866
12867 if (align_words + n_words > GP_ARG_NUM_REG
12868 || mode == BLKmode
12869 || (TARGET_32BIT && TARGET_POWERPC64))
12870 {
12871 /* If this is partially on the stack, then we only
12872 include the portion actually in registers here. */
12873 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12874 int i = 0;
12875
12876 if (align_words + n_words > GP_ARG_NUM_REG)
12877 {
12878 /* Not all of the arg fits in gprs. Say that it goes in memory
12879 too, using a magic NULL_RTX component. Also see comment in
12880 rs6000_mixed_function_arg for why the normal
12881 function_arg_partial_nregs scheme doesn't work in this case. */
12882 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12883 }
12884
12885 do
12886 {
12887 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12888 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12889 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12890 }
12891 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12892 }
12893 else
12894 {
12895 /* The whole arg fits in gprs. */
12896 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12897 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12898 }
12899 }
12900 else
12901 {
12902 /* It's entirely in memory. */
12903 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12904 }
12905
12906 return k;
12907 }
12908
12909 /* RVEC is a vector of K components of an argument of mode MODE.
12910 Construct the final function_arg return value from it. */
12911
12912 static rtx
12913 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12914 {
12915 gcc_assert (k >= 1);
12916
12917 /* Avoid returning a PARALLEL in the trivial cases. */
12918 if (k == 1)
12919 {
12920 if (XEXP (rvec[0], 0) == NULL_RTX)
12921 return NULL_RTX;
12922
12923 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12924 return XEXP (rvec[0], 0);
12925 }
12926
12927 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12928 }
12929
12930 /* Determine where to put an argument to a function.
12931 Value is zero to push the argument on the stack,
12932 or a hard register in which to store the argument.
12933
12934 MODE is the argument's machine mode.
12935 TYPE is the data type of the argument (as a tree).
12936 This is null for libcalls where that information may
12937 not be available.
12938 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12939 the preceding args and about the function being called. It is
12940 not modified in this routine.
12941 NAMED is nonzero if this argument is a named parameter
12942 (otherwise it is an extra parameter matching an ellipsis).
12943
12944 On RS/6000 the first eight words of non-FP are normally in registers
12945 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12946 Under V.4, the first 8 FP args are in registers.
12947
12948 If this is floating-point and no prototype is specified, we use
12949 both an FP and integer register (or possibly FP reg and stack). Library
12950 functions (when CALL_LIBCALL is set) always have the proper types for args,
12951 so we can pass the FP value in just one register; emit_library_call
12952 doesn't support PARALLEL anyway.
12953
12954 Note that for args passed by reference, function_arg will be called
12955 with MODE and TYPE set to that of the pointer to the arg, not the arg
12956 itself. */
12957
12958 static rtx
12959 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12960 const_tree type, bool named)
12961 {
12962 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12963 enum rs6000_abi abi = DEFAULT_ABI;
12964 machine_mode elt_mode;
12965 int n_elts;
12966
12967 /* Return a marker indicating whether the bit in CR1 that V.4 uses
12968 to say FP args were passed in registers needs to be set or cleared.
12969 Assume that we don't need the marker for software floating point,
12970 or compiler generated library calls. */
12971 if (mode == VOIDmode)
12972 {
12973 if (abi == ABI_V4
12974 && (cum->call_cookie & CALL_LIBCALL) == 0
12975 && (cum->stdarg
12976 || (cum->nargs_prototype < 0
12977 && (cum->prototype || TARGET_NO_PROTOTYPE))))
12978 {
12979 /* For the SPE, we need to crxor CR6 always. */
12980 if (TARGET_SPE_ABI)
12981 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
12982 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
12983 return GEN_INT (cum->call_cookie
12984 | ((cum->fregno == FP_ARG_MIN_REG)
12985 ? CALL_V4_SET_FP_ARGS
12986 : CALL_V4_CLEAR_FP_ARGS));
12987 }
12988
12989 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12990 }
12991
12992 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12993
12994 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12995 {
12996 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12997 if (rslt != NULL_RTX)
12998 return rslt;
12999 /* Else fall through to usual handling. */
13000 }
13001
13002 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13003 {
13004 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13005 rtx r, off;
13006 int i, k = 0;
13007
13008 /* Do we also need to pass this argument in the parameter save area?
13009 Library support functions for IEEE 128-bit are assumed to not need the
13010 value passed both in GPRs and in vector registers. */
13011 if (TARGET_64BIT && !cum->prototype
13012 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13013 {
13014 int align_words = ROUND_UP (cum->words, 2);
13015 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13016 }
13017
13018 /* Describe where this argument goes in the vector registers. */
13019 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
13020 {
13021 r = gen_rtx_REG (elt_mode, cum->vregno + i);
13022 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13023 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13024 }
13025
13026 return rs6000_finish_function_arg (mode, rvec, k);
13027 }
13028 else if (TARGET_ALTIVEC_ABI
13029 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
13030 || (type && TREE_CODE (type) == VECTOR_TYPE
13031 && int_size_in_bytes (type) == 16)))
13032 {
13033 if (named || abi == ABI_V4)
13034 return NULL_RTX;
13035 else
13036 {
13037 /* Vector parameters to varargs functions under AIX or Darwin
13038 get passed in memory and possibly also in GPRs. */
13039 int align, align_words, n_words;
13040 machine_mode part_mode;
13041
13042 /* Vector parameters must be 16-byte aligned. In 32-bit
13043 mode this means we need to take into account the offset
13044 to the parameter save area. In 64-bit mode, they just
13045 have to start on an even word, since the parameter save
13046 area is 16-byte aligned. */
13047 if (TARGET_32BIT)
13048 align = -(rs6000_parm_offset () + cum->words) & 3;
13049 else
13050 align = cum->words & 1;
13051 align_words = cum->words + align;
13052
13053 /* Out of registers? Memory, then. */
13054 if (align_words >= GP_ARG_NUM_REG)
13055 return NULL_RTX;
13056
13057 if (TARGET_32BIT && TARGET_POWERPC64)
13058 return rs6000_mixed_function_arg (mode, type, align_words);
13059
13060 /* The vector value goes in GPRs. Only the part of the
13061 value in GPRs is reported here. */
13062 part_mode = mode;
13063 n_words = rs6000_arg_size (mode, type);
13064 if (align_words + n_words > GP_ARG_NUM_REG)
13065 /* Fortunately, there are only two possibilities: the value
13066 is either wholly in GPRs or half in GPRs and half not. */
13067 part_mode = DImode;
13068
13069 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
13070 }
13071 }
13072 else if (TARGET_SPE_ABI && TARGET_SPE
13073 && (SPE_VECTOR_MODE (mode)
13074 || (TARGET_E500_DOUBLE && (mode == DFmode
13075 || mode == DCmode
13076 || mode == TFmode
13077 || mode == TCmode))))
13078 return rs6000_spe_function_arg (cum, mode, type);
13079
13080 else if (abi == ABI_V4)
13081 {
13082 if (abi_v4_pass_in_fpr (mode))
13083 {
13084 /* _Decimal128 must use an even/odd register pair. This assumes
13085 that the register number is odd when fregno is odd. */
13086 if (mode == TDmode && (cum->fregno % 2) == 1)
13087 cum->fregno++;
13088
13089 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
13090 <= FP_ARG_V4_MAX_REG)
13091 return gen_rtx_REG (mode, cum->fregno);
13092 else
13093 return NULL_RTX;
13094 }
13095 else
13096 {
13097 int n_words = rs6000_arg_size (mode, type);
13098 int gregno = cum->sysv_gregno;
13099
13100 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
13101 (r7,r8) or (r9,r10). As does any other 2 word item such
13102 as complex int due to a historical mistake. */
13103 if (n_words == 2)
13104 gregno += (1 - gregno) & 1;
13105
13106 /* Multi-reg args are not split between registers and stack. */
13107 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
13108 return NULL_RTX;
13109
13110 if (TARGET_32BIT && TARGET_POWERPC64)
13111 return rs6000_mixed_function_arg (mode, type,
13112 gregno - GP_ARG_MIN_REG);
13113 return gen_rtx_REG (mode, gregno);
13114 }
13115 }
13116 else
13117 {
13118 int align_words = rs6000_parm_start (mode, type, cum->words);
13119
13120 /* _Decimal128 must be passed in an even/odd float register pair.
13121 This assumes that the register number is odd when fregno is odd. */
13122 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
13123 cum->fregno++;
13124
13125 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13126 {
13127 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
13128 rtx r, off;
13129 int i, k = 0;
13130 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13131 int fpr_words;
13132
13133 /* Do we also need to pass this argument in the parameter
13134 save area? */
13135 if (type && (cum->nargs_prototype <= 0
13136 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13137 && TARGET_XL_COMPAT
13138 && align_words >= GP_ARG_NUM_REG)))
13139 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
13140
13141 /* Describe where this argument goes in the fprs. */
13142 for (i = 0; i < n_elts
13143 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
13144 {
13145 /* Check if the argument is split over registers and memory.
13146 This can only ever happen for long double or _Decimal128;
13147 complex types are handled via split_complex_arg. */
13148 machine_mode fmode = elt_mode;
13149 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
13150 {
13151 gcc_assert (FLOAT128_2REG_P (fmode));
13152 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
13153 }
13154
13155 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
13156 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
13157 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13158 }
13159
13160 /* If there were not enough FPRs to hold the argument, the rest
13161 usually goes into memory. However, if the current position
13162 is still within the register parameter area, a portion may
13163 actually have to go into GPRs.
13164
13165 Note that it may happen that the portion of the argument
13166 passed in the first "half" of the first GPR was already
13167 passed in the last FPR as well.
13168
13169 For unnamed arguments, we already set up GPRs to cover the
13170 whole argument in rs6000_psave_function_arg, so there is
13171 nothing further to do at this point. */
13172 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
13173 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
13174 && cum->nargs_prototype > 0)
13175 {
13176 static bool warned;
13177
13178 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
13179 int n_words = rs6000_arg_size (mode, type);
13180
13181 align_words += fpr_words;
13182 n_words -= fpr_words;
13183
13184 do
13185 {
13186 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
13187 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
13188 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
13189 }
13190 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
13191
13192 if (!warned && warn_psabi)
13193 {
13194 warned = true;
13195 inform (input_location,
13196 "the ABI of passing homogeneous float aggregates"
13197 " has changed in GCC 5");
13198 }
13199 }
13200
13201 return rs6000_finish_function_arg (mode, rvec, k);
13202 }
13203 else if (align_words < GP_ARG_NUM_REG)
13204 {
13205 if (TARGET_32BIT && TARGET_POWERPC64)
13206 return rs6000_mixed_function_arg (mode, type, align_words);
13207
13208 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
13209 }
13210 else
13211 return NULL_RTX;
13212 }
13213 }
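/* Example (editorial): under the V.4 ABI, a "long long" following a
   single int argument has n_words == 2, so gregno is rounded up from r4
   to the odd register r5 and the value is returned as (reg:DI 5),
   occupying the pair (r5,r6).  */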
13214 \f
13215 /* For an arg passed partly in registers and partly in memory, this is
13216 the number of bytes passed in registers. For args passed entirely in
13217 registers or entirely in memory, zero. When an arg is described by a
13218 PARALLEL, perhaps using more than one register type, this function
13219 returns the number of bytes used by the first element of the PARALLEL. */
13220
13221 static int
13222 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
13223 tree type, bool named)
13224 {
13225 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
13226 bool passed_in_gprs = true;
13227 int ret = 0;
13228 int align_words;
13229 machine_mode elt_mode;
13230 int n_elts;
13231
13232 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
13233
13234 if (DEFAULT_ABI == ABI_V4)
13235 return 0;
13236
13237 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
13238 {
13239 /* If we are passing this arg in the fixed parameter save area (gprs or
13240 memory) as well as VRs, we do not use the partial bytes mechanism;
13241 instead, rs6000_function_arg will return a PARALLEL including a memory
13242 element as necessary. Library support functions for IEEE 128-bit are
13243 assumed to not need the value passed both in GPRs and in vector
13244 registers. */
13245 if (TARGET_64BIT && !cum->prototype
13246 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
13247 return 0;
13248
13249 /* Otherwise, we pass in VRs only. Check for partial copies. */
13250 passed_in_gprs = false;
13251 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
13252 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
13253 }
13254
13255 /* In this complicated case we just disable the partial_nregs code. */
13256 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
13257 return 0;
13258
13259 align_words = rs6000_parm_start (mode, type, cum->words);
13260
13261 if (USE_FP_FOR_ARG_P (cum, elt_mode))
13262 {
13263 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
13264
13265 /* If we are passing this arg in the fixed parameter save area
13266 (gprs or memory) as well as FPRs, we do not use the partial
13267 bytes mechanism; instead, rs6000_function_arg will return a
13268 PARALLEL including a memory element as necessary. */
13269 if (type
13270 && (cum->nargs_prototype <= 0
13271 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13272 && TARGET_XL_COMPAT
13273 && align_words >= GP_ARG_NUM_REG)))
13274 return 0;
13275
13276 /* Otherwise, we pass in FPRs only. Check for partial copies. */
13277 passed_in_gprs = false;
13278 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
13279 {
13280 /* Compute number of bytes / words passed in FPRs. If there
13281 is still space available in the register parameter area
13282 *after* that amount, a part of the argument will be passed
13283 in GPRs. In that case, the total amount passed in any
13284 registers is equal to the amount that would have been passed
13285 in GPRs if everything were passed there, so we fall back to
13286 the GPR code below to compute the appropriate value. */
13287 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
13288 * MIN (8, GET_MODE_SIZE (elt_mode)));
13289 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
13290
13291 if (align_words + fpr_words < GP_ARG_NUM_REG)
13292 passed_in_gprs = true;
13293 else
13294 ret = fpr;
13295 }
13296 }
13297
13298 if (passed_in_gprs
13299 && align_words < GP_ARG_NUM_REG
13300 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
13301 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
13302
13303 if (ret != 0 && TARGET_DEBUG_ARG)
13304 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
13305
13306 return ret;
13307 }
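/* Example (editorial): on a 64-bit target, a 24-byte aggregate whose
   first doubleword lands at ALIGN_WORDS == 6 fits only two of its three
   doublewords in r9/r10, so (8 - 6) * 8 == 16 bytes are reported as
   passed in registers.  */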
13308 \f
13309 /* A C expression that indicates when an argument must be passed by
13310 reference. If nonzero for an argument, a copy of that argument is
13311 made in memory and a pointer to the argument is passed instead of
13312 the argument itself. The pointer is passed in whatever way is
13313 appropriate for passing a pointer to that type.
13314
13315 Under V.4, aggregates and long double are passed by reference.
13316
13317 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
13318 reference unless the AltiVec vector extension ABI is in force.
13319
13320 As an extension to all ABIs, variable sized types are passed by
13321 reference. */
13322
13323 static bool
13324 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
13325 machine_mode mode, const_tree type,
13326 bool named ATTRIBUTE_UNUSED)
13327 {
13328 if (!type)
13329 return 0;
13330
13331 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
13332 && FLOAT128_IEEE_P (TYPE_MODE (type)))
13333 {
13334 if (TARGET_DEBUG_ARG)
13335 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
13336 return 1;
13337 }
13338
13339 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
13340 {
13341 if (TARGET_DEBUG_ARG)
13342 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
13343 return 1;
13344 }
13345
13346 if (int_size_in_bytes (type) < 0)
13347 {
13348 if (TARGET_DEBUG_ARG)
13349 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
13350 return 1;
13351 }
13352
13353 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
13354 modes only exist for GCC vector types if -maltivec. */
13355 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13356 {
13357 if (TARGET_DEBUG_ARG)
13358 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
13359 return 1;
13360 }
13361
13362 /* Pass synthetic vectors in memory. */
13363 if (TREE_CODE (type) == VECTOR_TYPE
13364 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
13365 {
13366 static bool warned_for_pass_big_vectors = false;
13367 if (TARGET_DEBUG_ARG)
13368 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13369 if (!warned_for_pass_big_vectors)
13370 {
13371 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13372 "non-standard ABI extension with no compatibility guarantee");
13373 warned_for_pass_big_vectors = true;
13374 }
13375 return 1;
13376 }
13377
13378 return 0;
13379 }
13380
13381 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
13382 already processed. Return true if the parameter must be passed
13383 (fully or partially) on the stack. */
13384
13385 static bool
13386 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13387 {
13388 machine_mode mode;
13389 int unsignedp;
13390 rtx entry_parm;
13391
13392 /* Catch errors. */
13393 if (type == NULL || type == error_mark_node)
13394 return true;
13395
13396 /* Handle types with no storage requirement. */
13397 if (TYPE_MODE (type) == VOIDmode)
13398 return false;
13399
13400 /* Handle complex types. The two halves are passed as separate scalars (see split_complex_arg), so check and advance over each. */
13401 if (TREE_CODE (type) == COMPLEX_TYPE)
13402 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13403 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13404
13405 /* Handle transparent aggregates. */
13406 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13407 && TYPE_TRANSPARENT_AGGR (type))
13408 type = TREE_TYPE (first_field (type));
13409
13410 /* See if this arg was passed by invisible reference. */
13411 if (pass_by_reference (get_cumulative_args (args_so_far),
13412 TYPE_MODE (type), type, true))
13413 type = build_pointer_type (type);
13414
13415 /* Find mode as it is passed by the ABI. */
13416 unsignedp = TYPE_UNSIGNED (type);
13417 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13418
13419 /* If we must pass in stack, we need a stack. */
13420 if (rs6000_must_pass_in_stack (mode, type))
13421 return true;
13422
13423 /* If there is no incoming register, we need a stack. */
13424 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13425 if (entry_parm == NULL)
13426 return true;
13427
13428 /* Likewise if we need to pass both in registers and on the stack. */
13429 if (GET_CODE (entry_parm) == PARALLEL
13430 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13431 return true;
13432
13433 /* Also true if we're partially in registers and partially not. */
13434 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13435 return true;
13436
13437 /* Update info on where next arg arrives in registers. */
13438 rs6000_function_arg_advance (args_so_far, mode, type, true);
13439 return false;
13440 }
13441
13442 /* Return true if FUN has no prototype, has a variable argument
13443 list, or passes any parameter in memory. */
13444
13445 static bool
13446 rs6000_function_parms_need_stack (tree fun, bool incoming)
13447 {
13448 tree fntype, result;
13449 CUMULATIVE_ARGS args_so_far_v;
13450 cumulative_args_t args_so_far;
13451
13452 if (!fun)
13453 /* Must be a libcall, all of which only use reg parms. */
13454 return false;
13455
13456 fntype = fun;
13457 if (!TYPE_P (fun))
13458 fntype = TREE_TYPE (fun);
13459
13460 /* Varargs functions need the parameter save area. */
13461 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13462 return true;
13463
13464 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13465 args_so_far = pack_cumulative_args (&args_so_far_v);
13466
13467 /* When incoming, we will have been passed the function decl.
13468 It is necessary to use the decl to handle K&R style functions,
13469 where TYPE_ARG_TYPES may not be available. */
13470 if (incoming)
13471 {
13472 gcc_assert (DECL_P (fun));
13473 result = DECL_RESULT (fun);
13474 }
13475 else
13476 result = TREE_TYPE (fntype);
13477
13478 if (result && aggregate_value_p (result, fntype))
13479 {
13480 if (!TYPE_P (result))
13481 result = TREE_TYPE (result);
13482 result = build_pointer_type (result);
13483 rs6000_parm_needs_stack (args_so_far, result);
13484 }
13485
13486 if (incoming)
13487 {
13488 tree parm;
13489
13490 for (parm = DECL_ARGUMENTS (fun);
13491 parm && parm != void_list_node;
13492 parm = TREE_CHAIN (parm))
13493 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13494 return true;
13495 }
13496 else
13497 {
13498 function_args_iterator args_iter;
13499 tree arg_type;
13500
13501 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13502 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13503 return true;
13504 }
13505
13506 return false;
13507 }
13508
13509 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13510 usually a constant depending on the ABI. However, in the ELFv2 ABI
13511 the register parameter area is optional when calling a function that
13512 has a prototype in scope, has no variable argument list, and passes
13513 all parameters in registers. */
13514
13515 int
13516 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13517 {
13518 int reg_parm_stack_space;
13519
13520 switch (DEFAULT_ABI)
13521 {
13522 default:
13523 reg_parm_stack_space = 0;
13524 break;
13525
13526 case ABI_AIX:
13527 case ABI_DARWIN:
13528 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13529 break;
13530
13531 case ABI_ELFv2:
13532 /* ??? Recomputing this every time is a bit expensive. Is there
13533 a place to cache this information? */
13534 if (rs6000_function_parms_need_stack (fun, incoming))
13535 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13536 else
13537 reg_parm_stack_space = 0;
13538 break;
13539 }
13540
13541 return reg_parm_stack_space;
13542 }
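/* Usage note (editorial): under ELFv2, calling a fully prototyped
   function whose arguments all fit in registers yields 0 here, eliding
   the parameter save area entirely; a varargs callee such as printf
   still gets the full 64 bytes.  */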
13543
13544 static void
13545 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13546 {
13547 int i;
13548 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13549
13550 if (nregs == 0)
13551 return;
13552
13553 for (i = 0; i < nregs; i++)
13554 {
13555 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13556 if (reload_completed)
13557 {
13558 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13559 tem = NULL_RTX;
13560 else
13561 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13562 i * GET_MODE_SIZE (reg_mode));
13563 }
13564 else
13565 tem = replace_equiv_address (tem, XEXP (tem, 0));
13566
13567 gcc_assert (tem);
13568
13569 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13570 }
13571 }
13572 \f
13573 /* Perform any actions needed for a function that is receiving a
13574 variable number of arguments.
13575
13576 CUM is as above.
13577
13578 MODE and TYPE are the mode and type of the current parameter.
13579
13580 PRETEND_SIZE is a variable that should be set to the amount of stack
13581 that must be pushed by the prolog to pretend that our caller pushed
13582 it.
13583
13584 Normally, this macro will push all remaining incoming registers on the
13585 stack and set PRETEND_SIZE to the length of the registers pushed. */
13586
13587 static void
13588 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13589 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13590 int no_rtl)
13591 {
13592 CUMULATIVE_ARGS next_cum;
13593 int reg_size = TARGET_32BIT ? 4 : 8;
13594 rtx save_area = NULL_RTX, mem;
13595 int first_reg_offset;
13596 alias_set_type set;
13597
13598 /* Skip the last named argument. */
13599 next_cum = *get_cumulative_args (cum);
13600 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13601
13602 if (DEFAULT_ABI == ABI_V4)
13603 {
13604 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13605
13606 if (! no_rtl)
13607 {
13608 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13609 HOST_WIDE_INT offset = 0;
13610
13611 /* Try to optimize the size of the varargs save area.
13612 The ABI requires that ap.reg_save_area is doubleword
13613 aligned, but we don't need to allocate space for all
13614 the bytes, only those to which we will actually save
13615 anything. */
13616 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13617 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13618 if (TARGET_HARD_FLOAT && TARGET_FPRS
13619 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13620 && cfun->va_list_fpr_size)
13621 {
13622 if (gpr_reg_num)
13623 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13624 * UNITS_PER_FP_WORD;
13625 if (cfun->va_list_fpr_size
13626 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13627 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13628 else
13629 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13630 * UNITS_PER_FP_WORD;
13631 }
13632 if (gpr_reg_num)
13633 {
13634 offset = -((first_reg_offset * reg_size) & ~7);
13635 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13636 {
13637 gpr_reg_num = cfun->va_list_gpr_size;
13638 if (reg_size == 4 && (first_reg_offset & 1))
13639 gpr_reg_num++;
13640 }
13641 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13642 }
13643 else if (fpr_size)
13644 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13645 * UNITS_PER_FP_WORD
13646 - (int) (GP_ARG_NUM_REG * reg_size);
13647
13648 if (gpr_size + fpr_size)
13649 {
13650 rtx reg_save_area
13651 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13652 gcc_assert (GET_CODE (reg_save_area) == MEM);
13653 reg_save_area = XEXP (reg_save_area, 0);
13654 if (GET_CODE (reg_save_area) == PLUS)
13655 {
13656 gcc_assert (XEXP (reg_save_area, 0)
13657 == virtual_stack_vars_rtx);
13658 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13659 offset += INTVAL (XEXP (reg_save_area, 1));
13660 }
13661 else
13662 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13663 }
13664
13665 cfun->machine->varargs_save_offset = offset;
13666 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13667 }
13668 }
13669 else
13670 {
13671 first_reg_offset = next_cum.words;
13672 save_area = crtl->args.internal_arg_pointer;
13673
13674 if (targetm.calls.must_pass_in_stack (mode, type))
13675 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13676 }
13677
13678 set = get_varargs_alias_set ();
13679 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13680 && cfun->va_list_gpr_size)
13681 {
13682 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13683
13684 if (va_list_gpr_counter_field)
13685 /* V4 va_list_gpr_size counts number of registers needed. */
13686 n_gpr = cfun->va_list_gpr_size;
13687 else
13688 /* char * va_list instead counts number of bytes needed. */
13689 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13690
13691 if (nregs > n_gpr)
13692 nregs = n_gpr;
13693
13694 mem = gen_rtx_MEM (BLKmode,
13695 plus_constant (Pmode, save_area,
13696 first_reg_offset * reg_size));
13697 MEM_NOTRAP_P (mem) = 1;
13698 set_mem_alias_set (mem, set);
13699 set_mem_align (mem, BITS_PER_WORD);
13700
13701 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13702 nregs);
13703 }
13704
13705 /* Save FP registers if needed. */
13706 if (DEFAULT_ABI == ABI_V4
13707 && TARGET_HARD_FLOAT && TARGET_FPRS
13708 && ! no_rtl
13709 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13710 && cfun->va_list_fpr_size)
13711 {
13712 int fregno = next_cum.fregno, nregs;
13713 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13714 rtx lab = gen_label_rtx ();
13715 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13716 * UNITS_PER_FP_WORD);
13717
13718 emit_jump_insn
13719 (gen_rtx_SET (pc_rtx,
13720 gen_rtx_IF_THEN_ELSE (VOIDmode,
13721 gen_rtx_NE (VOIDmode, cr1,
13722 const0_rtx),
13723 gen_rtx_LABEL_REF (VOIDmode, lab),
13724 pc_rtx)));
13725
13726 for (nregs = 0;
13727 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13728 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13729 {
13730 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13731 ? DFmode : SFmode,
13732 plus_constant (Pmode, save_area, off));
13733 MEM_NOTRAP_P (mem) = 1;
13734 set_mem_alias_set (mem, set);
13735 set_mem_align (mem, GET_MODE_ALIGNMENT (
13736 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13737 ? DFmode : SFmode));
13738 emit_move_insn (mem, gen_rtx_REG (
13739 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13740 ? DFmode : SFmode, fregno));
13741 }
13742
13743 emit_label (lab);
13744 }
13745 }
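/* Editorial note on the conditional branch above: per the V.4 convention
   (see the CALL_V4_SET_FP_ARGS / CALL_V4_CLEAR_FP_ARGS cookies earlier
   in this file), callers record in CR1 whether FP arguments were passed
   in registers, so the FPR dump is skipped when that flag is clear.  */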
13746
13747 /* Create the va_list data type. */
13748
13749 static tree
13750 rs6000_build_builtin_va_list (void)
13751 {
13752 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13753
13754 /* For AIX, prefer 'char *' because that's what the system
13755 header files like. */
13756 if (DEFAULT_ABI != ABI_V4)
13757 return build_pointer_type (char_type_node);
13758
13759 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13760 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13761 get_identifier ("__va_list_tag"), record);
13762
13763 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13764 unsigned_char_type_node);
13765 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13766 unsigned_char_type_node);
13767 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13768 every user file. */
13769 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13770 get_identifier ("reserved"), short_unsigned_type_node);
13771 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13772 get_identifier ("overflow_arg_area"),
13773 ptr_type_node);
13774 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13775 get_identifier ("reg_save_area"),
13776 ptr_type_node);
13777
13778 va_list_gpr_counter_field = f_gpr;
13779 va_list_fpr_counter_field = f_fpr;
13780
13781 DECL_FIELD_CONTEXT (f_gpr) = record;
13782 DECL_FIELD_CONTEXT (f_fpr) = record;
13783 DECL_FIELD_CONTEXT (f_res) = record;
13784 DECL_FIELD_CONTEXT (f_ovf) = record;
13785 DECL_FIELD_CONTEXT (f_sav) = record;
13786
13787 TYPE_STUB_DECL (record) = type_decl;
13788 TYPE_NAME (record) = type_decl;
13789 TYPE_FIELDS (record) = f_gpr;
13790 DECL_CHAIN (f_gpr) = f_fpr;
13791 DECL_CHAIN (f_fpr) = f_res;
13792 DECL_CHAIN (f_res) = f_ovf;
13793 DECL_CHAIN (f_ovf) = f_sav;
13794
13795 layout_type (record);
13796
13797 /* The correct type is an array type of one element. */
13798 return build_array_type (record, build_index_type (size_zero_node));
13799 }
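
/* For illustration only (not a declaration from this file): the record
   laid out above corresponds to this user-visible V4 va_list, modulo
   the array-of-one wrapper returned last.  The field comments are an
   informal reading, not definitions from this file:

       typedef struct __va_list_tag
       {
         unsigned char gpr;            // GP arg registers consumed so far
         unsigned char fpr;            // FP arg registers consumed so far
         unsigned short reserved;      // named padding (see -Wpadded note)
         void *overflow_arg_area;      // next stack-passed argument
         void *reg_save_area;          // base of the register save area
       } __va_list_tag;
*/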
13800
13801 /* Implement va_start. */
13802
13803 static void
13804 rs6000_va_start (tree valist, rtx nextarg)
13805 {
13806 HOST_WIDE_INT words, n_gpr, n_fpr;
13807 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13808 tree gpr, fpr, ovf, sav, t;
13809
13810 /* Only SVR4 needs something special. */
13811 if (DEFAULT_ABI != ABI_V4)
13812 {
13813 std_expand_builtin_va_start (valist, nextarg);
13814 return;
13815 }
13816
13817 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13818 f_fpr = DECL_CHAIN (f_gpr);
13819 f_res = DECL_CHAIN (f_fpr);
13820 f_ovf = DECL_CHAIN (f_res);
13821 f_sav = DECL_CHAIN (f_ovf);
13822
13823 valist = build_simple_mem_ref (valist);
13824 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13825 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13826 f_fpr, NULL_TREE);
13827 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13828 f_ovf, NULL_TREE);
13829 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13830 f_sav, NULL_TREE);
13831
13832 /* Count number of gp and fp argument registers used. */
13833 words = crtl->args.info.words;
13834 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13835 GP_ARG_NUM_REG);
13836 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13837 FP_ARG_NUM_REG);
13838
13839 if (TARGET_DEBUG_ARG)
13840 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13841 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13842 words, n_gpr, n_fpr);
13843
13844 if (cfun->va_list_gpr_size)
13845 {
13846 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13847 build_int_cst (NULL_TREE, n_gpr));
13848 TREE_SIDE_EFFECTS (t) = 1;
13849 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13850 }
13851
13852 if (cfun->va_list_fpr_size)
13853 {
13854 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13855 build_int_cst (NULL_TREE, n_fpr));
13856 TREE_SIDE_EFFECTS (t) = 1;
13857 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13858
13859 #ifdef HAVE_AS_GNU_ATTRIBUTE
13860 if (call_ABI_of_interest (cfun->decl))
13861 rs6000_passes_float = true;
13862 #endif
13863 }
13864
13865 /* Find the overflow area. */
13866 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13867 if (words != 0)
13868 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13869 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13870 TREE_SIDE_EFFECTS (t) = 1;
13871 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13872
13873 /* If there were no va_arg invocations, don't set up the register
13874 save area. */
13875 if (!cfun->va_list_gpr_size
13876 && !cfun->va_list_fpr_size
13877 && n_gpr < GP_ARG_NUM_REG
13878 && n_fpr < FP_ARG_V4_MAX_REG)
13879 return;
13880
13881 /* Find the register save area. */
13882 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13883 if (cfun->machine->varargs_save_offset)
13884 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13885 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13886 TREE_SIDE_EFFECTS (t) = 1;
13887 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13888 }
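
/* For illustration only: on V4 the expansion above amounts to the
   following sketch ("ap" names the va_list object; the identifiers are
   informal, not GCC internals):

       ap->gpr = n_gpr;   // GP registers consumed by named arguments
       ap->fpr = n_fpr;   // FP registers consumed by named arguments
       ap->overflow_arg_area = incoming_arg_ptr + words * MIN_UNITS_PER_WORD;
       ap->reg_save_area = frame_base + varargs_save_offset;
*/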
13889
13890 /* Implement va_arg. */
13891
13892 static tree
13893 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13894 gimple_seq *post_p)
13895 {
13896 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13897 tree gpr, fpr, ovf, sav, reg, t, u;
13898 int size, rsize, n_reg, sav_ofs, sav_scale;
13899 tree lab_false, lab_over, addr;
13900 int align;
13901 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13902 int regalign = 0;
13903 gimple *stmt;
13904
13905 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13906 {
13907 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13908 return build_va_arg_indirect_ref (t);
13909 }
13910
13911 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13912 earlier version of gcc, with the property that it always applied alignment
13913 adjustments to the va-args (even for zero-sized types). The cheapest way
13914 to deal with this is to replicate the effect of the part of
13915 std_gimplify_va_arg_expr that carries out the align adjust, for the
13916 relevant case.
13917 We don't need to check for pass-by-reference because of the test above.
13918 We can return a simplified answer, since we know there's no offset to add. */
13919
13920 if (((TARGET_MACHO
13921 && rs6000_darwin64_abi)
13922 || DEFAULT_ABI == ABI_ELFv2
13923 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13924 && integer_zerop (TYPE_SIZE (type)))
13925 {
13926 unsigned HOST_WIDE_INT align, boundary;
13927 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13928 align = PARM_BOUNDARY / BITS_PER_UNIT;
13929 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13930 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13931 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13932 boundary /= BITS_PER_UNIT;
13933 if (boundary > align)
13934 {
13935 tree t;
13936 /* This updates arg ptr by the amount that would be necessary
13937 to align the zero-sized (but not zero-alignment) item. */
13938 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13939 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13940 gimplify_and_add (t, pre_p);
13941
13942 t = fold_convert (sizetype, valist_tmp);
13943 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13944 fold_convert (TREE_TYPE (valist),
13945 fold_build2 (BIT_AND_EXPR, sizetype, t,
13946 size_int (-boundary))));
13947 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13948 gimplify_and_add (t, pre_p);
13949 }
13950 /* Since it is zero-sized, there's no increment for the item itself. */
13951 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13952 return build_va_arg_indirect_ref (valist_tmp);
13953 }
13954
13955 if (DEFAULT_ABI != ABI_V4)
13956 {
13957 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13958 {
13959 tree elem_type = TREE_TYPE (type);
13960 machine_mode elem_mode = TYPE_MODE (elem_type);
13961 int elem_size = GET_MODE_SIZE (elem_mode);
13962
13963 if (elem_size < UNITS_PER_WORD)
13964 {
13965 tree real_part, imag_part;
13966 gimple_seq post = NULL;
13967
13968 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13969 &post);
13970 /* Copy the value into a temporary, lest the formal temporary
13971 be reused out from under us. */
13972 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13973 gimple_seq_add_seq (pre_p, post);
13974
13975 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13976 post_p);
13977
13978 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13979 }
13980 }
13981
13982 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13983 }
13984
13985 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13986 f_fpr = DECL_CHAIN (f_gpr);
13987 f_res = DECL_CHAIN (f_fpr);
13988 f_ovf = DECL_CHAIN (f_res);
13989 f_sav = DECL_CHAIN (f_ovf);
13990
13991 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13992 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13993 f_fpr, NULL_TREE);
13994 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13995 f_ovf, NULL_TREE);
13996 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13997 f_sav, NULL_TREE);
13998
13999 size = int_size_in_bytes (type);
14000 rsize = (size + 3) / 4;
14001 int pad = 4 * rsize - size;
14002 align = 1;
14003
14004 machine_mode mode = TYPE_MODE (type);
14005 if (abi_v4_pass_in_fpr (mode))
14006 {
14007 /* FP args go in FP registers, if present. */
14008 reg = fpr;
14009 n_reg = (size + 7) / 8;
14010 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
14011 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
14012 if (mode != SFmode && mode != SDmode)
14013 align = 8;
14014 }
14015 else
14016 {
14017 /* Otherwise into GP registers. */
14018 reg = gpr;
14019 n_reg = rsize;
14020 sav_ofs = 0;
14021 sav_scale = 4;
14022 if (n_reg == 2)
14023 align = 8;
14024 }
14025
14026 /* Pull the value out of the saved registers.... */
14027
14028 lab_over = NULL;
14029 addr = create_tmp_var (ptr_type_node, "addr");
14030
14031 /* AltiVec vectors never go in registers when -mabi=altivec. */
14032 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
14033 align = 16;
14034 else
14035 {
14036 lab_false = create_artificial_label (input_location);
14037 lab_over = create_artificial_label (input_location);
14038
14039 /* Long long and SPE vectors are aligned in the registers,
14040 as are any other 2-GPR items (such as complex int), due to a
14041 historical mistake. */
14042 u = reg;
14043 if (n_reg == 2 && reg == gpr)
14044 {
14045 regalign = 1;
14046 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14047 build_int_cst (TREE_TYPE (reg), n_reg - 1));
14048 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
14049 unshare_expr (reg), u);
14050 }
14051 /* _Decimal128 is passed in even/odd fpr pairs; the stored
14052 reg number is 0 for f1, so we want to make it odd. */
14053 else if (reg == fpr && mode == TDmode)
14054 {
14055 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14056 build_int_cst (TREE_TYPE (reg), 1));
14057 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
14058 }
14059
14060 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
14061 t = build2 (GE_EXPR, boolean_type_node, u, t);
14062 u = build1 (GOTO_EXPR, void_type_node, lab_false);
14063 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
14064 gimplify_and_add (t, pre_p);
14065
14066 t = sav;
14067 if (sav_ofs)
14068 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
14069
14070 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
14071 build_int_cst (TREE_TYPE (reg), n_reg));
14072 u = fold_convert (sizetype, u);
14073 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
14074 t = fold_build_pointer_plus (t, u);
14075
14076 /* _Decimal32 varargs are located in the second word of the 64-bit
14077 FP register for 32-bit binaries. */
14078 if (TARGET_32BIT
14079 && TARGET_HARD_FLOAT && TARGET_FPRS
14080 && mode == SDmode)
14081 t = fold_build_pointer_plus_hwi (t, size);
14082
14083 /* Args are passed right-aligned. */
14084 if (BYTES_BIG_ENDIAN)
14085 t = fold_build_pointer_plus_hwi (t, pad);
14086
14087 gimplify_assign (addr, t, pre_p);
14088
14089 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
14090
14091 stmt = gimple_build_label (lab_false);
14092 gimple_seq_add_stmt (pre_p, stmt);
14093
14094 if ((n_reg == 2 && !regalign) || n_reg > 2)
14095 {
14096 /* Ensure that we don't find any more args in regs.
14097 Alignment has already taken care of the special cases. */
14098 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
14099 }
14100 }
14101
14102 /* ... otherwise out of the overflow area. */
14103
14104 /* Care for on-stack alignment if needed. */
14105 t = ovf;
14106 if (align != 1)
14107 {
14108 t = fold_build_pointer_plus_hwi (t, align - 1);
14109 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
14110 build_int_cst (TREE_TYPE (t), -align));
14111 }
14112
14113 /* Args are passed right-aligned. */
14114 if (BYTES_BIG_ENDIAN)
14115 t = fold_build_pointer_plus_hwi (t, pad);
14116
14117 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
14118
14119 gimplify_assign (unshare_expr (addr), t, pre_p);
14120
14121 t = fold_build_pointer_plus_hwi (t, size);
14122 gimplify_assign (unshare_expr (ovf), t, pre_p);
14123
14124 if (lab_over)
14125 {
14126 stmt = gimple_build_label (lab_over);
14127 gimple_seq_add_stmt (pre_p, stmt);
14128 }
14129
14130 if (STRICT_ALIGNMENT
14131 && (TYPE_ALIGN (type)
14132 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
14133 {
14134 /* The value (of type complex double, for example) may not be
14135 aligned in memory in the saved registers, so copy via a
14136 temporary. (This is the same code as used for SPARC.) */
14137 tree tmp = create_tmp_var (type, "va_arg_tmp");
14138 tree dest_addr = build_fold_addr_expr (tmp);
14139
14140 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
14141 3, dest_addr, addr, size_int (rsize * 4));
14142
14143 gimplify_and_add (copy, pre_p);
14144 addr = dest_addr;
14145 }
14146
14147 addr = fold_convert (ptrtype, addr);
14148 return build_va_arg_indirect_ref (addr);
14149 }
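
/* For illustration only: for the V4 ABI the gimplified va_arg above has
   roughly this control-flow shape (a hand-written sketch; "reg" is the
   gpr or fpr counter chosen above):

       if (reg <= 8 - n_reg)                 // value still fits in registers
         {
           addr = reg_save_area + sav_ofs + reg * sav_scale;
           reg += n_reg;
         }
       else
         {
           reg = 8;                          // stop looking in registers
           addr = align_up (overflow_arg_area, align);
           overflow_arg_area = addr + size;
         }
       result = *(type *) addr;              // plus right-align padding on BE
*/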
14150
14151 /* Builtins. */
14152
14153 static void
14154 def_builtin (const char *name, tree type, enum rs6000_builtins code)
14155 {
14156 tree t;
14157 unsigned classify = rs6000_builtin_info[(int)code].attr;
14158 const char *attr_string = "";
14159
14160 gcc_assert (name != NULL);
14161 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
14162
14163 if (rs6000_builtin_decls[(int)code])
14164 fatal_error (input_location,
14165 "internal error: builtin function %s already processed", name);
14166
14167 rs6000_builtin_decls[(int)code] = t =
14168 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
14169
14170 /* Set any special attributes. */
14171 if ((classify & RS6000_BTC_CONST) != 0)
14172 {
14173 /* const function, function only depends on the inputs. */
14174 TREE_READONLY (t) = 1;
14175 TREE_NOTHROW (t) = 1;
14176 attr_string = ", const";
14177 }
14178 else if ((classify & RS6000_BTC_PURE) != 0)
14179 {
14180 /* pure function, function can read global memory, but does not set any
14181 external state. */
14182 DECL_PURE_P (t) = 1;
14183 TREE_NOTHROW (t) = 1;
14184 attr_string = ", pure";
14185 }
14186 else if ((classify & RS6000_BTC_FP) != 0)
14187 {
14188 /* Function is a math function. If -frounding-math is in effect,
14189 treat the function as not reading global memory, but as having
14190 arbitrary side effects. If it is off, assume the function is a
14191 const function. This mimics the ATTR_MATHFN_FPROUNDING attribute
14192 in builtin-attribute.def that is used for the math functions. */
14193 TREE_NOTHROW (t) = 1;
14194 if (flag_rounding_math)
14195 {
14196 DECL_PURE_P (t) = 1;
14197 DECL_IS_NOVOPS (t) = 1;
14198 attr_string = ", fp, pure";
14199 }
14200 else
14201 {
14202 TREE_READONLY (t) = 1;
14203 attr_string = ", fp, const";
14204 }
14205 }
14206 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
14207 gcc_unreachable ();
14208
14209 if (TARGET_DEBUG_BUILTIN)
14210 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
14211 (int)code, name, attr_string);
14212 }
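
/* For illustration only: a hypothetical call site of the kind used by
   the *_init_builtins routines later in this file:

       def_builtin ("__builtin_altivec_vmaddfp",
                    v4sf_ftype_v4sf_v4sf_v4sf,   // an assumed type node
                    ALTIVEC_BUILTIN_VMADDFP);

   This registers the builtin with the middle end via
   add_builtin_function and caches its decl in rs6000_builtin_decls.  */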
14213
14214 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
14215
14216 #undef RS6000_BUILTIN_0
14217 #undef RS6000_BUILTIN_1
14218 #undef RS6000_BUILTIN_2
14219 #undef RS6000_BUILTIN_3
14220 #undef RS6000_BUILTIN_A
14221 #undef RS6000_BUILTIN_D
14222 #undef RS6000_BUILTIN_E
14223 #undef RS6000_BUILTIN_H
14224 #undef RS6000_BUILTIN_P
14225 #undef RS6000_BUILTIN_Q
14226 #undef RS6000_BUILTIN_S
14227 #undef RS6000_BUILTIN_X
14228
14229 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14230 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14231 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14232 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
14233 { MASK, ICODE, NAME, ENUM },
14234
14235 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14236 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14237 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14238 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14239 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14240 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14241 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14242 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14243
14244 static const struct builtin_description bdesc_3arg[] =
14245 {
14246 #include "powerpcspe-builtin.def"
14247 };
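
/* For illustration only: with the definitions above, a hypothetical
   three-operand entry in powerpcspe-builtin.def such as

       RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
                         RS6000_BTM_ALTIVEC, RS6000_BTC_FP, CODE_FOR_fmav4sf4)

   expands to the initializer

       { RS6000_BTM_ALTIVEC, CODE_FOR_fmav4sf4,
         "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },

   while every other RS6000_BUILTIN_* arity expands to nothing.  The same
   #undef/#define dance below slices the single .def file into each of
   the remaining tables.  */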
14248
14249 /* DST operations: void foo (void *, const int, const char). */
14250
14251 #undef RS6000_BUILTIN_0
14252 #undef RS6000_BUILTIN_1
14253 #undef RS6000_BUILTIN_2
14254 #undef RS6000_BUILTIN_3
14255 #undef RS6000_BUILTIN_A
14256 #undef RS6000_BUILTIN_D
14257 #undef RS6000_BUILTIN_E
14258 #undef RS6000_BUILTIN_H
14259 #undef RS6000_BUILTIN_P
14260 #undef RS6000_BUILTIN_Q
14261 #undef RS6000_BUILTIN_S
14262 #undef RS6000_BUILTIN_X
14263
14264 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14265 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14266 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14267 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14268 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14269 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
14270 { MASK, ICODE, NAME, ENUM },
14271
14272 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14273 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14274 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14275 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14276 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14277 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14278
14279 static const struct builtin_description bdesc_dst[] =
14280 {
14281 #include "powerpcspe-builtin.def"
14282 };
14283
14284 /* Simple binary operations: VECc = foo (VECa, VECb). */
14285
14286 #undef RS6000_BUILTIN_0
14287 #undef RS6000_BUILTIN_1
14288 #undef RS6000_BUILTIN_2
14289 #undef RS6000_BUILTIN_3
14290 #undef RS6000_BUILTIN_A
14291 #undef RS6000_BUILTIN_D
14292 #undef RS6000_BUILTIN_E
14293 #undef RS6000_BUILTIN_H
14294 #undef RS6000_BUILTIN_P
14295 #undef RS6000_BUILTIN_Q
14296 #undef RS6000_BUILTIN_S
14297 #undef RS6000_BUILTIN_X
14298
14299 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14300 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14301 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
14302 { MASK, ICODE, NAME, ENUM },
14303
14304 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14305 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14306 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14307 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14308 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14309 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14310 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14311 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14312 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14313
14314 static const struct builtin_description bdesc_2arg[] =
14315 {
14316 #include "powerpcspe-builtin.def"
14317 };
14318
14319 #undef RS6000_BUILTIN_0
14320 #undef RS6000_BUILTIN_1
14321 #undef RS6000_BUILTIN_2
14322 #undef RS6000_BUILTIN_3
14323 #undef RS6000_BUILTIN_A
14324 #undef RS6000_BUILTIN_D
14325 #undef RS6000_BUILTIN_E
14326 #undef RS6000_BUILTIN_H
14327 #undef RS6000_BUILTIN_P
14328 #undef RS6000_BUILTIN_Q
14329 #undef RS6000_BUILTIN_S
14330 #undef RS6000_BUILTIN_X
14331
14332 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14333 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14334 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14335 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14336 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14337 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14338 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14339 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14340 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
14341 { MASK, ICODE, NAME, ENUM },
14342
14343 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14344 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14345 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14346
14347 /* AltiVec predicates. */
14348
14349 static const struct builtin_description bdesc_altivec_preds[] =
14350 {
14351 #include "powerpcspe-builtin.def"
14352 };
14353
14354 /* SPE predicates. */
14355 #undef RS6000_BUILTIN_0
14356 #undef RS6000_BUILTIN_1
14357 #undef RS6000_BUILTIN_2
14358 #undef RS6000_BUILTIN_3
14359 #undef RS6000_BUILTIN_A
14360 #undef RS6000_BUILTIN_D
14361 #undef RS6000_BUILTIN_E
14362 #undef RS6000_BUILTIN_H
14363 #undef RS6000_BUILTIN_P
14364 #undef RS6000_BUILTIN_Q
14365 #undef RS6000_BUILTIN_S
14366 #undef RS6000_BUILTIN_X
14367
14368 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14369 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14370 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14371 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14372 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14373 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14374 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14375 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14376 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14377 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14378 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
14379 { MASK, ICODE, NAME, ENUM },
14380
14381 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14382
14383 static const struct builtin_description bdesc_spe_predicates[] =
14384 {
14385 #include "powerpcspe-builtin.def"
14386 };
14387
14388 /* SPE evsel predicates. */
14389 #undef RS6000_BUILTIN_0
14390 #undef RS6000_BUILTIN_1
14391 #undef RS6000_BUILTIN_2
14392 #undef RS6000_BUILTIN_3
14393 #undef RS6000_BUILTIN_A
14394 #undef RS6000_BUILTIN_D
14395 #undef RS6000_BUILTIN_E
14396 #undef RS6000_BUILTIN_H
14397 #undef RS6000_BUILTIN_P
14398 #undef RS6000_BUILTIN_Q
14399 #undef RS6000_BUILTIN_S
14400 #undef RS6000_BUILTIN_X
14401
14402 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14403 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14404 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14405 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14406 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14407 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14408 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
14409 { MASK, ICODE, NAME, ENUM },
14410
14411 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14412 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14413 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14414 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14415 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14416
14417 static const struct builtin_description bdesc_spe_evsel[] =
14418 {
14419 #include "powerpcspe-builtin.def"
14420 };
14421
14422 /* PAIRED predicates. */
14423 #undef RS6000_BUILTIN_0
14424 #undef RS6000_BUILTIN_1
14425 #undef RS6000_BUILTIN_2
14426 #undef RS6000_BUILTIN_3
14427 #undef RS6000_BUILTIN_A
14428 #undef RS6000_BUILTIN_D
14429 #undef RS6000_BUILTIN_E
14430 #undef RS6000_BUILTIN_H
14431 #undef RS6000_BUILTIN_P
14432 #undef RS6000_BUILTIN_Q
14433 #undef RS6000_BUILTIN_S
14434 #undef RS6000_BUILTIN_X
14435
14436 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14437 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14438 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14439 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14440 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14441 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14442 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14443 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14444 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14445 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14446 { MASK, ICODE, NAME, ENUM },
14447
14448 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14449 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14450
14451 static const struct builtin_description bdesc_paired_preds[] =
14452 {
14453 #include "powerpcspe-builtin.def"
14454 };
14455
14456 /* ABS* operations. */
14457
14458 #undef RS6000_BUILTIN_0
14459 #undef RS6000_BUILTIN_1
14460 #undef RS6000_BUILTIN_2
14461 #undef RS6000_BUILTIN_3
14462 #undef RS6000_BUILTIN_A
14463 #undef RS6000_BUILTIN_D
14464 #undef RS6000_BUILTIN_E
14465 #undef RS6000_BUILTIN_H
14466 #undef RS6000_BUILTIN_P
14467 #undef RS6000_BUILTIN_Q
14468 #undef RS6000_BUILTIN_S
14469 #undef RS6000_BUILTIN_X
14470
14471 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14472 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14473 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14474 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14475 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14476 { MASK, ICODE, NAME, ENUM },
14477
14478 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14479 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14480 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14481 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14482 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14483 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14484 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14485
14486 static const struct builtin_description bdesc_abs[] =
14487 {
14488 #include "powerpcspe-builtin.def"
14489 };
14490
14491 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14492 foo (VECa). */
14493
14494 #undef RS6000_BUILTIN_0
14495 #undef RS6000_BUILTIN_1
14496 #undef RS6000_BUILTIN_2
14497 #undef RS6000_BUILTIN_3
14498 #undef RS6000_BUILTIN_A
14499 #undef RS6000_BUILTIN_D
14500 #undef RS6000_BUILTIN_E
14501 #undef RS6000_BUILTIN_H
14502 #undef RS6000_BUILTIN_P
14503 #undef RS6000_BUILTIN_Q
14504 #undef RS6000_BUILTIN_S
14505 #undef RS6000_BUILTIN_X
14506
14507 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14508 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14509 { MASK, ICODE, NAME, ENUM },
14510
14511 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14512 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14513 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14514 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14515 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14516 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14517 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14518 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14519 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14520 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14521
14522 static const struct builtin_description bdesc_1arg[] =
14523 {
14524 #include "powerpcspe-builtin.def"
14525 };
14526
14527 /* Simple no-argument operations: result = __builtin_darn_32 (). */
14528
14529 #undef RS6000_BUILTIN_0
14530 #undef RS6000_BUILTIN_1
14531 #undef RS6000_BUILTIN_2
14532 #undef RS6000_BUILTIN_3
14533 #undef RS6000_BUILTIN_A
14534 #undef RS6000_BUILTIN_D
14535 #undef RS6000_BUILTIN_E
14536 #undef RS6000_BUILTIN_H
14537 #undef RS6000_BUILTIN_P
14538 #undef RS6000_BUILTIN_Q
14539 #undef RS6000_BUILTIN_S
14540 #undef RS6000_BUILTIN_X
14541
14542 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14543 { MASK, ICODE, NAME, ENUM },
14544
14545 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14546 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14547 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14548 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14549 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14550 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14551 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14552 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14553 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14554 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14555 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14556
14557 static const struct builtin_description bdesc_0arg[] =
14558 {
14559 #include "powerpcspe-builtin.def"
14560 };
14561
14562 /* HTM builtins. */
14563 #undef RS6000_BUILTIN_0
14564 #undef RS6000_BUILTIN_1
14565 #undef RS6000_BUILTIN_2
14566 #undef RS6000_BUILTIN_3
14567 #undef RS6000_BUILTIN_A
14568 #undef RS6000_BUILTIN_D
14569 #undef RS6000_BUILTIN_E
14570 #undef RS6000_BUILTIN_H
14571 #undef RS6000_BUILTIN_P
14572 #undef RS6000_BUILTIN_Q
14573 #undef RS6000_BUILTIN_S
14574 #undef RS6000_BUILTIN_X
14575
14576 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14577 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14578 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14579 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14580 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14581 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14582 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
14583 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14584 { MASK, ICODE, NAME, ENUM },
14585
14586 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14587 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14588 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
14589 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14590
14591 static const struct builtin_description bdesc_htm[] =
14592 {
14593 #include "powerpcspe-builtin.def"
14594 };
14595
14596 #undef RS6000_BUILTIN_0
14597 #undef RS6000_BUILTIN_1
14598 #undef RS6000_BUILTIN_2
14599 #undef RS6000_BUILTIN_3
14600 #undef RS6000_BUILTIN_A
14601 #undef RS6000_BUILTIN_D
14602 #undef RS6000_BUILTIN_E
14603 #undef RS6000_BUILTIN_H
14604 #undef RS6000_BUILTIN_P
14605 #undef RS6000_BUILTIN_Q
14606 #undef RS6000_BUILTIN_S
14607
14608 /* Return true if a builtin function is overloaded. */
14609 bool
14610 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14611 {
14612 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14613 }
14614
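/* Return the name of the overloaded builtin function FNCODE. */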
14615 const char *
14616 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14617 {
14618 return rs6000_builtin_info[(int)fncode].name;
14619 }
14620
14621 /* Expand an expression EXP that calls a builtin without arguments. */
14622 static rtx
14623 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14624 {
14625 rtx pat;
14626 machine_mode tmode = insn_data[icode].operand[0].mode;
14627
14628 if (icode == CODE_FOR_nothing)
14629 /* Builtin not supported on this processor. */
14630 return 0;
14631
14632 if (target == 0
14633 || GET_MODE (target) != tmode
14634 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14635 target = gen_reg_rtx (tmode);
14636
14637 pat = GEN_FCN (icode) (target);
14638 if (! pat)
14639 return 0;
14640 emit_insn (pat);
14641
14642 return target;
14643 }
14644
14645
14646 static rtx
14647 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14648 {
14649 rtx pat;
14650 tree arg0 = CALL_EXPR_ARG (exp, 0);
14651 tree arg1 = CALL_EXPR_ARG (exp, 1);
14652 rtx op0 = expand_normal (arg0);
14653 rtx op1 = expand_normal (arg1);
14654 machine_mode mode0 = insn_data[icode].operand[0].mode;
14655 machine_mode mode1 = insn_data[icode].operand[1].mode;
14656
14657 if (icode == CODE_FOR_nothing)
14658 /* Builtin not supported on this processor. */
14659 return 0;
14660
14661 /* If we got invalid arguments bail out before generating bad rtl. */
14662 if (arg0 == error_mark_node || arg1 == error_mark_node)
14663 return const0_rtx;
14664
14665 if (GET_CODE (op0) != CONST_INT
14666 || INTVAL (op0) > 255
14667 || INTVAL (op0) < 0)
14668 {
14669 error ("argument 1 must be an 8-bit field value");
14670 return const0_rtx;
14671 }
14672
14673 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14674 op0 = copy_to_mode_reg (mode0, op0);
14675
14676 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14677 op1 = copy_to_mode_reg (mode1, op1);
14678
14679 pat = GEN_FCN (icode) (op0, op1);
14680 if (! pat)
14681 return const0_rtx;
14682 emit_insn (pat);
14683
14684 return NULL_RTX;
14685 }
14686
14687 static rtx
14688 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14689 {
14690 rtx pat;
14691 tree arg0 = CALL_EXPR_ARG (exp, 0);
14692 rtx op0 = expand_normal (arg0);
14693 machine_mode tmode = insn_data[icode].operand[0].mode;
14694 machine_mode mode0 = insn_data[icode].operand[1].mode;
14695
14696 if (icode == CODE_FOR_nothing)
14697 /* Builtin not supported on this processor. */
14698 return 0;
14699
14700 /* If we got invalid arguments bail out before generating bad rtl. */
14701 if (arg0 == error_mark_node)
14702 return const0_rtx;
14703
14704 if (icode == CODE_FOR_altivec_vspltisb
14705 || icode == CODE_FOR_altivec_vspltish
14706 || icode == CODE_FOR_altivec_vspltisw
14707 || icode == CODE_FOR_spe_evsplatfi
14708 || icode == CODE_FOR_spe_evsplati)
14709 {
14710 /* Only allow 5-bit *signed* literals. */
14711 if (GET_CODE (op0) != CONST_INT
14712 || INTVAL (op0) > 15
14713 || INTVAL (op0) < -16)
14714 {
14715 error ("argument 1 must be a 5-bit signed literal");
14716 return CONST0_RTX (tmode);
14717 }
14718 }
14719
14720 if (target == 0
14721 || GET_MODE (target) != tmode
14722 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14723 target = gen_reg_rtx (tmode);
14724
14725 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14726 op0 = copy_to_mode_reg (mode0, op0);
14727
14728 pat = GEN_FCN (icode) (target, op0);
14729 if (! pat)
14730 return 0;
14731 emit_insn (pat);
14732
14733 return target;
14734 }
14735
14736 static rtx
14737 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14738 {
14739 rtx pat, scratch1, scratch2;
14740 tree arg0 = CALL_EXPR_ARG (exp, 0);
14741 rtx op0 = expand_normal (arg0);
14742 machine_mode tmode = insn_data[icode].operand[0].mode;
14743 machine_mode mode0 = insn_data[icode].operand[1].mode;
14744
14745 /* If we have invalid arguments, bail out before generating bad rtl. */
14746 if (arg0 == error_mark_node)
14747 return const0_rtx;
14748
14749 if (target == 0
14750 || GET_MODE (target) != tmode
14751 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14752 target = gen_reg_rtx (tmode);
14753
14754 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14755 op0 = copy_to_mode_reg (mode0, op0);
14756
14757 scratch1 = gen_reg_rtx (mode0);
14758 scratch2 = gen_reg_rtx (mode0);
14759
14760 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14761 if (! pat)
14762 return 0;
14763 emit_insn (pat);
14764
14765 return target;
14766 }
14767
14768 static rtx
14769 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14770 {
14771 rtx pat;
14772 tree arg0 = CALL_EXPR_ARG (exp, 0);
14773 tree arg1 = CALL_EXPR_ARG (exp, 1);
14774 rtx op0 = expand_normal (arg0);
14775 rtx op1 = expand_normal (arg1);
14776 machine_mode tmode = insn_data[icode].operand[0].mode;
14777 machine_mode mode0 = insn_data[icode].operand[1].mode;
14778 machine_mode mode1 = insn_data[icode].operand[2].mode;
14779
14780 if (icode == CODE_FOR_nothing)
14781 /* Builtin not supported on this processor. */
14782 return 0;
14783
14784 /* If we got invalid arguments bail out before generating bad rtl. */
14785 if (arg0 == error_mark_node || arg1 == error_mark_node)
14786 return const0_rtx;
14787
14788 if (icode == CODE_FOR_altivec_vcfux
14789 || icode == CODE_FOR_altivec_vcfsx
14790 || icode == CODE_FOR_altivec_vctsxs
14791 || icode == CODE_FOR_altivec_vctuxs
14792 || icode == CODE_FOR_altivec_vspltb
14793 || icode == CODE_FOR_altivec_vsplth
14794 || icode == CODE_FOR_altivec_vspltw
14795 || icode == CODE_FOR_spe_evaddiw
14796 || icode == CODE_FOR_spe_evldd
14797 || icode == CODE_FOR_spe_evldh
14798 || icode == CODE_FOR_spe_evldw
14799 || icode == CODE_FOR_spe_evlhhesplat
14800 || icode == CODE_FOR_spe_evlhhossplat
14801 || icode == CODE_FOR_spe_evlhhousplat
14802 || icode == CODE_FOR_spe_evlwhe
14803 || icode == CODE_FOR_spe_evlwhos
14804 || icode == CODE_FOR_spe_evlwhou
14805 || icode == CODE_FOR_spe_evlwhsplat
14806 || icode == CODE_FOR_spe_evlwwsplat
14807 || icode == CODE_FOR_spe_evrlwi
14808 || icode == CODE_FOR_spe_evslwi
14809 || icode == CODE_FOR_spe_evsrwis
14810 || icode == CODE_FOR_spe_evsubifw
14811 || icode == CODE_FOR_spe_evsrwiu)
14812 {
14813 /* Only allow 5-bit unsigned literals. */
14814 STRIP_NOPS (arg1);
14815 if (TREE_CODE (arg1) != INTEGER_CST
14816 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14817 {
14818 error ("argument 2 must be a 5-bit unsigned literal");
14819 return CONST0_RTX (tmode);
14820 }
14821 }
14822 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14823 || icode == CODE_FOR_dfptstsfi_lt_dd
14824 || icode == CODE_FOR_dfptstsfi_gt_dd
14825 || icode == CODE_FOR_dfptstsfi_unordered_dd
14826 || icode == CODE_FOR_dfptstsfi_eq_td
14827 || icode == CODE_FOR_dfptstsfi_lt_td
14828 || icode == CODE_FOR_dfptstsfi_gt_td
14829 || icode == CODE_FOR_dfptstsfi_unordered_td)
14830 {
14831 /* Only allow 6-bit unsigned literals. */
14832 STRIP_NOPS (arg0);
14833 if (TREE_CODE (arg0) != INTEGER_CST
14834 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14835 {
14836 error ("argument 1 must be a 6-bit unsigned literal");
14837 return CONST0_RTX (tmode);
14838 }
14839 }
14840 else if (icode == CODE_FOR_xststdcdp
14841 || icode == CODE_FOR_xststdcsp
14842 || icode == CODE_FOR_xvtstdcdp
14843 || icode == CODE_FOR_xvtstdcsp)
14844 {
14845 /* Only allow 7-bit unsigned literals. */
14846 STRIP_NOPS (arg1);
14847 if (TREE_CODE (arg1) != INTEGER_CST
14848 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14849 {
14850 error ("argument 2 must be a 7-bit unsigned literal");
14851 return CONST0_RTX (tmode);
14852 }
14853 }
14854
14855 if (target == 0
14856 || GET_MODE (target) != tmode
14857 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14858 target = gen_reg_rtx (tmode);
14859
14860 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14861 op0 = copy_to_mode_reg (mode0, op0);
14862 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14863 op1 = copy_to_mode_reg (mode1, op1);
14864
14865 pat = GEN_FCN (icode) (target, op0, op1);
14866 if (! pat)
14867 return 0;
14868 emit_insn (pat);
14869
14870 return target;
14871 }
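
/* For illustration only: the literal checks above mean that, e.g., a
   call like __builtin_altivec_vspltw (v, 37) is rejected at expansion
   time ("argument 2 must be a 5-bit unsigned literal"), since 37 does
   not fit the insn's 5-bit unsigned immediate field.  */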
14872
14873 static rtx
14874 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14875 {
14876 rtx pat, scratch;
14877 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14878 tree arg0 = CALL_EXPR_ARG (exp, 1);
14879 tree arg1 = CALL_EXPR_ARG (exp, 2);
14880 rtx op0 = expand_normal (arg0);
14881 rtx op1 = expand_normal (arg1);
14882 machine_mode tmode = SImode;
14883 machine_mode mode0 = insn_data[icode].operand[1].mode;
14884 machine_mode mode1 = insn_data[icode].operand[2].mode;
14885 int cr6_form_int;
14886
14887 if (TREE_CODE (cr6_form) != INTEGER_CST)
14888 {
14889 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14890 return const0_rtx;
14891 }
14892 else
14893 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14894
14895 gcc_assert (mode0 == mode1);
14896
14897 /* If we have invalid arguments, bail out before generating bad rtl. */
14898 if (arg0 == error_mark_node || arg1 == error_mark_node)
14899 return const0_rtx;
14900
14901 if (target == 0
14902 || GET_MODE (target) != tmode
14903 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14904 target = gen_reg_rtx (tmode);
14905
14906 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14907 op0 = copy_to_mode_reg (mode0, op0);
14908 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14909 op1 = copy_to_mode_reg (mode1, op1);
14910
14911 /* Note that for many of the relevant operations (e.g. cmpne or
14912 cmpeq) with float or double operands, it makes more sense for the
14913 mode of the allocated scratch register to select a vector of
14914 integer. But the choice to copy the mode of operand 0 was made
14915 long ago and there are no plans to change it. */
14916 scratch = gen_reg_rtx (mode0);
14917
14918 pat = GEN_FCN (icode) (scratch, op0, op1);
14919 if (! pat)
14920 return 0;
14921 emit_insn (pat);
14922
14923 /* The vec_any* and vec_all* predicates use the same opcodes for two
14924 different operations, but the bits in CR6 will be different
14925 depending on what information we want. So we have to play tricks
14926 with CR6 to get the right bits out.
14927
14928 If you think this is disgusting, look at the specs for the
14929 AltiVec predicates. */
14930
14931 switch (cr6_form_int)
14932 {
14933 case 0:
14934 emit_insn (gen_cr6_test_for_zero (target));
14935 break;
14936 case 1:
14937 emit_insn (gen_cr6_test_for_zero_reverse (target));
14938 break;
14939 case 2:
14940 emit_insn (gen_cr6_test_for_lt (target));
14941 break;
14942 case 3:
14943 emit_insn (gen_cr6_test_for_lt_reverse (target));
14944 break;
14945 default:
14946 error ("argument 1 of __builtin_altivec_predicate is out of range");
14947 break;
14948 }
14949
14950 return target;
14951 }
14952
14953 static rtx
14954 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14955 {
14956 rtx pat, addr;
14957 tree arg0 = CALL_EXPR_ARG (exp, 0);
14958 tree arg1 = CALL_EXPR_ARG (exp, 1);
14959 machine_mode tmode = insn_data[icode].operand[0].mode;
14960 machine_mode mode0 = Pmode;
14961 machine_mode mode1 = Pmode;
14962 rtx op0 = expand_normal (arg0);
14963 rtx op1 = expand_normal (arg1);
14964
14965 if (icode == CODE_FOR_nothing)
14966 /* Builtin not supported on this processor. */
14967 return 0;
14968
14969 /* If we got invalid arguments bail out before generating bad rtl. */
14970 if (arg0 == error_mark_node || arg1 == error_mark_node)
14971 return const0_rtx;
14972
14973 if (target == 0
14974 || GET_MODE (target) != tmode
14975 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14976 target = gen_reg_rtx (tmode);
14977
14978 op1 = copy_to_mode_reg (mode1, op1);
14979
14980 if (op0 == const0_rtx)
14981 {
14982 addr = gen_rtx_MEM (tmode, op1);
14983 }
14984 else
14985 {
14986 op0 = copy_to_mode_reg (mode0, op0);
14987 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14988 }
14989
14990 pat = GEN_FCN (icode) (target, addr);
14991
14992 if (! pat)
14993 return 0;
14994 emit_insn (pat);
14995
14996 return target;
14997 }
14998
14999 /* Return a constant vector for use as a little-endian permute control vector
15000 to reverse the order of elements of the given vector mode. */
15001 static rtx
15002 swap_selector_for_mode (machine_mode mode)
15003 {
15004 /* These are little endian vectors, so their elements are reversed
15005 from what you would normally expect for a permute control vector. */
15006 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
15007 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
15008 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
15009 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
15010 unsigned int *swaparray, i;
15011 rtx perm[16];
15012
15013 switch (mode)
15014 {
15015 case E_V2DFmode:
15016 case E_V2DImode:
15017 swaparray = swap2;
15018 break;
15019 case E_V4SFmode:
15020 case E_V4SImode:
15021 swaparray = swap4;
15022 break;
15023 case E_V8HImode:
15024 swaparray = swap8;
15025 break;
15026 case E_V16QImode:
15027 swaparray = swap16;
15028 break;
15029 default:
15030 gcc_unreachable ();
15031 }
15032
15033 for (i = 0; i < 16; ++i)
15034 perm[i] = GEN_INT (swaparray[i]);
15035
15036 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
15037 }
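
/* For illustration only: for V4SImode the control vector built above is
   { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }.  Read with the
   little-endian byte numbering noted above, a vperm with this selector
   reverses the order of the four 32-bit elements of its input.  */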
15038
15039 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
15040 with -maltivec=be specified. Issue the load followed by an element-
15041 reversing permute. */
15042 void
15043 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15044 {
15045 rtx tmp = gen_reg_rtx (mode);
15046 rtx load = gen_rtx_SET (tmp, op1);
15047 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15048 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
15049 rtx sel = swap_selector_for_mode (mode);
15050 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
15051
15052 gcc_assert (REG_P (op0));
15053 emit_insn (par);
15054 emit_insn (gen_rtx_SET (op0, vperm));
15055 }
15056
15057 /* Generate code for a "stvxl" built-in for a little endian target with
15058 -maltivec=be specified. Issue the store preceded by an element-reversing
15059 permute. */
15060 void
15061 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15062 {
15063 rtx tmp = gen_reg_rtx (mode);
15064 rtx store = gen_rtx_SET (op0, tmp);
15065 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
15066 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
15067 rtx sel = swap_selector_for_mode (mode);
15068 rtx vperm;
15069
15070 gcc_assert (REG_P (op1));
15071 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15072 emit_insn (gen_rtx_SET (tmp, vperm));
15073 emit_insn (par);
15074 }
15075
15076 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
15077 specified. Issue the store preceded by an element-reversing permute. */
15078 void
15079 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
15080 {
15081 machine_mode inner_mode = GET_MODE_INNER (mode);
15082 rtx tmp = gen_reg_rtx (mode);
15083 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
15084 rtx sel = swap_selector_for_mode (mode);
15085 rtx vperm;
15086
15087 gcc_assert (REG_P (op1));
15088 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
15089 emit_insn (gen_rtx_SET (tmp, vperm));
15090 emit_insn (gen_rtx_SET (op0, stvx));
15091 }
15092
15093 static rtx
15094 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
15095 {
15096 rtx pat, addr;
15097 tree arg0 = CALL_EXPR_ARG (exp, 0);
15098 tree arg1 = CALL_EXPR_ARG (exp, 1);
15099 machine_mode tmode = insn_data[icode].operand[0].mode;
15100 machine_mode mode0 = Pmode;
15101 machine_mode mode1 = Pmode;
15102 rtx op0 = expand_normal (arg0);
15103 rtx op1 = expand_normal (arg1);
15104
15105 if (icode == CODE_FOR_nothing)
15106 /* Builtin not supported on this processor. */
15107 return 0;
15108
15109 /* If we got invalid arguments bail out before generating bad rtl. */
15110 if (arg0 == error_mark_node || arg1 == error_mark_node)
15111 return const0_rtx;
15112
15113 if (target == 0
15114 || GET_MODE (target) != tmode
15115 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15116 target = gen_reg_rtx (tmode);
15117
15118 op1 = copy_to_mode_reg (mode1, op1);
15119
15120 /* For LVX, express the RTL accurately by ANDing the address with -16.
15121 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
15122 so the raw address is fine. */
15123 if (icode == CODE_FOR_altivec_lvx_v2df_2op
15124 || icode == CODE_FOR_altivec_lvx_v2di_2op
15125 || icode == CODE_FOR_altivec_lvx_v4sf_2op
15126 || icode == CODE_FOR_altivec_lvx_v4si_2op
15127 || icode == CODE_FOR_altivec_lvx_v8hi_2op
15128 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
15129 {
15130 rtx rawaddr;
15131 if (op0 == const0_rtx)
15132 rawaddr = op1;
15133 else
15134 {
15135 op0 = copy_to_mode_reg (mode0, op0);
15136 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
15137 }
15138 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15139 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
15140
15141 /* For -maltivec=be, emit the load and follow it up with a
15142 permute to swap the elements. */
15143 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15144 {
15145 rtx temp = gen_reg_rtx (tmode);
15146 emit_insn (gen_rtx_SET (temp, addr));
15147
15148 rtx sel = swap_selector_for_mode (tmode);
15149 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
15150 UNSPEC_VPERM);
15151 emit_insn (gen_rtx_SET (target, vperm));
15152 }
15153 else
15154 emit_insn (gen_rtx_SET (target, addr));
15155 }
15156 else
15157 {
15158 if (op0 == const0_rtx)
15159 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
15160 else
15161 {
15162 op0 = copy_to_mode_reg (mode0, op0);
15163 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
15164 gen_rtx_PLUS (Pmode, op1, op0));
15165 }
15166
15167 pat = GEN_FCN (icode) (target, addr);
15168 if (! pat)
15169 return 0;
15170 emit_insn (pat);
15171 }
15172
15173 return target;
15174 }
15175
15176 static rtx
15177 spe_expand_stv_builtin (enum insn_code icode, tree exp)
15178 {
15179 tree arg0 = CALL_EXPR_ARG (exp, 0);
15180 tree arg1 = CALL_EXPR_ARG (exp, 1);
15181 tree arg2 = CALL_EXPR_ARG (exp, 2);
15182 rtx op0 = expand_normal (arg0);
15183 rtx op1 = expand_normal (arg1);
15184 rtx op2 = expand_normal (arg2);
15185 rtx pat;
15186 machine_mode mode0 = insn_data[icode].operand[0].mode;
15187 machine_mode mode1 = insn_data[icode].operand[1].mode;
15188 machine_mode mode2 = insn_data[icode].operand[2].mode;
15189
15190 /* Invalid arguments. Bail before doing anything stoopid! */
15191 if (arg0 == error_mark_node
15192 || arg1 == error_mark_node
15193 || arg2 == error_mark_node)
15194 return const0_rtx;
15195
15196 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
15197 op0 = copy_to_mode_reg (mode2, op0);
15198 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
15199 op1 = copy_to_mode_reg (mode0, op1);
15200 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
15201 op2 = copy_to_mode_reg (mode1, op2);
15202
15203 pat = GEN_FCN (icode) (op1, op2, op0);
15204 if (pat)
15205 emit_insn (pat);
15206 return NULL_RTX;
15207 }
15208
15209 static rtx
15210 paired_expand_stv_builtin (enum insn_code icode, tree exp)
15211 {
15212 tree arg0 = CALL_EXPR_ARG (exp, 0);
15213 tree arg1 = CALL_EXPR_ARG (exp, 1);
15214 tree arg2 = CALL_EXPR_ARG (exp, 2);
15215 rtx op0 = expand_normal (arg0);
15216 rtx op1 = expand_normal (arg1);
15217 rtx op2 = expand_normal (arg2);
15218 rtx pat, addr;
15219 machine_mode tmode = insn_data[icode].operand[0].mode;
15220 machine_mode mode1 = Pmode;
15221 machine_mode mode2 = Pmode;
15222
15223 /* Invalid arguments. Bail before doing anything stoopid! */
15224 if (arg0 == error_mark_node
15225 || arg1 == error_mark_node
15226 || arg2 == error_mark_node)
15227 return const0_rtx;
15228
15229 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
15230 op0 = copy_to_mode_reg (tmode, op0);
15231
15232 op2 = copy_to_mode_reg (mode2, op2);
15233
15234 if (op1 == const0_rtx)
15235 {
15236 addr = gen_rtx_MEM (tmode, op2);
15237 }
15238 else
15239 {
15240 op1 = copy_to_mode_reg (mode1, op1);
15241 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
15242 }
15243
15244 pat = GEN_FCN (icode) (addr, op0);
15245 if (pat)
15246 emit_insn (pat);
15247 return NULL_RTX;
15248 }
15249
15250 static rtx
15251 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
15252 {
15253 rtx pat;
15254 tree arg0 = CALL_EXPR_ARG (exp, 0);
15255 tree arg1 = CALL_EXPR_ARG (exp, 1);
15256 tree arg2 = CALL_EXPR_ARG (exp, 2);
15257 rtx op0 = expand_normal (arg0);
15258 rtx op1 = expand_normal (arg1);
15259 rtx op2 = expand_normal (arg2);
15260 machine_mode mode0 = insn_data[icode].operand[0].mode;
15261 machine_mode mode1 = insn_data[icode].operand[1].mode;
15262 machine_mode mode2 = insn_data[icode].operand[2].mode;
15263
15264 if (icode == CODE_FOR_nothing)
15265 /* Builtin not supported on this processor. */
15266 return NULL_RTX;
15267
15268 /* If we got invalid arguments bail out before generating bad rtl. */
15269 if (arg0 == error_mark_node
15270 || arg1 == error_mark_node
15271 || arg2 == error_mark_node)
15272 return NULL_RTX;
15273
15274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15275 op0 = copy_to_mode_reg (mode0, op0);
15276 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15277 op1 = copy_to_mode_reg (mode1, op1);
15278 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15279 op2 = copy_to_mode_reg (mode2, op2);
15280
15281 pat = GEN_FCN (icode) (op0, op1, op2);
15282 if (pat)
15283 emit_insn (pat);
15284
15285 return NULL_RTX;
15286 }
15287
15288 static rtx
15289 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
15290 {
15291 tree arg0 = CALL_EXPR_ARG (exp, 0);
15292 tree arg1 = CALL_EXPR_ARG (exp, 1);
15293 tree arg2 = CALL_EXPR_ARG (exp, 2);
15294 rtx op0 = expand_normal (arg0);
15295 rtx op1 = expand_normal (arg1);
15296 rtx op2 = expand_normal (arg2);
15297 rtx pat, addr, rawaddr;
15298 machine_mode tmode = insn_data[icode].operand[0].mode;
15299 machine_mode smode = insn_data[icode].operand[1].mode;
15300 machine_mode mode1 = Pmode;
15301 machine_mode mode2 = Pmode;
15302
15303 /* Invalid arguments. Bail before doing anything stoopid! */
15304 if (arg0 == error_mark_node
15305 || arg1 == error_mark_node
15306 || arg2 == error_mark_node)
15307 return const0_rtx;
15308
15309 op2 = copy_to_mode_reg (mode2, op2);
15310
15311 /* For STVX, express the RTL accurately by ANDing the address with -16.
15312 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
15313 so the raw address is fine. */
15314 if (icode == CODE_FOR_altivec_stvx_v2df_2op
15315 || icode == CODE_FOR_altivec_stvx_v2di_2op
15316 || icode == CODE_FOR_altivec_stvx_v4sf_2op
15317 || icode == CODE_FOR_altivec_stvx_v4si_2op
15318 || icode == CODE_FOR_altivec_stvx_v8hi_2op
15319 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
15320 {
15321 if (op1 == const0_rtx)
15322 rawaddr = op2;
15323 else
15324 {
15325 op1 = copy_to_mode_reg (mode1, op1);
15326 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
15327 }
15328
15329 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
15330 addr = gen_rtx_MEM (tmode, addr);
15331
15332 op0 = copy_to_mode_reg (tmode, op0);
15333
15334 /* For -maltivec=be, emit a permute to swap the elements, followed
15335 by the store. */
15336 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
15337 {
15338 rtx temp = gen_reg_rtx (tmode);
15339 rtx sel = swap_selector_for_mode (tmode);
15340 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
15341 UNSPEC_VPERM);
15342 emit_insn (gen_rtx_SET (temp, vperm));
15343 emit_insn (gen_rtx_SET (addr, temp));
15344 }
15345 else
15346 emit_insn (gen_rtx_SET (addr, op0));
15347 }
15348 else
15349 {
15350 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
15351 op0 = copy_to_mode_reg (smode, op0);
15352
15353 if (op1 == const0_rtx)
15354 addr = gen_rtx_MEM (tmode, op2);
15355 else
15356 {
15357 op1 = copy_to_mode_reg (mode1, op1);
15358 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
15359 }
15360
15361 pat = GEN_FCN (icode) (addr, op0);
15362 if (pat)
15363 emit_insn (pat);
15364 }
15365
15366 return NULL_RTX;
15367 }
15368
15369 /* Return the appropriate SPR number associated with the given builtin. */
15370 static inline HOST_WIDE_INT
15371 htm_spr_num (enum rs6000_builtins code)
15372 {
15373 if (code == HTM_BUILTIN_GET_TFHAR
15374 || code == HTM_BUILTIN_SET_TFHAR)
15375 return TFHAR_SPR;
15376 else if (code == HTM_BUILTIN_GET_TFIAR
15377 || code == HTM_BUILTIN_SET_TFIAR)
15378 return TFIAR_SPR;
15379 else if (code == HTM_BUILTIN_GET_TEXASR
15380 || code == HTM_BUILTIN_SET_TEXASR)
15381 return TEXASR_SPR;
15382 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
15383 || code == HTM_BUILTIN_SET_TEXASRU);
15384 return TEXASRU_SPR;
15385 }
15386
15387 /* Return the appropriate SPR regno associated with the given builtin. */
15388 static inline HOST_WIDE_INT
15389 htm_spr_regno (enum rs6000_builtins code)
15390 {
15391 if (code == HTM_BUILTIN_GET_TFHAR
15392 || code == HTM_BUILTIN_SET_TFHAR)
15393 return TFHAR_REGNO;
15394 else if (code == HTM_BUILTIN_GET_TFIAR
15395 || code == HTM_BUILTIN_SET_TFIAR)
15396 return TFIAR_REGNO;
15397 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
15398 || code == HTM_BUILTIN_SET_TEXASR
15399 || code == HTM_BUILTIN_GET_TEXASRU
15400 || code == HTM_BUILTIN_SET_TEXASRU);
15401 return TEXASR_REGNO;
15402 }
15403
15404 /* Return the correct ICODE value depending on whether we are
15405 setting or reading the HTM SPRs. */
15406 static inline enum insn_code
15407 rs6000_htm_spr_icode (bool nonvoid)
15408 {
15409 if (nonvoid)
15410 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
15411 else
15412 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
15413 }
15414
15415 /* Expand the HTM builtin in EXP and store the result in TARGET.
15416 Store true in *EXPANDEDP if we found a builtin to expand. */
15417 static rtx
15418 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
15419 {
15420 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15421 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
15422 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15423 const struct builtin_description *d;
15424 size_t i;
15425
15426 *expandedp = true;
15427
15428 if (!TARGET_POWERPC64
15429 && (fcode == HTM_BUILTIN_TABORTDC
15430 || fcode == HTM_BUILTIN_TABORTDCI))
15431 {
15432 size_t uns_fcode = (size_t)fcode;
15433 const char *name = rs6000_builtin_info[uns_fcode].name;
15434 error ("builtin %s is only valid in 64-bit mode", name);
15435 return const0_rtx;
15436 }
15437
15438 /* Expand the HTM builtins. */
15439 d = bdesc_htm;
15440 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15441 if (d->code == fcode)
15442 {
15443 rtx op[MAX_HTM_OPERANDS], pat;
15444 int nopnds = 0;
15445 tree arg;
15446 call_expr_arg_iterator iter;
15447 unsigned attr = rs6000_builtin_info[fcode].attr;
15448 enum insn_code icode = d->icode;
15449 const struct insn_operand_data *insn_op;
15450 bool uses_spr = (attr & RS6000_BTC_SPR);
15451 rtx cr = NULL_RTX;
15452
15453 if (uses_spr)
15454 icode = rs6000_htm_spr_icode (nonvoid);
15455 insn_op = &insn_data[icode].operand[0];
15456
15457 if (nonvoid)
15458 {
15459 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
15460 if (!target
15461 || GET_MODE (target) != tmode
15462 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
15463 target = gen_reg_rtx (tmode);
15464 if (uses_spr)
15465 op[nopnds++] = target;
15466 }
15467
15468 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
15469 {
15470 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
15471 return const0_rtx;
15472
15473 insn_op = &insn_data[icode].operand[nopnds];
15474
15475 op[nopnds] = expand_normal (arg);
15476
15477 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
15478 {
15479 if (!strcmp (insn_op->constraint, "n"))
15480 {
15481 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
15482 if (!CONST_INT_P (op[nopnds]))
15483 error ("argument %d must be an unsigned literal", arg_num);
15484 else
15485 error ("argument %d is an unsigned literal that is "
15486 "out of range", arg_num);
15487 return const0_rtx;
15488 }
15489 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
15490 }
15491
15492 nopnds++;
15493 }
15494
15495 /* Handle the builtins for extended mnemonics. These accept
15496 no arguments, but map to builtins that take arguments. */
15497 switch (fcode)
15498 {
15499 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
15500 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
15501 op[nopnds++] = GEN_INT (1);
15502 if (flag_checking)
15503 attr |= RS6000_BTC_UNARY;
15504 break;
15505 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
15506 op[nopnds++] = GEN_INT (0);
15507 if (flag_checking)
15508 attr |= RS6000_BTC_UNARY;
15509 break;
15510 default:
15511 break;
15512 }
15513
15514 /* If this builtin accesses SPRs, then pass in the appropriate
15515 SPR number and SPR regno as the last two operands. */
15516 if (uses_spr)
15517 {
15518 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15519 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15520 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15521 }
15522 /* If this builtin accesses a CR, then pass in a scratch
15523 CR as the last operand. */
15524 else if (attr & RS6000_BTC_CR)
15525 { cr = gen_reg_rtx (CCmode);
15526 op[nopnds++] = cr;
15527 }
15528
15529 if (flag_checking)
15530 {
15531 int expected_nopnds = 0;
15532 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15533 expected_nopnds = 1;
15534 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15535 expected_nopnds = 2;
15536 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15537 expected_nopnds = 3;
15538 if (!(attr & RS6000_BTC_VOID))
15539 expected_nopnds += 1;
15540 if (uses_spr)
15541 expected_nopnds += 2;
15542
15543 gcc_assert (nopnds == expected_nopnds
15544 && nopnds <= MAX_HTM_OPERANDS);
15545 }
15546
15547 switch (nopnds)
15548 {
15549 case 1:
15550 pat = GEN_FCN (icode) (op[0]);
15551 break;
15552 case 2:
15553 pat = GEN_FCN (icode) (op[0], op[1]);
15554 break;
15555 case 3:
15556 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15557 break;
15558 case 4:
15559 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15560 break;
15561 default:
15562 gcc_unreachable ();
15563 }
15564 if (!pat)
15565 return NULL_RTX;
15566 emit_insn (pat);
15567
15568 if (attr & RS6000_BTC_CR)
15569 {
15570 if (fcode == HTM_BUILTIN_TBEGIN)
15571 {
15572 /* Emit code to set TARGET to true or false depending on
15573 whether the tbegin. instruction succeeded or failed
15574 to start a transaction. We do this by placing the 1's
15575 complement of CR's EQ bit into TARGET. */
15576 rtx scratch = gen_reg_rtx (SImode);
15577 emit_insn (gen_rtx_SET (scratch,
15578 gen_rtx_EQ (SImode, cr,
15579 const0_rtx)));
15580 emit_insn (gen_rtx_SET (target,
15581 gen_rtx_XOR (SImode, scratch,
15582 GEN_INT (1))));
15583 }
15584 else
15585 {
15586 /* Emit code to copy the 4-bit condition register field
15587 CR into the least significant end of register TARGET. */
15588 rtx scratch1 = gen_reg_rtx (SImode);
15589 rtx scratch2 = gen_reg_rtx (SImode);
15590 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15591 emit_insn (gen_movcc (subreg, cr));
15592 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15593 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15594 }
15595 }
15596
15597 if (nonvoid)
15598 return target;
15599 return const0_rtx;
15600 }
15601
15602 *expandedp = false;
15603 return NULL_RTX;
15604 }
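
/* For illustration, the user-level shape of the TBEGIN path above,
   following the documented -mhtm built-ins (a sketch):

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
         __builtin_tend (0);
       }

   __builtin_tbegin returns nonzero when the transaction started, i.e.
   the complement of the CR EQ bit computed above.  */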
15605
15606 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15607
15608 static rtx
15609 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15610 rtx target)
15611 {
15612 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15613 if (fcode == RS6000_BUILTIN_CPU_INIT)
15614 return const0_rtx;
15615
15616 if (target == 0 || GET_MODE (target) != SImode)
15617 target = gen_reg_rtx (SImode);
15618
15619 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15620 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15621 if (TREE_CODE (arg) != STRING_CST)
15622 {
15623 error ("builtin %s only accepts a string argument",
15624 rs6000_builtin_info[(size_t) fcode].name);
15625 return const0_rtx;
15626 }
15627
15628 if (fcode == RS6000_BUILTIN_CPU_IS)
15629 {
15630 const char *cpu = TREE_STRING_POINTER (arg);
15631 rtx cpuid = NULL_RTX;
15632 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15633 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15634 {
15635 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15636 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15637 break;
15638 }
15639 if (cpuid == NULL_RTX)
15640 {
15641 /* Invalid CPU argument. */
15642 error ("cpu %s is an invalid argument to builtin %s",
15643 cpu, rs6000_builtin_info[(size_t) fcode].name);
15644 return const0_rtx;
15645 }
15646
15647 rtx platform = gen_reg_rtx (SImode);
15648 rtx tcbmem = gen_const_mem (SImode,
15649 gen_rtx_PLUS (Pmode,
15650 gen_rtx_REG (Pmode, TLS_REGNUM),
15651 GEN_INT (TCB_PLATFORM_OFFSET)));
15652 emit_move_insn (platform, tcbmem);
15653 emit_insn (gen_eqsi3 (target, platform, cpuid));
15654 }
15655 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15656 {
15657 const char *hwcap = TREE_STRING_POINTER (arg);
15658 rtx mask = NULL_RTX;
15659 int hwcap_offset;
15660 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15661 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15662 {
15663 mask = GEN_INT (cpu_supports_info[i].mask);
15664 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15665 break;
15666 }
15667 if (mask == NULL_RTX)
15668 {
15669 /* Invalid HWCAP argument. */
15670 error ("hwcap %s is an invalid argument to builtin %s",
15671 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15672 return const0_rtx;
15673 }
15674
15675 rtx tcb_hwcap = gen_reg_rtx (SImode);
15676 rtx tcbmem = gen_const_mem (SImode,
15677 gen_rtx_PLUS (Pmode,
15678 gen_rtx_REG (Pmode, TLS_REGNUM),
15679 GEN_INT (hwcap_offset)));
15680 emit_move_insn (tcb_hwcap, tcbmem);
15681 rtx scratch1 = gen_reg_rtx (SImode);
15682 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15683 rtx scratch2 = gen_reg_rtx (SImode);
15684 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15685 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15686 }
15687
15688 /* Record that we have expanded a CPU builtin, so that we can later
15689 emit a reference to the special symbol exported by LIBC to ensure we
15690 do not link against an old LIBC that doesn't support this feature. */
15691 cpu_builtin_p = true;
15692
15693 #else
15694 /* For old LIBCs, always return FALSE. */
15695 emit_move_insn (target, GEN_INT (0));
15696 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15697
15698 return target;
15699 }
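
/* For illustration, the source-level uses that reach the expander above
   (the accepted strings come from cpu_is_info and cpu_supports_info;
   "power9" and "vsx" are examples):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("power9"))
       ...
     if (__builtin_cpu_supports ("vsx"))
       ...  */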
15700
15701 static rtx
15702 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15703 {
15704 rtx pat;
15705 tree arg0 = CALL_EXPR_ARG (exp, 0);
15706 tree arg1 = CALL_EXPR_ARG (exp, 1);
15707 tree arg2 = CALL_EXPR_ARG (exp, 2);
15708 rtx op0 = expand_normal (arg0);
15709 rtx op1 = expand_normal (arg1);
15710 rtx op2 = expand_normal (arg2);
15711 machine_mode tmode = insn_data[icode].operand[0].mode;
15712 machine_mode mode0 = insn_data[icode].operand[1].mode;
15713 machine_mode mode1 = insn_data[icode].operand[2].mode;
15714 machine_mode mode2 = insn_data[icode].operand[3].mode;
15715
15716 if (icode == CODE_FOR_nothing)
15717 /* Builtin not supported on this processor. */
15718 return 0;
15719
15720 /* If we got invalid arguments, bail out before generating bad rtl. */
15721 if (arg0 == error_mark_node
15722 || arg1 == error_mark_node
15723 || arg2 == error_mark_node)
15724 return const0_rtx;
15725
15726 /* Check and prepare the arguments depending on the instruction code.
15727
15728 Note that a switch statement instead of the sequence of tests
15729 would be incorrect as many of the CODE_FOR values could be
15730 CODE_FOR_nothing and that would yield multiple alternatives
15731 with identical values. We'd never reach here at runtime in
15732 this case. */
15733 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15734 || icode == CODE_FOR_altivec_vsldoi_v2df
15735 || icode == CODE_FOR_altivec_vsldoi_v4si
15736 || icode == CODE_FOR_altivec_vsldoi_v8hi
15737 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15738 {
15739 /* Only allow 4-bit unsigned literals. */
15740 STRIP_NOPS (arg2);
15741 if (TREE_CODE (arg2) != INTEGER_CST
15742 || TREE_INT_CST_LOW (arg2) & ~0xf)
15743 {
15744 error ("argument 3 must be a 4-bit unsigned literal");
15745 return CONST0_RTX (tmode);
15746 }
15747 }
15748 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15749 || icode == CODE_FOR_vsx_xxpermdi_v2di
15750 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15751 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15752 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15753 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15754 || icode == CODE_FOR_vsx_xxpermdi_v4si
15755 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15756 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15757 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15758 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15759 || icode == CODE_FOR_vsx_xxsldwi_v4si
15760 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15761 || icode == CODE_FOR_vsx_xxsldwi_v2di
15762 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15763 {
15764 /* Only allow 2-bit unsigned literals. */
15765 STRIP_NOPS (arg2);
15766 if (TREE_CODE (arg2) != INTEGER_CST
15767 || TREE_INT_CST_LOW (arg2) & ~0x3)
15768 {
15769 error ("argument 3 must be a 2-bit unsigned literal");
15770 return CONST0_RTX (tmode);
15771 }
15772 }
15773 else if (icode == CODE_FOR_vsx_set_v2df
15774 || icode == CODE_FOR_vsx_set_v2di
15775 || icode == CODE_FOR_bcdadd
15776 || icode == CODE_FOR_bcdadd_lt
15777 || icode == CODE_FOR_bcdadd_eq
15778 || icode == CODE_FOR_bcdadd_gt
15779 || icode == CODE_FOR_bcdsub
15780 || icode == CODE_FOR_bcdsub_lt
15781 || icode == CODE_FOR_bcdsub_eq
15782 || icode == CODE_FOR_bcdsub_gt)
15783 {
15784 /* Only allow 1-bit unsigned literals. */
15785 STRIP_NOPS (arg2);
15786 if (TREE_CODE (arg2) != INTEGER_CST
15787 || TREE_INT_CST_LOW (arg2) & ~0x1)
15788 {
15789 error ("argument 3 must be a 1-bit unsigned literal");
15790 return CONST0_RTX (tmode);
15791 }
15792 }
15793 else if (icode == CODE_FOR_dfp_ddedpd_dd
15794 || icode == CODE_FOR_dfp_ddedpd_td)
15795 {
15796 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15797 STRIP_NOPS (arg0);
15798 if (TREE_CODE (arg0) != INTEGER_CST
15799 || TREE_INT_CST_LOW (arg0) & ~0x3)
15800 {
15801 error ("argument 1 must be 0 or 2");
15802 return CONST0_RTX (tmode);
15803 }
15804 }
15805 else if (icode == CODE_FOR_dfp_denbcd_dd
15806 || icode == CODE_FOR_dfp_denbcd_td)
15807 {
15808 /* Only allow 1-bit unsigned literals. */
15809 STRIP_NOPS (arg0);
15810 if (TREE_CODE (arg0) != INTEGER_CST
15811 || TREE_INT_CST_LOW (arg0) & ~0x1)
15812 {
15813 error ("argument 1 must be a 1-bit unsigned literal");
15814 return CONST0_RTX (tmode);
15815 }
15816 }
15817 else if (icode == CODE_FOR_dfp_dscli_dd
15818 || icode == CODE_FOR_dfp_dscli_td
15819 || icode == CODE_FOR_dfp_dscri_dd
15820 || icode == CODE_FOR_dfp_dscri_td)
15821 {
15822 /* Only allow 6-bit unsigned literals. */
15823 STRIP_NOPS (arg1);
15824 if (TREE_CODE (arg1) != INTEGER_CST
15825 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15826 {
15827 error ("argument 2 must be a 6-bit unsigned literal");
15828 return CONST0_RTX (tmode);
15829 }
15830 }
15831 else if (icode == CODE_FOR_crypto_vshasigmaw
15832 || icode == CODE_FOR_crypto_vshasigmad)
15833 {
15834 /* Check whether the 2nd and 3rd arguments are integer constants in
15835 range, and prepare the arguments. */
15836 STRIP_NOPS (arg1);
15837 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15838 {
15839 error ("argument 2 must be 0 or 1");
15840 return CONST0_RTX (tmode);
15841 }
15842
15843 STRIP_NOPS (arg2);
15844 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15845 {
15846 error ("argument 3 must be in the range 0..15");
15847 return CONST0_RTX (tmode);
15848 }
15849 }
15850
15851 if (target == 0
15852 || GET_MODE (target) != tmode
15853 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15854 target = gen_reg_rtx (tmode);
15855
15856 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15857 op0 = copy_to_mode_reg (mode0, op0);
15858 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15859 op1 = copy_to_mode_reg (mode1, op1);
15860 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15861 op2 = copy_to_mode_reg (mode2, op2);
15862
15863 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15864 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15865 else
15866 pat = GEN_FCN (icode) (target, op0, op1, op2);
15867 if (! pat)
15868 return 0;
15869 emit_insn (pat);
15870
15871 return target;
15872 }
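
/* For illustration, one of the literal-constrained ternops handled
   above; vec_sld expands through the vsldoi patterns (operand names
   hypothetical):

     vector int a, b;
     vector int r = vec_sld (a, b, 3);   <-- OK: 3 is a 4-bit literal

   A non-constant or out-of-range third argument instead reaches the
   "argument 3 must be a 4-bit unsigned literal" diagnostic.  */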
15873
15874 /* Expand the lvx builtins. */
15875 static rtx
15876 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15877 {
15878 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15879 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15880 tree arg0;
15881 machine_mode tmode, mode0;
15882 rtx pat, op0;
15883 enum insn_code icode;
15884
15885 switch (fcode)
15886 {
15887 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15888 icode = CODE_FOR_vector_altivec_load_v16qi;
15889 break;
15890 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15891 icode = CODE_FOR_vector_altivec_load_v8hi;
15892 break;
15893 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15894 icode = CODE_FOR_vector_altivec_load_v4si;
15895 break;
15896 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15897 icode = CODE_FOR_vector_altivec_load_v4sf;
15898 break;
15899 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15900 icode = CODE_FOR_vector_altivec_load_v2df;
15901 break;
15902 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15903 icode = CODE_FOR_vector_altivec_load_v2di;
15904 break;
15905 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15906 icode = CODE_FOR_vector_altivec_load_v1ti;
15907 break;
15908 default:
15909 *expandedp = false;
15910 return NULL_RTX;
15911 }
15912
15913 *expandedp = true;
15914
15915 arg0 = CALL_EXPR_ARG (exp, 0);
15916 op0 = expand_normal (arg0);
15917 tmode = insn_data[icode].operand[0].mode;
15918 mode0 = insn_data[icode].operand[1].mode;
15919
15920 if (target == 0
15921 || GET_MODE (target) != tmode
15922 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15923 target = gen_reg_rtx (tmode);
15924
15925 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15926 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15927
15928 pat = GEN_FCN (icode) (target, op0);
15929 if (! pat)
15930 return 0;
15931 emit_insn (pat);
15932 return target;
15933 }
15934
15935 /* Expand the stvx builtins. */
15936 static rtx
15937 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15938 bool *expandedp)
15939 {
15940 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15941 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15942 tree arg0, arg1;
15943 machine_mode mode0, mode1;
15944 rtx pat, op0, op1;
15945 enum insn_code icode;
15946
15947 switch (fcode)
15948 {
15949 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15950 icode = CODE_FOR_vector_altivec_store_v16qi;
15951 break;
15952 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15953 icode = CODE_FOR_vector_altivec_store_v8hi;
15954 break;
15955 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15956 icode = CODE_FOR_vector_altivec_store_v4si;
15957 break;
15958 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15959 icode = CODE_FOR_vector_altivec_store_v4sf;
15960 break;
15961 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15962 icode = CODE_FOR_vector_altivec_store_v2df;
15963 break;
15964 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15965 icode = CODE_FOR_vector_altivec_store_v2di;
15966 break;
15967 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15968 icode = CODE_FOR_vector_altivec_store_v1ti;
15969 break;
15970 default:
15971 *expandedp = false;
15972 return NULL_RTX;
15973 }
15974
15975 arg0 = CALL_EXPR_ARG (exp, 0);
15976 arg1 = CALL_EXPR_ARG (exp, 1);
15977 op0 = expand_normal (arg0);
15978 op1 = expand_normal (arg1);
15979 mode0 = insn_data[icode].operand[0].mode;
15980 mode1 = insn_data[icode].operand[1].mode;
15981
15982 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15983 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15984 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15985 op1 = copy_to_mode_reg (mode1, op1);
15986
15987 pat = GEN_FCN (icode) (op0, op1);
15988 if (pat)
15989 emit_insn (pat);
15990
15991 *expandedp = true;
15992 return NULL_RTX;
15993 }
15994
15995 /* Expand the dst builtins. */
15996 static rtx
15997 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15998 bool *expandedp)
15999 {
16000 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16001 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16002 tree arg0, arg1, arg2;
16003 machine_mode mode0, mode1;
16004 rtx pat, op0, op1, op2;
16005 const struct builtin_description *d;
16006 size_t i;
16007
16008 *expandedp = false;
16009
16010 /* Handle DST variants. */
16011 d = bdesc_dst;
16012 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16013 if (d->code == fcode)
16014 {
16015 arg0 = CALL_EXPR_ARG (exp, 0);
16016 arg1 = CALL_EXPR_ARG (exp, 1);
16017 arg2 = CALL_EXPR_ARG (exp, 2);
16018 op0 = expand_normal (arg0);
16019 op1 = expand_normal (arg1);
16020 op2 = expand_normal (arg2);
16021 mode0 = insn_data[d->icode].operand[0].mode;
16022 mode1 = insn_data[d->icode].operand[1].mode;
16023
16024 /* Invalid arguments; bail out before generating bad rtl. */
16025 if (arg0 == error_mark_node
16026 || arg1 == error_mark_node
16027 || arg2 == error_mark_node)
16028 return const0_rtx;
16029
16030 *expandedp = true;
16031 STRIP_NOPS (arg2);
16032 if (TREE_CODE (arg2) != INTEGER_CST
16033 || TREE_INT_CST_LOW (arg2) & ~0x3)
16034 {
16035 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
16036 return const0_rtx;
16037 }
16038
16039 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16040 op0 = copy_to_mode_reg (Pmode, op0);
16041 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16042 op1 = copy_to_mode_reg (mode1, op1);
16043
16044 pat = GEN_FCN (d->icode) (op0, op1, op2);
16045 if (pat != 0)
16046 emit_insn (pat);
16047
16048 return NULL_RTX;
16049 }
16050
16051 return NULL_RTX;
16052 }
16053
16054 /* Expand vec_init builtin. */
16055 static rtx
16056 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
16057 {
16058 machine_mode tmode = TYPE_MODE (type);
16059 machine_mode inner_mode = GET_MODE_INNER (tmode);
16060 int i, n_elt = GET_MODE_NUNITS (tmode);
16061
16062 gcc_assert (VECTOR_MODE_P (tmode));
16063 gcc_assert (n_elt == call_expr_nargs (exp));
16064
16065 if (!target || !register_operand (target, tmode))
16066 target = gen_reg_rtx (tmode);
16067
16068 /* If we have a vector composed of a single element, such as V1TImode, do
16069 the initialization directly. */
16070 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
16071 {
16072 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
16073 emit_move_insn (target, gen_lowpart (tmode, x));
16074 }
16075 else
16076 {
16077 rtvec v = rtvec_alloc (n_elt);
16078
16079 for (i = 0; i < n_elt; ++i)
16080 {
16081 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
16082 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16083 }
16084
16085 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
16086 }
16087
16088 return target;
16089 }
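
/* For illustration, the kind of source construct that reaches the
   vec_init expander above (element names hypothetical):

     vector int v = (vector int) { w, x, y, z };

   Each element is expanded, narrowed to the inner mode, and handed to
   rs6000_expand_vector_init as a PARALLEL.  */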
16090
16091 /* Return the integer constant in ARG. Constrain it to be in the range
16092 of the subparts of VEC_TYPE; issue an error if not. */
16093
16094 static int
16095 get_element_number (tree vec_type, tree arg)
16096 {
16097 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16098
16099 if (!tree_fits_uhwi_p (arg)
16100 || (elt = tree_to_uhwi (arg), elt > max))
16101 {
16102 error ("selector must be an integer constant in the range 0..%wi", max);
16103 return 0;
16104 }
16105
16106 return elt;
16107 }
16108
16109 /* Expand vec_set builtin. */
16110 static rtx
16111 altivec_expand_vec_set_builtin (tree exp)
16112 {
16113 machine_mode tmode, mode1;
16114 tree arg0, arg1, arg2;
16115 int elt;
16116 rtx op0, op1;
16117
16118 arg0 = CALL_EXPR_ARG (exp, 0);
16119 arg1 = CALL_EXPR_ARG (exp, 1);
16120 arg2 = CALL_EXPR_ARG (exp, 2);
16121
16122 tmode = TYPE_MODE (TREE_TYPE (arg0));
16123 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16124 gcc_assert (VECTOR_MODE_P (tmode));
16125
16126 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
16127 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
16128 elt = get_element_number (TREE_TYPE (arg0), arg2);
16129
16130 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16131 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16132
16133 op0 = force_reg (tmode, op0);
16134 op1 = force_reg (mode1, op1);
16135
16136 rs6000_expand_vector_set (op0, op1, elt);
16137
16138 return op0;
16139 }
16140
16141 /* Expand vec_ext builtin. */
16142 static rtx
16143 altivec_expand_vec_ext_builtin (tree exp, rtx target)
16144 {
16145 machine_mode tmode, mode0;
16146 tree arg0, arg1;
16147 rtx op0;
16148 rtx op1;
16149
16150 arg0 = CALL_EXPR_ARG (exp, 0);
16151 arg1 = CALL_EXPR_ARG (exp, 1);
16152
16153 op0 = expand_normal (arg0);
16154 op1 = expand_normal (arg1);
16155
16156 /* Call get_element_number to validate arg1 if it is a constant. */
16157 if (TREE_CODE (arg1) == INTEGER_CST)
16158 (void) get_element_number (TREE_TYPE (arg0), arg1);
16159
16160 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16161 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16162 gcc_assert (VECTOR_MODE_P (mode0));
16163
16164 op0 = force_reg (mode0, op0);
16165
16166 if (optimize || !target || !register_operand (target, tmode))
16167 target = gen_reg_rtx (tmode);
16168
16169 rs6000_expand_vector_extract (target, op0, op1);
16170
16171 return target;
16172 }
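
/* For illustration, user-level analogues of the vec_set and vec_ext
   expanders above (names hypothetical):

     vector int v, w;
     w = vec_insert (42, v, 1);    <-- vec_set: element 1 becomes 42
     int e = vec_extract (v, 2);   <-- vec_ext: read element 2

   A constant selector is range-checked by get_element_number; vec_ext
   also accepts a non-constant selector.  */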
16173
16174 /* Expand the builtin in EXP and store the result in TARGET. Store
16175 true in *EXPANDEDP if we found a builtin to expand. */
16176 static rtx
16177 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
16178 {
16179 const struct builtin_description *d;
16180 size_t i;
16181 enum insn_code icode;
16182 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16183 tree arg0, arg1, arg2;
16184 rtx op0, pat;
16185 machine_mode tmode, mode0;
16186 enum rs6000_builtins fcode
16187 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16188
16189 if (rs6000_overloaded_builtin_p (fcode))
16190 {
16191 *expandedp = true;
16192 error ("unresolved overload for Altivec builtin %qF", fndecl);
16193
16194 /* Given it is invalid, just generate a normal call. */
16195 return expand_call (exp, target, false);
16196 }
16197
16198 target = altivec_expand_ld_builtin (exp, target, expandedp);
16199 if (*expandedp)
16200 return target;
16201
16202 target = altivec_expand_st_builtin (exp, target, expandedp);
16203 if (*expandedp)
16204 return target;
16205
16206 target = altivec_expand_dst_builtin (exp, target, expandedp);
16207 if (*expandedp)
16208 return target;
16209
16210 *expandedp = true;
16211
16212 switch (fcode)
16213 {
16214 case ALTIVEC_BUILTIN_STVX_V2DF:
16215 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
16216 case ALTIVEC_BUILTIN_STVX_V2DI:
16217 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
16218 case ALTIVEC_BUILTIN_STVX_V4SF:
16219 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
16220 case ALTIVEC_BUILTIN_STVX:
16221 case ALTIVEC_BUILTIN_STVX_V4SI:
16222 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
16223 case ALTIVEC_BUILTIN_STVX_V8HI:
16224 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
16225 case ALTIVEC_BUILTIN_STVX_V16QI:
16226 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
16227 case ALTIVEC_BUILTIN_STVEBX:
16228 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
16229 case ALTIVEC_BUILTIN_STVEHX:
16230 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
16231 case ALTIVEC_BUILTIN_STVEWX:
16232 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
16233 case ALTIVEC_BUILTIN_STVXL_V2DF:
16234 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
16235 case ALTIVEC_BUILTIN_STVXL_V2DI:
16236 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
16237 case ALTIVEC_BUILTIN_STVXL_V4SF:
16238 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
16239 case ALTIVEC_BUILTIN_STVXL:
16240 case ALTIVEC_BUILTIN_STVXL_V4SI:
16241 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
16242 case ALTIVEC_BUILTIN_STVXL_V8HI:
16243 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
16244 case ALTIVEC_BUILTIN_STVXL_V16QI:
16245 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
16246
16247 case ALTIVEC_BUILTIN_STVLX:
16248 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
16249 case ALTIVEC_BUILTIN_STVLXL:
16250 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
16251 case ALTIVEC_BUILTIN_STVRX:
16252 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
16253 case ALTIVEC_BUILTIN_STVRXL:
16254 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
16255
16256 case P9V_BUILTIN_STXVL:
16257 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
16258
16259 case VSX_BUILTIN_STXVD2X_V1TI:
16260 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
16261 case VSX_BUILTIN_STXVD2X_V2DF:
16262 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
16263 case VSX_BUILTIN_STXVD2X_V2DI:
16264 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
16265 case VSX_BUILTIN_STXVW4X_V4SF:
16266 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
16267 case VSX_BUILTIN_STXVW4X_V4SI:
16268 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
16269 case VSX_BUILTIN_STXVW4X_V8HI:
16270 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
16271 case VSX_BUILTIN_STXVW4X_V16QI:
16272 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
16273
16274 /* For the following on big endian, it's ok to use any appropriate
16275 unaligned-supporting store, so use a generic expander. For
16276 little-endian, the exact element-reversing instruction must
16277 be used. */
16278 case VSX_BUILTIN_ST_ELEMREV_V2DF:
16279 {
16280 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
16281 : CODE_FOR_vsx_st_elemrev_v2df);
16282 return altivec_expand_stv_builtin (code, exp);
16283 }
16284 case VSX_BUILTIN_ST_ELEMREV_V2DI:
16285 {
16286 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
16287 : CODE_FOR_vsx_st_elemrev_v2di);
16288 return altivec_expand_stv_builtin (code, exp);
16289 }
16290 case VSX_BUILTIN_ST_ELEMREV_V4SF:
16291 {
16292 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
16293 : CODE_FOR_vsx_st_elemrev_v4sf);
16294 return altivec_expand_stv_builtin (code, exp);
16295 }
16296 case VSX_BUILTIN_ST_ELEMREV_V4SI:
16297 {
16298 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
16299 : CODE_FOR_vsx_st_elemrev_v4si);
16300 return altivec_expand_stv_builtin (code, exp);
16301 }
16302 case VSX_BUILTIN_ST_ELEMREV_V8HI:
16303 {
16304 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
16305 : CODE_FOR_vsx_st_elemrev_v8hi);
16306 return altivec_expand_stv_builtin (code, exp);
16307 }
16308 case VSX_BUILTIN_ST_ELEMREV_V16QI:
16309 {
16310 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
16311 : CODE_FOR_vsx_st_elemrev_v16qi);
16312 return altivec_expand_stv_builtin (code, exp);
16313 }
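
/* For illustration (a sketch): the ELEMREV cases above implement
   stores that present elements in array order in memory, as used by
   the vec_xst family.  On big-endian any unaligned vector store has
   that effect; on little-endian the element-reversing stxv* patterns
   are required to undo the register element order.  */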
16314
16315 case ALTIVEC_BUILTIN_MFVSCR:
16316 icode = CODE_FOR_altivec_mfvscr;
16317 tmode = insn_data[icode].operand[0].mode;
16318
16319 if (target == 0
16320 || GET_MODE (target) != tmode
16321 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16322 target = gen_reg_rtx (tmode);
16323
16324 pat = GEN_FCN (icode) (target);
16325 if (! pat)
16326 return 0;
16327 emit_insn (pat);
16328 return target;
16329
16330 case ALTIVEC_BUILTIN_MTVSCR:
16331 icode = CODE_FOR_altivec_mtvscr;
16332 arg0 = CALL_EXPR_ARG (exp, 0);
16333 op0 = expand_normal (arg0);
16334 mode0 = insn_data[icode].operand[0].mode;
16335
16336 /* If we got invalid arguments, bail out before generating bad rtl. */
16337 if (arg0 == error_mark_node)
16338 return const0_rtx;
16339
16340 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16341 op0 = copy_to_mode_reg (mode0, op0);
16342
16343 pat = GEN_FCN (icode) (op0);
16344 if (pat)
16345 emit_insn (pat);
16346 return NULL_RTX;
16347
16348 case ALTIVEC_BUILTIN_DSSALL:
16349 emit_insn (gen_altivec_dssall ());
16350 return NULL_RTX;
16351
16352 case ALTIVEC_BUILTIN_DSS:
16353 icode = CODE_FOR_altivec_dss;
16354 arg0 = CALL_EXPR_ARG (exp, 0);
16355 STRIP_NOPS (arg0);
16356 op0 = expand_normal (arg0);
16357 mode0 = insn_data[icode].operand[0].mode;
16358
16359 /* If we got invalid arguments, bail out before generating bad rtl. */
16360 if (arg0 == error_mark_node)
16361 return const0_rtx;
16362
16363 if (TREE_CODE (arg0) != INTEGER_CST
16364 || TREE_INT_CST_LOW (arg0) & ~0x3)
16365 {
16366 error ("argument to dss must be a 2-bit unsigned literal");
16367 return const0_rtx;
16368 }
16369
16370 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16371 op0 = copy_to_mode_reg (mode0, op0);
16372
16373 emit_insn (gen_altivec_dss (op0));
16374 return NULL_RTX;
16375
16376 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
16377 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
16378 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
16379 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
16380 case VSX_BUILTIN_VEC_INIT_V2DF:
16381 case VSX_BUILTIN_VEC_INIT_V2DI:
16382 case VSX_BUILTIN_VEC_INIT_V1TI:
16383 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
16384
16385 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
16386 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
16387 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
16388 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
16389 case VSX_BUILTIN_VEC_SET_V2DF:
16390 case VSX_BUILTIN_VEC_SET_V2DI:
16391 case VSX_BUILTIN_VEC_SET_V1TI:
16392 return altivec_expand_vec_set_builtin (exp);
16393
16394 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
16395 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
16396 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
16397 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
16398 case VSX_BUILTIN_VEC_EXT_V2DF:
16399 case VSX_BUILTIN_VEC_EXT_V2DI:
16400 case VSX_BUILTIN_VEC_EXT_V1TI:
16401 return altivec_expand_vec_ext_builtin (exp, target);
16402
16403 case P9V_BUILTIN_VEXTRACT4B:
16404 case P9V_BUILTIN_VEC_VEXTRACT4B:
16405 arg1 = CALL_EXPR_ARG (exp, 1);
16406 STRIP_NOPS (arg1);
16407
16408 /* Generate a normal call if it is invalid. */
16409 if (arg1 == error_mark_node)
16410 return expand_call (exp, target, false);
16411
16412 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
16413 {
16414 error ("second argument to vec_vextract4b must be 0..12");
16415 return expand_call (exp, target, false);
16416 }
16417 break;
16418
16419 case P9V_BUILTIN_VINSERT4B:
16420 case P9V_BUILTIN_VINSERT4B_DI:
16421 case P9V_BUILTIN_VEC_VINSERT4B:
16422 arg2 = CALL_EXPR_ARG (exp, 2);
16423 STRIP_NOPS (arg2);
16424
16425 /* Generate a normal call if it is invalid. */
16426 if (arg2 == error_mark_node)
16427 return expand_call (exp, target, false);
16428
16429 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
16430 {
16431 error ("third argument to vec_vinsert4b must be 0..12");
16432 return expand_call (exp, target, false);
16433 }
16434 break;
16435
16436 default:
16437 /* Fall through to the table-driven expanders below. */
16438 break;
16439 }
16440
16441 /* Expand abs* operations. */
16442 d = bdesc_abs;
16443 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16444 if (d->code == fcode)
16445 return altivec_expand_abs_builtin (d->icode, exp, target);
16446
16447 /* Expand the AltiVec predicates. */
16448 d = bdesc_altivec_preds;
16449 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16450 if (d->code == fcode)
16451 return altivec_expand_predicate_builtin (d->icode, exp, target);
16452
16453 /* LV* are funky. They are initialized differently from the table-driven builtins. */
16454 switch (fcode)
16455 {
16456 case ALTIVEC_BUILTIN_LVSL:
16457 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
16458 exp, target, false);
16459 case ALTIVEC_BUILTIN_LVSR:
16460 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
16461 exp, target, false);
16462 case ALTIVEC_BUILTIN_LVEBX:
16463 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
16464 exp, target, false);
16465 case ALTIVEC_BUILTIN_LVEHX:
16466 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
16467 exp, target, false);
16468 case ALTIVEC_BUILTIN_LVEWX:
16469 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
16470 exp, target, false);
16471 case ALTIVEC_BUILTIN_LVXL_V2DF:
16472 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
16473 exp, target, false);
16474 case ALTIVEC_BUILTIN_LVXL_V2DI:
16475 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
16476 exp, target, false);
16477 case ALTIVEC_BUILTIN_LVXL_V4SF:
16478 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
16479 exp, target, false);
16480 case ALTIVEC_BUILTIN_LVXL:
16481 case ALTIVEC_BUILTIN_LVXL_V4SI:
16482 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
16483 exp, target, false);
16484 case ALTIVEC_BUILTIN_LVXL_V8HI:
16485 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
16486 exp, target, false);
16487 case ALTIVEC_BUILTIN_LVXL_V16QI:
16488 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
16489 exp, target, false);
16490 case ALTIVEC_BUILTIN_LVX_V2DF:
16491 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
16492 exp, target, false);
16493 case ALTIVEC_BUILTIN_LVX_V2DI:
16494 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
16495 exp, target, false);
16496 case ALTIVEC_BUILTIN_LVX_V4SF:
16497 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
16498 exp, target, false);
16499 case ALTIVEC_BUILTIN_LVX:
16500 case ALTIVEC_BUILTIN_LVX_V4SI:
16501 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
16502 exp, target, false);
16503 case ALTIVEC_BUILTIN_LVX_V8HI:
16504 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
16505 exp, target, false);
16506 case ALTIVEC_BUILTIN_LVX_V16QI:
16507 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16508 exp, target, false);
16509 case ALTIVEC_BUILTIN_LVLX:
16510 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16511 exp, target, true);
16512 case ALTIVEC_BUILTIN_LVLXL:
16513 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16514 exp, target, true);
16515 case ALTIVEC_BUILTIN_LVRX:
16516 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16517 exp, target, true);
16518 case ALTIVEC_BUILTIN_LVRXL:
16519 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16520 exp, target, true);
16521 case VSX_BUILTIN_LXVD2X_V1TI:
16522 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16523 exp, target, false);
16524 case VSX_BUILTIN_LXVD2X_V2DF:
16525 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16526 exp, target, false);
16527 case VSX_BUILTIN_LXVD2X_V2DI:
16528 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16529 exp, target, false);
16530 case VSX_BUILTIN_LXVW4X_V4SF:
16531 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16532 exp, target, false);
16533 case VSX_BUILTIN_LXVW4X_V4SI:
16534 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16535 exp, target, false);
16536 case VSX_BUILTIN_LXVW4X_V8HI:
16537 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16538 exp, target, false);
16539 case VSX_BUILTIN_LXVW4X_V16QI:
16540 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16541 exp, target, false);
16542 /* For the following on big endian, it's ok to use any appropriate
16543 unaligned-supporting load, so use a generic expander. For
16544 little-endian, the exact element-reversing instruction must
16545 be used. */
16546 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16547 {
16548 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16549 : CODE_FOR_vsx_ld_elemrev_v2df);
16550 return altivec_expand_lv_builtin (code, exp, target, false);
16551 }
16552 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16553 {
16554 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16555 : CODE_FOR_vsx_ld_elemrev_v2di);
16556 return altivec_expand_lv_builtin (code, exp, target, false);
16557 }
16558 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16559 {
16560 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16561 : CODE_FOR_vsx_ld_elemrev_v4sf);
16562 return altivec_expand_lv_builtin (code, exp, target, false);
16563 }
16564 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16565 {
16566 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16567 : CODE_FOR_vsx_ld_elemrev_v4si);
16568 return altivec_expand_lv_builtin (code, exp, target, false);
16569 }
16570 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16571 {
16572 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16573 : CODE_FOR_vsx_ld_elemrev_v8hi);
16574 return altivec_expand_lv_builtin (code, exp, target, false);
16575 }
16576 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16577 {
16578 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16579 : CODE_FOR_vsx_ld_elemrev_v16qi);
16580 return altivec_expand_lv_builtin (code, exp, target, false);
16581 }
16582 break;
16583 default:
16584 /* Fall through to the not-expanded return below. */
16585 break;
16586 }
16587
16588 *expandedp = false;
16589 return NULL_RTX;
16590 }
16591
16592 /* Expand the builtin in EXP and store the result in TARGET. Store
16593 true in *EXPANDEDP if we found a builtin to expand. */
16594 static rtx
16595 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16596 {
16597 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16598 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16599 const struct builtin_description *d;
16600 size_t i;
16601
16602 *expandedp = true;
16603
16604 switch (fcode)
16605 {
16606 case PAIRED_BUILTIN_STX:
16607 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16608 case PAIRED_BUILTIN_LX:
16609 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16610 default:
16611 /* Fall through to the predicate table below. */
16612 break;
16613 }
16614
16615 /* Expand the paired predicates. */
16616 d = bdesc_paired_preds;
16617 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16618 if (d->code == fcode)
16619 return paired_expand_predicate_builtin (d->icode, exp, target);
16620
16621 *expandedp = false;
16622 return NULL_RTX;
16623 }
16624
16625 /* Binops that need to be initialized manually, but can be expanded
16626 automagically by rs6000_expand_binop_builtin. */
16627 static const struct builtin_description bdesc_2arg_spe[] =
16628 {
16629 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
16630 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
16631 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
16632 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
16633 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
16634 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
16635 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
16636 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
16637 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
16638 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
16639 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
16640 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
16641 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
16642 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
16643 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
16644 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
16645 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
16646 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
16647 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
16648 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
16649 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
16650 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
16651 };
16652
16653 /* Expand the builtin in EXP and store the result in TARGET. Store
16654 true in *EXPANDEDP if we found a builtin to expand.
16655
16656 This expands the SPE builtins that are not simple unary and binary
16657 operations. */
16658 static rtx
16659 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
16660 {
16661 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16662 tree arg1, arg0;
16663 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16664 enum insn_code icode;
16665 machine_mode tmode, mode0;
16666 rtx pat, op0;
16667 const struct builtin_description *d;
16668 size_t i;
16669
16670 *expandedp = true;
16671
16672 /* Syntax check for a 5-bit unsigned immediate. */
16673 switch (fcode)
16674 {
16675 case SPE_BUILTIN_EVSTDD:
16676 case SPE_BUILTIN_EVSTDH:
16677 case SPE_BUILTIN_EVSTDW:
16678 case SPE_BUILTIN_EVSTWHE:
16679 case SPE_BUILTIN_EVSTWHO:
16680 case SPE_BUILTIN_EVSTWWE:
16681 case SPE_BUILTIN_EVSTWWO:
16682 arg1 = CALL_EXPR_ARG (exp, 2);
16683 if (TREE_CODE (arg1) != INTEGER_CST
16684 || TREE_INT_CST_LOW (arg1) & ~0x1f)
16685 {
16686 error ("argument 2 must be a 5-bit unsigned literal");
16687 return const0_rtx;
16688 }
16689 break;
16690 default:
16691 break;
16692 }
16693
16694 /* The evsplat*i instructions are not quite generic. */
16695 switch (fcode)
16696 {
16697 case SPE_BUILTIN_EVSPLATFI:
16698 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
16699 exp, target);
16700 case SPE_BUILTIN_EVSPLATI:
16701 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
16702 exp, target);
16703 default:
16704 break;
16705 }
16706
16707 d = bdesc_2arg_spe;
16708 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
16709 if (d->code == fcode)
16710 return rs6000_expand_binop_builtin (d->icode, exp, target);
16711
16712 d = bdesc_spe_predicates;
16713 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
16714 if (d->code == fcode)
16715 return spe_expand_predicate_builtin (d->icode, exp, target);
16716
16717 d = bdesc_spe_evsel;
16718 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
16719 if (d->code == fcode)
16720 return spe_expand_evsel_builtin (d->icode, exp, target);
16721
16722 switch (fcode)
16723 {
16724 case SPE_BUILTIN_EVSTDDX:
16725 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
16726 case SPE_BUILTIN_EVSTDHX:
16727 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
16728 case SPE_BUILTIN_EVSTDWX:
16729 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
16730 case SPE_BUILTIN_EVSTWHEX:
16731 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
16732 case SPE_BUILTIN_EVSTWHOX:
16733 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
16734 case SPE_BUILTIN_EVSTWWEX:
16735 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
16736 case SPE_BUILTIN_EVSTWWOX:
16737 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
16738 case SPE_BUILTIN_EVSTDD:
16739 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
16740 case SPE_BUILTIN_EVSTDH:
16741 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
16742 case SPE_BUILTIN_EVSTDW:
16743 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
16744 case SPE_BUILTIN_EVSTWHE:
16745 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
16746 case SPE_BUILTIN_EVSTWHO:
16747 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
16748 case SPE_BUILTIN_EVSTWWE:
16749 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
16750 case SPE_BUILTIN_EVSTWWO:
16751 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
16752 case SPE_BUILTIN_MFSPEFSCR:
16753 icode = CODE_FOR_spe_mfspefscr;
16754 tmode = insn_data[icode].operand[0].mode;
16755
16756 if (target == 0
16757 || GET_MODE (target) != tmode
16758 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16759 target = gen_reg_rtx (tmode);
16760
16761 pat = GEN_FCN (icode) (target);
16762 if (! pat)
16763 return 0;
16764 emit_insn (pat);
16765 return target;
16766 case SPE_BUILTIN_MTSPEFSCR:
16767 icode = CODE_FOR_spe_mtspefscr;
16768 arg0 = CALL_EXPR_ARG (exp, 0);
16769 op0 = expand_normal (arg0);
16770 mode0 = insn_data[icode].operand[0].mode;
16771
16772 if (arg0 == error_mark_node)
16773 return const0_rtx;
16774
16775 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16776 op0 = copy_to_mode_reg (mode0, op0);
16777
16778 pat = GEN_FCN (icode) (op0);
16779 if (pat)
16780 emit_insn (pat);
16781 return NULL_RTX;
16782 default:
16783 break;
16784 }
16785
16786 *expandedp = false;
16787 return NULL_RTX;
16788 }
16789
16790 static rtx
16791 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16792 {
16793 rtx pat, scratch, tmp;
16794 tree form = CALL_EXPR_ARG (exp, 0);
16795 tree arg0 = CALL_EXPR_ARG (exp, 1);
16796 tree arg1 = CALL_EXPR_ARG (exp, 2);
16797 rtx op0 = expand_normal (arg0);
16798 rtx op1 = expand_normal (arg1);
16799 machine_mode mode0 = insn_data[icode].operand[1].mode;
16800 machine_mode mode1 = insn_data[icode].operand[2].mode;
16801 int form_int;
16802 enum rtx_code code;
16803
16804 if (TREE_CODE (form) != INTEGER_CST)
16805 {
16806 error ("argument 1 of __builtin_paired_predicate must be a constant");
16807 return const0_rtx;
16808 }
16809 else
16810 form_int = TREE_INT_CST_LOW (form);
16811
16812 gcc_assert (mode0 == mode1);
16813
16814 if (arg0 == error_mark_node || arg1 == error_mark_node)
16815 return const0_rtx;
16816
16817 if (target == 0
16818 || GET_MODE (target) != SImode
16819 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16820 target = gen_reg_rtx (SImode);
16821 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16822 op0 = copy_to_mode_reg (mode0, op0);
16823 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16824 op1 = copy_to_mode_reg (mode1, op1);
16825
16826 scratch = gen_reg_rtx (CCFPmode);
16827
16828 pat = GEN_FCN (icode) (scratch, op0, op1);
16829 if (!pat)
16830 return const0_rtx;
16831
16832 emit_insn (pat);
16833
16834 switch (form_int)
16835 {
16836 /* LT bit. */
16837 case 0:
16838 code = LT;
16839 break;
16840 /* GT bit. */
16841 case 1:
16842 code = GT;
16843 break;
16844 /* EQ bit. */
16845 case 2:
16846 code = EQ;
16847 break;
16848 /* UN bit. */
16849 case 3:
16850 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16851 return target;
16852 default:
16853 error ("argument 1 of __builtin_paired_predicate is out of range");
16854 return const0_rtx;
16855 }
16856
16857 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16858 emit_move_insn (target, tmp);
16859 return target;
16860 }
16861
16862 static rtx
16863 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16864 {
16865 rtx pat, scratch, tmp;
16866 tree form = CALL_EXPR_ARG (exp, 0);
16867 tree arg0 = CALL_EXPR_ARG (exp, 1);
16868 tree arg1 = CALL_EXPR_ARG (exp, 2);
16869 rtx op0 = expand_normal (arg0);
16870 rtx op1 = expand_normal (arg1);
16871 machine_mode mode0 = insn_data[icode].operand[1].mode;
16872 machine_mode mode1 = insn_data[icode].operand[2].mode;
16873 int form_int;
16874 enum rtx_code code;
16875
16876 if (TREE_CODE (form) != INTEGER_CST)
16877 {
16878 error ("argument 1 of __builtin_spe_predicate must be a constant");
16879 return const0_rtx;
16880 }
16881 else
16882 form_int = TREE_INT_CST_LOW (form);
16883
16884 gcc_assert (mode0 == mode1);
16885
16886 if (arg0 == error_mark_node || arg1 == error_mark_node)
16887 return const0_rtx;
16888
16889 if (target == 0
16890 || GET_MODE (target) != SImode
16891 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
16892 target = gen_reg_rtx (SImode);
16893
16894 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16895 op0 = copy_to_mode_reg (mode0, op0);
16896 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16897 op1 = copy_to_mode_reg (mode1, op1);
16898
16899 scratch = gen_reg_rtx (CCmode);
16900
16901 pat = GEN_FCN (icode) (scratch, op0, op1);
16902 if (! pat)
16903 return const0_rtx;
16904 emit_insn (pat);
16905
16906 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
16907 _lower_. We use one compare, but look in different bits of the
16908 CR for each variant.
16909
16910 There are 2 elements in each SPE simd type (upper/lower). The CR
16911 bits are set as follows:
16912
16913 BIT 0 | BIT 1 | BIT 2 | BIT 3
16914 U | L | (U | L) | (U & L)
16915
16916 So, for an "all" relationship, BIT 3 would be set.
16917 For an "any" relationship, BIT 2 would be set. Etc.
16918
16919 Following traditional nomenclature, these bits map to:
16920
16921 BIT 0 | BIT 1 | BIT 2 | BIT 3
16922 LT | GT | EQ | OV
16923
16924 Later, we will generate rtl to look in the OV/EQ/LT/GT bits for
16925 the all/any/upper/lower variants, respectively. */
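
/* Worked example of the table above: if the relation holds for the
   upper element only, the compare sets U=1 and L=0, so BIT 0 (LT) = 1,
   BIT 1 (GT) = 0, BIT 2 (EQ, U|L) = 1, and BIT 3 (OV, U&L) = 0: the
   "any" and "upper" predicates are true, "all" and "lower" are false.  */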
16926
16927 switch (form_int)
16928 {
16929 /* All variant. OV bit. */
16930 case 0:
16931 /* We need to get to the OV bit, which is the ORDERED bit. We
16932 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
16933 that's ugly and will make validate_condition_mode die.
16934 So let's just use another pattern. */
16935 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16936 return target;
16937 /* Any variant. EQ bit. */
16938 case 1:
16939 code = EQ;
16940 break;
16941 /* Upper variant. LT bit. */
16942 case 2:
16943 code = LT;
16944 break;
16945 /* Lower variant. GT bit. */
16946 case 3:
16947 code = GT;
16948 break;
16949 default:
16950 error ("argument 1 of __builtin_spe_predicate is out of range");
16951 return const0_rtx;
16952 }
16953
16954 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16955 emit_move_insn (target, tmp);
16956
16957 return target;
16958 }
16959
16960 /* The evsel builtins look like this:
16961
16962 e = __builtin_spe_evsel_OP (a, b, c, d);
16963
16964 and work like this:
16965
16966 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
16967 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
16968 */
16969
16970 static rtx
16971 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
16972 {
16973 rtx pat, scratch;
16974 tree arg0 = CALL_EXPR_ARG (exp, 0);
16975 tree arg1 = CALL_EXPR_ARG (exp, 1);
16976 tree arg2 = CALL_EXPR_ARG (exp, 2);
16977 tree arg3 = CALL_EXPR_ARG (exp, 3);
16978 rtx op0 = expand_normal (arg0);
16979 rtx op1 = expand_normal (arg1);
16980 rtx op2 = expand_normal (arg2);
16981 rtx op3 = expand_normal (arg3);
16982 machine_mode mode0 = insn_data[icode].operand[1].mode;
16983 machine_mode mode1 = insn_data[icode].operand[2].mode;
16984
16985 gcc_assert (mode0 == mode1);
16986
16987 if (arg0 == error_mark_node || arg1 == error_mark_node
16988 || arg2 == error_mark_node || arg3 == error_mark_node)
16989 return const0_rtx;
16990
16991 if (target == 0
16992 || GET_MODE (target) != mode0
16993 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
16994 target = gen_reg_rtx (mode0);
16995
16996 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16997 op0 = copy_to_mode_reg (mode0, op0);
16998 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16999 op1 = copy_to_mode_reg (mode0, op1);
17000 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
17001 op2 = copy_to_mode_reg (mode0, op2);
17002 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
17003 op3 = copy_to_mode_reg (mode0, op3);
17004
17005 /* Generate the compare. */
17006 scratch = gen_reg_rtx (CCmode);
17007 pat = GEN_FCN (icode) (scratch, op0, op1);
17008 if (! pat)
17009 return const0_rtx;
17010 emit_insn (pat);
17011
17012 if (mode0 == V2SImode)
17013 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
17014 else
17015 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
17016
17017 return target;
17018 }
17019
17020 /* Raise an error message for a builtin function that is called without the
17021 appropriate target options being set. */
17022
17023 static void
17024 rs6000_invalid_builtin (enum rs6000_builtins fncode)
17025 {
17026 size_t uns_fncode = (size_t)fncode;
17027 const char *name = rs6000_builtin_info[uns_fncode].name;
17028 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
17029
17030 gcc_assert (name != NULL);
17031 if ((fnmask & RS6000_BTM_CELL) != 0)
17032 error ("Builtin function %s is only valid for the cell processor", name);
17033 else if ((fnmask & RS6000_BTM_VSX) != 0)
17034 error ("Builtin function %s requires the -mvsx option", name);
17035 else if ((fnmask & RS6000_BTM_HTM) != 0)
17036 error ("Builtin function %s requires the -mhtm option", name);
17037 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
17038 error ("Builtin function %s requires the -maltivec option", name);
17039 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
17040 error ("Builtin function %s requires the -mpaired option", name);
17041 else if ((fnmask & RS6000_BTM_SPE) != 0)
17042 error ("Builtin function %s requires the -mspe option", name);
17043 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17044 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
17045 error ("Builtin function %s requires the -mhard-dfp and"
17046 " -mpower8-vector options", name);
17047 else if ((fnmask & RS6000_BTM_DFP) != 0)
17048 error ("Builtin function %s requires the -mhard-dfp option", name);
17049 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
17050 error ("Builtin function %s requires the -mpower8-vector option", name);
17051 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17052 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
17053 error ("Builtin function %s requires the -mcpu=power9 and"
17054 " -m64 options", name);
17055 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
17056 error ("Builtin function %s requires the -mcpu=power9 option", name);
17057 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17058 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
17059 error ("Builtin function %s requires the -mcpu=power9 and"
17060 " -m64 options", name);
17061 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
17062 error ("Builtin function %s requires the -mcpu=power9 option", name);
17063 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17064 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
17065 error ("Builtin function %s requires the -mhard-float and"
17066 " -mlong-double-128 options", name);
17067 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
17068 error ("Builtin function %s requires the -mhard-float option", name);
17069 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
17070 error ("Builtin function %s requires the -mfloat128 option", name);
17071 else
17072 error ("Builtin function %s is not supported with the current options",
17073 name);
17074 }
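
/* For example, calling __builtin_altivec_vaddubm in a unit compiled
   without -maltivec lands here via rs6000_expand_builtin and reports
   "Builtin function __builtin_altivec_vaddubm requires the -maltivec
   option"; the caller then expands the builtin as a normal call.  */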
17075
17076 /* Target hook for early folding of built-ins, shamelessly stolen
17077 from ia64.c. */
17078
17079 static tree
17080 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
17081 tree *args, bool ignore ATTRIBUTE_UNUSED)
17082 {
17083 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17084 {
17085 enum rs6000_builtins fn_code
17086 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17087 switch (fn_code)
17088 {
17089 case RS6000_BUILTIN_NANQ:
17090 case RS6000_BUILTIN_NANSQ:
17091 {
17092 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17093 const char *str = c_getstr (*args);
17094 int quiet = fn_code == RS6000_BUILTIN_NANQ;
17095 REAL_VALUE_TYPE real;
17096
17097 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17098 return build_real (type, real);
17099 return NULL_TREE;
17100 }
17101 case RS6000_BUILTIN_INFQ:
17102 case RS6000_BUILTIN_HUGE_VALQ:
17103 {
17104 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17105 REAL_VALUE_TYPE inf;
17106 real_inf (&inf);
17107 return build_real (type, inf);
17108 }
17109 default:
17110 break;
17111 }
17112 }
17113 #ifdef SUBTARGET_FOLD_BUILTIN
17114 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17115 #else
17116 return NULL_TREE;
17117 #endif
17118 }
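
/* A minimal example of the folding above, assuming a target where the
   __float128 builtins exist (see rs6000_init_builtins):

     __float128 q = __builtin_nanq ("");    becomes a quiet-NaN constant
     __float128 s = __builtin_nansq ("");   becomes a signaling-NaN constant
     __float128 i = __builtin_infq ();      becomes +Inf

   All three fold to REAL_CST nodes, so no call is ever emitted.  */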
17119
17120 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
17121 a constant, use rs6000_fold_builtin.) */
17122
17123 bool
17124 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17125 {
17126 gimple *stmt = gsi_stmt (*gsi);
17127 tree fndecl = gimple_call_fndecl (stmt);
17128 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
17129 enum rs6000_builtins fn_code
17130 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
17131 tree arg0, arg1, lhs;
17132
17133 switch (fn_code)
17134 {
17135 /* Flavors of vec_add. We deliberately don't expand
17136 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
17137 TImode, resulting in much poorer code generation. */
17138 case ALTIVEC_BUILTIN_VADDUBM:
17139 case ALTIVEC_BUILTIN_VADDUHM:
17140 case ALTIVEC_BUILTIN_VADDUWM:
17141 case P8V_BUILTIN_VADDUDM:
17142 case ALTIVEC_BUILTIN_VADDFP:
17143 case VSX_BUILTIN_XVADDDP:
17144 {
17145 arg0 = gimple_call_arg (stmt, 0);
17146 arg1 = gimple_call_arg (stmt, 1);
17147 lhs = gimple_call_lhs (stmt);
17148 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
17149 gimple_set_location (g, gimple_location (stmt));
17150 gsi_replace (gsi, g, true);
17151 return true;
17152 }
17153 /* Flavors of vec_sub. We deliberately don't expand
17154 P8V_BUILTIN_VSUBUQM. */
17155 case ALTIVEC_BUILTIN_VSUBUBM:
17156 case ALTIVEC_BUILTIN_VSUBUHM:
17157 case ALTIVEC_BUILTIN_VSUBUWM:
17158 case P8V_BUILTIN_VSUBUDM:
17159 case ALTIVEC_BUILTIN_VSUBFP:
17160 case VSX_BUILTIN_XVSUBDP:
17161 {
17162 arg0 = gimple_call_arg (stmt, 0);
17163 arg1 = gimple_call_arg (stmt, 1);
17164 lhs = gimple_call_lhs (stmt);
17165 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
17166 gimple_set_location (g, gimple_location (stmt));
17167 gsi_replace (gsi, g, true);
17168 return true;
17169 }
17170 case VSX_BUILTIN_XVMULSP:
17171 case VSX_BUILTIN_XVMULDP:
17172 {
17173 arg0 = gimple_call_arg (stmt, 0);
17174 arg1 = gimple_call_arg (stmt, 1);
17175 lhs = gimple_call_lhs (stmt);
17176 gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
17177 gimple_set_location (g, gimple_location (stmt));
17178 gsi_replace (gsi, g, true);
17179 return true;
17180 }
17181 /* Even element flavors of vec_mul (signed). */
17182 case ALTIVEC_BUILTIN_VMULESB:
17183 case ALTIVEC_BUILTIN_VMULESH:
17184 /* Even element flavors of vec_mul (unsigned). */
17185 case ALTIVEC_BUILTIN_VMULEUB:
17186 case ALTIVEC_BUILTIN_VMULEUH:
17187 {
17188 arg0 = gimple_call_arg (stmt, 0);
17189 arg1 = gimple_call_arg (stmt, 1);
17190 lhs = gimple_call_lhs (stmt);
17191 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
17192 gimple_set_location (g, gimple_location (stmt));
17193 gsi_replace (gsi, g, true);
17194 return true;
17195 }
17196 /* Odd element flavors of vec_mul (signed). */
17197 case ALTIVEC_BUILTIN_VMULOSB:
17198 case ALTIVEC_BUILTIN_VMULOSH:
17199 /* Odd element flavors of vec_mul (unsigned). */
17200 case ALTIVEC_BUILTIN_VMULOUB:
17201 case ALTIVEC_BUILTIN_VMULOUH:
17202 {
17203 arg0 = gimple_call_arg (stmt, 0);
17204 arg1 = gimple_call_arg (stmt, 1);
17205 lhs = gimple_call_lhs (stmt);
17206 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
17207 gimple_set_location (g, gimple_location (stmt));
17208 gsi_replace (gsi, g, true);
17209 return true;
17210 }
17211 /* Flavors of vec_div (Integer). */
17212 case VSX_BUILTIN_DIV_V2DI:
17213 case VSX_BUILTIN_UDIV_V2DI:
17214 {
17215 arg0 = gimple_call_arg (stmt, 0);
17216 arg1 = gimple_call_arg (stmt, 1);
17217 lhs = gimple_call_lhs (stmt);
17218 gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
17219 gimple_set_location (g, gimple_location (stmt));
17220 gsi_replace (gsi, g, true);
17221 return true;
17222 }
17223 /* Flavors of vec_div (Float). */
17224 case VSX_BUILTIN_XVDIVSP:
17225 case VSX_BUILTIN_XVDIVDP:
17226 {
17227 arg0 = gimple_call_arg (stmt, 0);
17228 arg1 = gimple_call_arg (stmt, 1);
17229 lhs = gimple_call_lhs (stmt);
17230 gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
17231 gimple_set_location (g, gimple_location (stmt));
17232 gsi_replace (gsi, g, true);
17233 return true;
17234 }
17235 /* Flavors of vec_and. */
17236 case ALTIVEC_BUILTIN_VAND:
17237 {
17238 arg0 = gimple_call_arg (stmt, 0);
17239 arg1 = gimple_call_arg (stmt, 1);
17240 lhs = gimple_call_lhs (stmt);
17241 gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
17242 gimple_set_location (g, gimple_location (stmt));
17243 gsi_replace (gsi, g, true);
17244 return true;
17245 }
17246 /* Flavors of vec_andc. */
17247 case ALTIVEC_BUILTIN_VANDC:
17248 {
17249 arg0 = gimple_call_arg (stmt, 0);
17250 arg1 = gimple_call_arg (stmt, 1);
17251 lhs = gimple_call_lhs (stmt);
17252 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17253 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17254 gimple_set_location (g, gimple_location (stmt));
17255 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17256 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
17257 gimple_set_location (g, gimple_location (stmt));
17258 gsi_replace (gsi, g, true);
17259 return true;
17260 }
17261 /* Flavors of vec_nand. */
17262 case P8V_BUILTIN_VEC_NAND:
17263 case P8V_BUILTIN_NAND_V16QI:
17264 case P8V_BUILTIN_NAND_V8HI:
17265 case P8V_BUILTIN_NAND_V4SI:
17266 case P8V_BUILTIN_NAND_V4SF:
17267 case P8V_BUILTIN_NAND_V2DF:
17268 case P8V_BUILTIN_NAND_V2DI:
17269 {
17270 arg0 = gimple_call_arg (stmt, 0);
17271 arg1 = gimple_call_arg (stmt, 1);
17272 lhs = gimple_call_lhs (stmt);
17273 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17274 gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
17275 gimple_set_location (g, gimple_location (stmt));
17276 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17277 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17278 gimple_set_location (g, gimple_location (stmt));
17279 gsi_replace (gsi, g, true);
17280 return true;
17281 }
17282 /* Flavors of vec_or. */
17283 case ALTIVEC_BUILTIN_VOR:
17284 {
17285 arg0 = gimple_call_arg (stmt, 0);
17286 arg1 = gimple_call_arg (stmt, 1);
17287 lhs = gimple_call_lhs (stmt);
17288 gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
17289 gimple_set_location (g, gimple_location (stmt));
17290 gsi_replace (gsi, g, true);
17291 return true;
17292 }
17293 /* Flavors of vec_orc. */
17294 case P8V_BUILTIN_ORC_V16QI:
17295 case P8V_BUILTIN_ORC_V8HI:
17296 case P8V_BUILTIN_ORC_V4SI:
17297 case P8V_BUILTIN_ORC_V4SF:
17298 case P8V_BUILTIN_ORC_V2DF:
17299 case P8V_BUILTIN_ORC_V2DI:
17300 {
17301 arg0 = gimple_call_arg (stmt, 0);
17302 arg1 = gimple_call_arg (stmt, 1);
17303 lhs = gimple_call_lhs (stmt);
17304 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17305 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
17306 gimple_set_location (g, gimple_location (stmt));
17307 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17308 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
17309 gimple_set_location (g, gimple_location (stmt));
17310 gsi_replace (gsi, g, true);
17311 return true;
17312 }
17313 /* Flavors of vec_xor. */
17314 case ALTIVEC_BUILTIN_VXOR:
17315 {
17316 arg0 = gimple_call_arg (stmt, 0);
17317 arg1 = gimple_call_arg (stmt, 1);
17318 lhs = gimple_call_lhs (stmt);
17319 gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
17320 gimple_set_location (g, gimple_location (stmt));
17321 gsi_replace (gsi, g, true);
17322 return true;
17323 }
17324 /* Flavors of vec_nor. */
17325 case ALTIVEC_BUILTIN_VNOR:
17326 {
17327 arg0 = gimple_call_arg (stmt, 0);
17328 arg1 = gimple_call_arg (stmt, 1);
17329 lhs = gimple_call_lhs (stmt);
17330 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
17331 gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
17332 gimple_set_location (g, gimple_location (stmt));
17333 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17334 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
17335 gimple_set_location (g, gimple_location (stmt));
17336 gsi_replace (gsi, g, true);
17337 return true;
17338 }
17339 default:
17340 break;
17341 }
17342
17343 return false;
17344 }
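
/* For example, with -maltivec the function

     vector signed int
     f (vector signed int a, vector signed int b)
     {
       return __builtin_altivec_vadduwm (a, b);
     }

   is rewritten here into a plain "a + b" assignment, so subsequent GIMPLE
   passes (and -fdump-tree dumps) see ordinary vector arithmetic rather
   than an opaque builtin call.  */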
17345
17346 /* Expand an expression EXP that calls a built-in function,
17347 with result going to TARGET if that's convenient
17348 (and in mode MODE if that's convenient).
17349 SUBTARGET may be used as the target for computing one of EXP's operands.
17350 IGNORE is nonzero if the value is to be ignored. */
17351
17352 static rtx
17353 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17354 machine_mode mode ATTRIBUTE_UNUSED,
17355 int ignore ATTRIBUTE_UNUSED)
17356 {
17357 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17358 enum rs6000_builtins fcode
17359 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
17360 size_t uns_fcode = (size_t)fcode;
17361 const struct builtin_description *d;
17362 size_t i;
17363 rtx ret;
17364 bool success;
17365 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
17366 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
17367
17368 if (TARGET_DEBUG_BUILTIN)
17369 {
17370 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
17371 const char *name1 = rs6000_builtin_info[uns_fcode].name;
17372 const char *name2 = ((icode != CODE_FOR_nothing)
17373 ? get_insn_name ((int)icode)
17374 : "nothing");
17375 const char *name3;
17376
17377 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
17378 {
17379 default: name3 = "unknown"; break;
17380 case RS6000_BTC_SPECIAL: name3 = "special"; break;
17381 case RS6000_BTC_UNARY: name3 = "unary"; break;
17382 case RS6000_BTC_BINARY: name3 = "binary"; break;
17383 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
17384 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
17385 case RS6000_BTC_ABS: name3 = "abs"; break;
17386 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
17387 case RS6000_BTC_DST: name3 = "dst"; break;
17388 }
17389
17390
17391 fprintf (stderr,
17392 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
17393 (name1) ? name1 : "---", fcode,
17394 (name2) ? name2 : "---", (int)icode,
17395 name3,
17396 func_valid_p ? "" : ", not valid");
17397 }
17398
17399 if (!func_valid_p)
17400 {
17401 rs6000_invalid_builtin (fcode);
17402
17403 /* Given it is invalid, just generate a normal call. */
17404 return expand_call (exp, target, ignore);
17405 }
17406
17407 switch (fcode)
17408 {
17409 case RS6000_BUILTIN_RECIP:
17410 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
17411
17412 case RS6000_BUILTIN_RECIPF:
17413 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
17414
17415 case RS6000_BUILTIN_RSQRTF:
17416 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
17417
17418 case RS6000_BUILTIN_RSQRT:
17419 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
17420
17421 case POWER7_BUILTIN_BPERMD:
17422 return rs6000_expand_binop_builtin (((TARGET_64BIT)
17423 ? CODE_FOR_bpermd_di
17424 : CODE_FOR_bpermd_si), exp, target);
17425
17426 case RS6000_BUILTIN_GET_TB:
17427 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
17428 target);
17429
17430 case RS6000_BUILTIN_MFTB:
17431 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
17432 ? CODE_FOR_rs6000_mftb_di
17433 : CODE_FOR_rs6000_mftb_si),
17434 target);
17435
17436 case RS6000_BUILTIN_MFFS:
17437 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
17438
17439 case RS6000_BUILTIN_MTFSF:
17440 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
17441
17442 case RS6000_BUILTIN_CPU_INIT:
17443 case RS6000_BUILTIN_CPU_IS:
17444 case RS6000_BUILTIN_CPU_SUPPORTS:
17445 return cpu_expand_builtin (fcode, exp, target);
17446
17447 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
17448 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
17449 {
17450 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
17451 : (int) CODE_FOR_altivec_lvsl_direct);
17452 machine_mode tmode = insn_data[icode].operand[0].mode;
17453 machine_mode mode = insn_data[icode].operand[1].mode;
17454 tree arg;
17455 rtx op, addr, pat;
17456
17457 gcc_assert (TARGET_ALTIVEC);
17458
17459 arg = CALL_EXPR_ARG (exp, 0);
17460 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
17461 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
17462 addr = memory_address (mode, op);
17463 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
17464 op = addr;
17465 else
17466 {
17467 /* For the load case we need to negate the address. */
17468 op = gen_reg_rtx (GET_MODE (addr));
17469 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
17470 }
17471 op = gen_rtx_MEM (mode, op);
17472
17473 if (target == 0
17474 || GET_MODE (target) != tmode
17475 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17476 target = gen_reg_rtx (tmode);
17477
17478 pat = GEN_FCN (icode) (target, op);
17479 if (!pat)
17480 return 0;
17481 emit_insn (pat);
17482
17483 return target;
17484 }
17485
17486 case ALTIVEC_BUILTIN_VCFUX:
17487 case ALTIVEC_BUILTIN_VCFSX:
17488 case ALTIVEC_BUILTIN_VCTUXS:
17489 case ALTIVEC_BUILTIN_VCTSXS:
17490 /* FIXME: There's got to be a nicer way to handle this case than
17491 constructing a new CALL_EXPR. */
17492 if (call_expr_nargs (exp) == 1)
17493 {
17494 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
17495 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
17496 }
17497 break;
17498
17499 default:
17500 break;
17501 }
17502
17503 if (TARGET_ALTIVEC)
17504 {
17505 ret = altivec_expand_builtin (exp, target, &success);
17506
17507 if (success)
17508 return ret;
17509 }
17510 if (TARGET_SPE)
17511 {
17512 ret = spe_expand_builtin (exp, target, &success);
17513
17514 if (success)
17515 return ret;
17516 }
17517 if (TARGET_PAIRED_FLOAT)
17518 {
17519 ret = paired_expand_builtin (exp, target, &success);
17520
17521 if (success)
17522 return ret;
17523 }
17524 if (TARGET_HTM)
17525 {
17526 ret = htm_expand_builtin (exp, target, &success);
17527
17528 if (success)
17529 return ret;
17530 }
17531
17532 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
17533 /* RS6000_BTC_SPECIAL represents no-operand operators. */
17534 gcc_assert (attr == RS6000_BTC_UNARY
17535 || attr == RS6000_BTC_BINARY
17536 || attr == RS6000_BTC_TERNARY
17537 || attr == RS6000_BTC_SPECIAL);
17538
17539 /* Handle simple unary operations. */
17540 d = bdesc_1arg;
17541 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17542 if (d->code == fcode)
17543 return rs6000_expand_unop_builtin (d->icode, exp, target);
17544
17545 /* Handle simple binary operations. */
17546 d = bdesc_2arg;
17547 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17548 if (d->code == fcode)
17549 return rs6000_expand_binop_builtin (d->icode, exp, target);
17550
17551 /* Handle simple ternary operations. */
17552 d = bdesc_3arg;
17553 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17554 if (d->code == fcode)
17555 return rs6000_expand_ternop_builtin (d->icode, exp, target);
17556
17557 /* Handle simple no-argument operations. */
17558 d = bdesc_0arg;
17559 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17560 if (d->code == fcode)
17561 return rs6000_expand_zeroop_builtin (d->icode, target);
17562
17563 gcc_unreachable ();
17564 }
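
/* Usage sketch for one of the early cases above: on a target where the FP
   reciprocal-estimate instructions are enabled,

     double q = __builtin_recipdiv (x, y);

   is expanded directly through CODE_FOR_recipdf3, while builtins without
   a dedicated case fall through to the bdesc_0arg..bdesc_3arg tables at
   the end of the function.  */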
17565
17566 /* Create a builtin vector type with a name, taking care not to give
17567 the canonical type a name. */
17568
17569 static tree
17570 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
17571 {
17572 tree result = build_vector_type (elt_type, num_elts);
17573
17574 /* Copy so we don't give the canonical type a name. */
17575 result = build_variant_type_copy (result);
17576
17577 add_builtin_type (name, result);
17578
17579 return result;
17580 }
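
/* E.g. rs6000_vector_type ("__vector float", float_type_node, 4) hands
   back a named variant whose TYPE_CANONICAL is the ordinary V4SF vector
   type, so "__vector float" shows up in diagnostics while canonical-type
   sharing with unnamed V4SF vectors is preserved.  */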
17581
17582 static void
17583 rs6000_init_builtins (void)
17584 {
17585 tree tdecl;
17586 tree ftype;
17587 machine_mode mode;
17588
17589 if (TARGET_DEBUG_BUILTIN)
17590 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
17591 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
17592 (TARGET_SPE) ? ", spe" : "",
17593 (TARGET_ALTIVEC) ? ", altivec" : "",
17594 (TARGET_VSX) ? ", vsx" : "");
17595
17596 V2SI_type_node = build_vector_type (intSI_type_node, 2);
17597 V2SF_type_node = build_vector_type (float_type_node, 2);
17598 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
17599 : "__vector long long",
17600 intDI_type_node, 2);
17601 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
17602 V4HI_type_node = build_vector_type (intHI_type_node, 4);
17603 V4SI_type_node = rs6000_vector_type ("__vector signed int",
17604 intSI_type_node, 4);
17605 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
17606 V8HI_type_node = rs6000_vector_type ("__vector signed short",
17607 intHI_type_node, 8);
17608 V16QI_type_node = rs6000_vector_type ("__vector signed char",
17609 intQI_type_node, 16);
17610
17611 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
17612 unsigned_intQI_type_node, 16);
17613 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
17614 unsigned_intHI_type_node, 8);
17615 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
17616 unsigned_intSI_type_node, 4);
17617 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17618 ? "__vector unsigned long"
17619 : "__vector unsigned long long",
17620 unsigned_intDI_type_node, 2);
17621
17622 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
17623 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
17624 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
17625 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
17626
17627 const_str_type_node
17628 = build_pointer_type (build_qualified_type (char_type_node,
17629 TYPE_QUAL_CONST));
17630
17631 /* We use V1TI mode as a special container to hold __int128_t items that
17632 must live in VSX registers. */
17633 if (intTI_type_node)
17634 {
17635 V1TI_type_node = rs6000_vector_type ("__vector __int128",
17636 intTI_type_node, 1);
17637 unsigned_V1TI_type_node
17638 = rs6000_vector_type ("__vector unsigned __int128",
17639 unsigned_intTI_type_node, 1);
17640 }
17641
17642 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
17643 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
17644 'vector unsigned short'. */
17645
17646 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
17647 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17648 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
17649 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
17650 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
17651
17652 long_integer_type_internal_node = long_integer_type_node;
17653 long_unsigned_type_internal_node = long_unsigned_type_node;
17654 long_long_integer_type_internal_node = long_long_integer_type_node;
17655 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
17656 intQI_type_internal_node = intQI_type_node;
17657 uintQI_type_internal_node = unsigned_intQI_type_node;
17658 intHI_type_internal_node = intHI_type_node;
17659 uintHI_type_internal_node = unsigned_intHI_type_node;
17660 intSI_type_internal_node = intSI_type_node;
17661 uintSI_type_internal_node = unsigned_intSI_type_node;
17662 intDI_type_internal_node = intDI_type_node;
17663 uintDI_type_internal_node = unsigned_intDI_type_node;
17664 intTI_type_internal_node = intTI_type_node;
17665 uintTI_type_internal_node = unsigned_intTI_type_node;
17666 float_type_internal_node = float_type_node;
17667 double_type_internal_node = double_type_node;
17668 long_double_type_internal_node = long_double_type_node;
17669 dfloat64_type_internal_node = dfloat64_type_node;
17670 dfloat128_type_internal_node = dfloat128_type_node;
17671 void_type_internal_node = void_type_node;
17672
17673 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
17674 IFmode is the IBM extended 128-bit format that is a pair of doubles.
17675 TFmode will be either IEEE 128-bit floating point or the IBM double-double
17676 format that uses a pair of doubles, depending on the switches and
17677 defaults.
17678
17679 We do not enable the actual __float128 keyword unless the user explicitly
17680 asks for it, because the library support is not yet complete.
17681
17682 If we don't support either 128-bit IBM double-double or IEEE 128-bit
17683 floating point, we need to make sure the type is non-zero, or else the
17684 self-test fails during bootstrap.
17685
17686 We don't register a built-in type for __ibm128 if the type is the same
17687 as long double; instead, rs6000_cpu_cpp_builtins #defines __ibm128 to
17688 long double. */
17689 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17690 {
17691 ibm128_float_type_node = make_node (REAL_TYPE);
17692 TYPE_PRECISION (ibm128_float_type_node) = 128;
17693 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17694 layout_type (ibm128_float_type_node);
17695
17696 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17697 "__ibm128");
17698 }
17699 else
17700 ibm128_float_type_node = long_double_type_node;
17701
17702 if (TARGET_FLOAT128_KEYWORD)
17703 {
17704 ieee128_float_type_node = float128_type_node;
17705 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17706 "__float128");
17707 }
17708
17709 else if (TARGET_FLOAT128_TYPE)
17710 {
17711 ieee128_float_type_node = make_node (REAL_TYPE);
17712 TYPE_PRECISION (ieee128_float_type_node) = 128;
17713 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17714 layout_type (ieee128_float_type_node);
17715
17716 /* If we are not exporting the __float128/_Float128 keywords, we need a
17717 keyword to get the types created. Use __ieee128 as the dummy
17718 keyword. */
17719 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17720 "__ieee128");
17721 }
17722
17723 else
17724 ieee128_float_type_node = long_double_type_node;
17725
17726 /* Initialize the modes for builtin_function_type, mapping a machine mode
17727 to its tree type node. */
17728 builtin_mode_to_type[QImode][0] = integer_type_node;
17729 builtin_mode_to_type[HImode][0] = integer_type_node;
17730 builtin_mode_to_type[SImode][0] = intSI_type_node;
17731 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17732 builtin_mode_to_type[DImode][0] = intDI_type_node;
17733 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17734 builtin_mode_to_type[TImode][0] = intTI_type_node;
17735 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17736 builtin_mode_to_type[SFmode][0] = float_type_node;
17737 builtin_mode_to_type[DFmode][0] = double_type_node;
17738 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17739 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17740 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17741 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17742 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17743 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17744 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17745 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17746 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17747 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17748 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17749 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17750 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
17751 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17752 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17753 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17754 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17755 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17756 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17757 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
17758
17759 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17760 TYPE_NAME (bool_char_type_node) = tdecl;
17761
17762 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17763 TYPE_NAME (bool_short_type_node) = tdecl;
17764
17765 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17766 TYPE_NAME (bool_int_type_node) = tdecl;
17767
17768 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17769 TYPE_NAME (pixel_type_node) = tdecl;
17770
17771 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17772 bool_char_type_node, 16);
17773 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17774 bool_short_type_node, 8);
17775 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17776 bool_int_type_node, 4);
17777 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17778 ? "__vector __bool long"
17779 : "__vector __bool long long",
17780 bool_long_type_node, 2);
17781 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17782 pixel_type_node, 8);
17783
17784 /* Paired and SPE builtins are only available if the compiler was built
17785 with the corresponding options, so only create those builtins when the
17786 matching compiler option is enabled. Create AltiVec and VSX builtins on
17787 machines with at least the general-purpose extensions (970 and newer)
17788 to allow the use of the target attribute. */
17789 if (TARGET_PAIRED_FLOAT)
17790 paired_init_builtins ();
17791 if (TARGET_SPE)
17792 spe_init_builtins ();
17793 if (TARGET_EXTRA_BUILTINS)
17794 altivec_init_builtins ();
17795 if (TARGET_HTM)
17796 htm_init_builtins ();
17797
17798 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
17799 rs6000_common_init_builtins ();
17800
17801 ftype = build_function_type_list (ieee128_float_type_node,
17802 const_str_type_node, NULL_TREE);
17803 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17804 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17805
17806 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17807 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17808 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
17809
17810 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17811 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17812 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17813
17814 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17815 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17816 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17817
17818 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17819 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17820 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17821
17822 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17823 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17824 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17825
17826 mode = (TARGET_64BIT) ? DImode : SImode;
17827 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17828 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17829 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17830
17831 ftype = build_function_type_list (unsigned_intDI_type_node,
17832 NULL_TREE);
17833 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17834
17835 if (TARGET_64BIT)
17836 ftype = build_function_type_list (unsigned_intDI_type_node,
17837 NULL_TREE);
17838 else
17839 ftype = build_function_type_list (unsigned_intSI_type_node,
17840 NULL_TREE);
17841 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
17842
17843 ftype = build_function_type_list (double_type_node, NULL_TREE);
17844 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17845
17846 ftype = build_function_type_list (void_type_node,
17847 intSI_type_node, double_type_node,
17848 NULL_TREE);
17849 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17850
17851 ftype = build_function_type_list (void_type_node, NULL_TREE);
17852 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17853
17854 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17855 NULL_TREE);
17856 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17857 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
17858
17859 /* AIX libm provides clog as __clog. */
17860 if (TARGET_XCOFF
17861 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17862 set_user_assembler_name (tdecl, "__clog");
17863
17864 #ifdef SUBTARGET_INIT_BUILTINS
17865 SUBTARGET_INIT_BUILTINS;
17866 #endif
17867 }
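
/* The CPU identification builtins defined above can be used from ordinary
   code on targets with the required libc support; fast_path and slow_path
   below are illustrative placeholders:

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("vsx"))
       fast_path ();
     else
       slow_path ();  */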
17868
17869 /* Returns the rs6000 builtin decl for CODE. */
17870
17871 static tree
17872 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17873 {
17874 HOST_WIDE_INT fnmask;
17875
17876 if (code >= RS6000_BUILTIN_COUNT)
17877 return error_mark_node;
17878
17879 fnmask = rs6000_builtin_info[code].mask;
17880 if ((fnmask & rs6000_builtin_mask) != fnmask)
17881 {
17882 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17883 return error_mark_node;
17884 }
17885
17886 return rs6000_builtin_decls[code];
17887 }
17888
17889 static void
17890 spe_init_builtins (void)
17891 {
17892 tree puint_type_node = build_pointer_type (unsigned_type_node);
17893 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
17894 const struct builtin_description *d;
17895 size_t i;
17896 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17897
17898 tree v2si_ftype_4_v2si
17899 = build_function_type_list (opaque_V2SI_type_node,
17900 opaque_V2SI_type_node,
17901 opaque_V2SI_type_node,
17902 opaque_V2SI_type_node,
17903 opaque_V2SI_type_node,
17904 NULL_TREE);
17905
17906 tree v2sf_ftype_4_v2sf
17907 = build_function_type_list (opaque_V2SF_type_node,
17908 opaque_V2SF_type_node,
17909 opaque_V2SF_type_node,
17910 opaque_V2SF_type_node,
17911 opaque_V2SF_type_node,
17912 NULL_TREE);
17913
17914 tree int_ftype_int_v2si_v2si
17915 = build_function_type_list (integer_type_node,
17916 integer_type_node,
17917 opaque_V2SI_type_node,
17918 opaque_V2SI_type_node,
17919 NULL_TREE);
17920
17921 tree int_ftype_int_v2sf_v2sf
17922 = build_function_type_list (integer_type_node,
17923 integer_type_node,
17924 opaque_V2SF_type_node,
17925 opaque_V2SF_type_node,
17926 NULL_TREE);
17927
17928 tree void_ftype_v2si_puint_int
17929 = build_function_type_list (void_type_node,
17930 opaque_V2SI_type_node,
17931 puint_type_node,
17932 integer_type_node,
17933 NULL_TREE);
17934
17935 tree void_ftype_v2si_puint_char
17936 = build_function_type_list (void_type_node,
17937 opaque_V2SI_type_node,
17938 puint_type_node,
17939 char_type_node,
17940 NULL_TREE);
17941
17942 tree void_ftype_v2si_pv2si_int
17943 = build_function_type_list (void_type_node,
17944 opaque_V2SI_type_node,
17945 opaque_p_V2SI_type_node,
17946 integer_type_node,
17947 NULL_TREE);
17948
17949 tree void_ftype_v2si_pv2si_char
17950 = build_function_type_list (void_type_node,
17951 opaque_V2SI_type_node,
17952 opaque_p_V2SI_type_node,
17953 char_type_node,
17954 NULL_TREE);
17955
17956 tree void_ftype_int
17957 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17958
17959 tree int_ftype_void
17960 = build_function_type_list (integer_type_node, NULL_TREE);
17961
17962 tree v2si_ftype_pv2si_int
17963 = build_function_type_list (opaque_V2SI_type_node,
17964 opaque_p_V2SI_type_node,
17965 integer_type_node,
17966 NULL_TREE);
17967
17968 tree v2si_ftype_puint_int
17969 = build_function_type_list (opaque_V2SI_type_node,
17970 puint_type_node,
17971 integer_type_node,
17972 NULL_TREE);
17973
17974 tree v2si_ftype_pushort_int
17975 = build_function_type_list (opaque_V2SI_type_node,
17976 pushort_type_node,
17977 integer_type_node,
17978 NULL_TREE);
17979
17980 tree v2si_ftype_signed_char
17981 = build_function_type_list (opaque_V2SI_type_node,
17982 signed_char_type_node,
17983 NULL_TREE);
17984
17985 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
17986
17987 /* Initialize irregular SPE builtins. */
17988
17989 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
17990 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
17991 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
17992 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
17993 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
17994 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
17995 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
17996 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
17997 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
17998 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
17999 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
18000 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
18001 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
18002 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
18003 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
18004 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
18005 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
18006 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
18007
18008 /* Loads. */
18009 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
18010 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
18011 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
18012 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
18013 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
18014 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
18015 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
18016 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
18017 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
18018 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
18019 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
18020 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
18021 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
18022 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
18023 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
18024 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
18025 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
18026 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
18027 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
18028 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
18029 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
18030 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
18031
18032 /* Predicates. */
18033 d = bdesc_spe_predicates;
18034 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
18035 {
18036 tree type;
18037 HOST_WIDE_INT mask = d->mask;
18038
18039 if ((mask & builtin_mask) != mask)
18040 {
18041 if (TARGET_DEBUG_BUILTIN)
18042 fprintf (stderr, "spe_init_builtins, skip predicate %s\n",
18043 d->name);
18044 continue;
18045 }
18046
18047 /* Cannot define builtin if the instruction is disabled. */
18048 gcc_assert (d->icode != CODE_FOR_nothing);
18049 switch (insn_data[d->icode].operand[1].mode)
18050 {
18051 case E_V2SImode:
18052 type = int_ftype_int_v2si_v2si;
18053 break;
18054 case E_V2SFmode:
18055 type = int_ftype_int_v2sf_v2sf;
18056 break;
18057 default:
18058 gcc_unreachable ();
18059 }
18060
18061 def_builtin (d->name, type, d->code);
18062 }
18063
18064 /* Evsel predicates. */
18065 d = bdesc_spe_evsel;
18066 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
18067 {
18068 tree type;
18069 HOST_WIDE_INT mask = d->mask;
18070
18071 if ((mask & builtin_mask) != mask)
18072 {
18073 if (TARGET_DEBUG_BUILTIN)
18074 fprintf (stderr, "spe_init_builtins, skip evsel %s\n",
18075 d->name);
18076 continue;
18077 }
18078
18079 /* Cannot define builtin if the instruction is disabled. */
18080 gcc_assert (d->icode != CODE_FOR_nothing);
18081 switch (insn_data[d->icode].operand[1].mode)
18082 {
18083 case E_V2SImode:
18084 type = v2si_ftype_4_v2si;
18085 break;
18086 case E_V2SFmode:
18087 type = v2sf_ftype_4_v2sf;
18088 break;
18089 default:
18090 gcc_unreachable ();
18091 }
18092
18093 def_builtin (d->name, type, d->code);
18094 }
18095 }
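
/* A usage sketch for the load/store builtins just defined (signatures per
   the type lists above; offsets are small immediates with the alignment
   the hardware instruction requires):

     __ev64_opaque__ buf[2];
     __ev64_opaque__ v = __builtin_spe_evldd (&buf[0], 0);
     __builtin_spe_evstdd (v, &buf[1], 0);
*/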
18096
18097 static void
18098 paired_init_builtins (void)
18099 {
18100 const struct builtin_description *d;
18101 size_t i;
18102 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18103
18104 tree int_ftype_int_v2sf_v2sf
18105 = build_function_type_list (integer_type_node,
18106 integer_type_node,
18107 V2SF_type_node,
18108 V2SF_type_node,
18109 NULL_TREE);
18110 tree pcfloat_type_node
18111 = build_pointer_type (build_qualified_type (float_type_node,
18112 TYPE_QUAL_CONST));
18113
18114 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
18115 long_integer_type_node,
18116 pcfloat_type_node,
18117 NULL_TREE);
18118 tree void_ftype_v2sf_long_pcfloat
18119 = build_function_type_list (void_type_node,
18120 V2SF_type_node,
18121 long_integer_type_node,
18122 pcfloat_type_node,
18123 NULL_TREE);
18124
18125
18126 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
18127 PAIRED_BUILTIN_LX);
18128
18129
18130 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
18131 PAIRED_BUILTIN_STX);
18132
18133 /* Predicates. */
18134 d = bdesc_paired_preds;
18135 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
18136 {
18137 tree type;
18138 HOST_WIDE_INT mask = d->mask;
18139
18140 if ((mask & builtin_mask) != mask)
18141 {
18142 if (TARGET_DEBUG_BUILTIN)
18143 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
18144 d->name);
18145 continue;
18146 }
18147
18148 /* Cannot define builtin if the instruction is disabled. */
18149 gcc_assert (d->icode != CODE_FOR_nothing);
18150
18151 if (TARGET_DEBUG_BUILTIN)
18152 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
18153 (int)i, get_insn_name (d->icode), (int)d->icode,
18154 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
18155
18156 switch (insn_data[d->icode].operand[1].mode)
18157 {
18158 case E_V2SFmode:
18159 type = int_ftype_int_v2sf_v2sf;
18160 break;
18161 default:
18162 gcc_unreachable ();
18163 }
18164
18165 def_builtin (d->name, type, d->code);
18166 }
18167 }
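
/* A hedged sketch of the two irregular paired-float builtins defined
   above, matching v2sf_ftype_long_pcfloat and void_ftype_v2sf_long_pcfloat
   (the long operand is a byte offset added to the pointer; "vector float"
   here stands for the V2SF type used with -mpaired):

     float src[2] = { 1.0f, 2.0f }, dst[2];
     vector float v = __builtin_paired_lx (0, src);
     __builtin_paired_stx (v, 0, dst);
*/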
18168
18169 static void
18170 altivec_init_builtins (void)
18171 {
18172 const struct builtin_description *d;
18173 size_t i;
18174 tree ftype;
18175 tree decl;
18176 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18177
18178 tree pvoid_type_node = build_pointer_type (void_type_node);
18179
18180 tree pcvoid_type_node
18181 = build_pointer_type (build_qualified_type (void_type_node,
18182 TYPE_QUAL_CONST));
18183
18184 tree int_ftype_opaque
18185 = build_function_type_list (integer_type_node,
18186 opaque_V4SI_type_node, NULL_TREE);
18187 tree opaque_ftype_opaque
18188 = build_function_type_list (integer_type_node, NULL_TREE);
18189 tree opaque_ftype_opaque_int
18190 = build_function_type_list (opaque_V4SI_type_node,
18191 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
18192 tree opaque_ftype_opaque_opaque_int
18193 = build_function_type_list (opaque_V4SI_type_node,
18194 opaque_V4SI_type_node, opaque_V4SI_type_node,
18195 integer_type_node, NULL_TREE);
18196 tree opaque_ftype_opaque_opaque_opaque
18197 = build_function_type_list (opaque_V4SI_type_node,
18198 opaque_V4SI_type_node, opaque_V4SI_type_node,
18199 opaque_V4SI_type_node, NULL_TREE);
18200 tree opaque_ftype_opaque_opaque
18201 = build_function_type_list (opaque_V4SI_type_node,
18202 opaque_V4SI_type_node, opaque_V4SI_type_node,
18203 NULL_TREE);
18204 tree int_ftype_int_opaque_opaque
18205 = build_function_type_list (integer_type_node,
18206 integer_type_node, opaque_V4SI_type_node,
18207 opaque_V4SI_type_node, NULL_TREE);
18208 tree int_ftype_int_v4si_v4si
18209 = build_function_type_list (integer_type_node,
18210 integer_type_node, V4SI_type_node,
18211 V4SI_type_node, NULL_TREE);
18212 tree int_ftype_int_v2di_v2di
18213 = build_function_type_list (integer_type_node,
18214 integer_type_node, V2DI_type_node,
18215 V2DI_type_node, NULL_TREE);
18216 tree void_ftype_v4si
18217 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
18218 tree v8hi_ftype_void
18219 = build_function_type_list (V8HI_type_node, NULL_TREE);
18220 tree void_ftype_void
18221 = build_function_type_list (void_type_node, NULL_TREE);
18222 tree void_ftype_int
18223 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
18224
18225 tree opaque_ftype_long_pcvoid
18226 = build_function_type_list (opaque_V4SI_type_node,
18227 long_integer_type_node, pcvoid_type_node,
18228 NULL_TREE);
18229 tree v16qi_ftype_long_pcvoid
18230 = build_function_type_list (V16QI_type_node,
18231 long_integer_type_node, pcvoid_type_node,
18232 NULL_TREE);
18233 tree v8hi_ftype_long_pcvoid
18234 = build_function_type_list (V8HI_type_node,
18235 long_integer_type_node, pcvoid_type_node,
18236 NULL_TREE);
18237 tree v4si_ftype_long_pcvoid
18238 = build_function_type_list (V4SI_type_node,
18239 long_integer_type_node, pcvoid_type_node,
18240 NULL_TREE);
18241 tree v4sf_ftype_long_pcvoid
18242 = build_function_type_list (V4SF_type_node,
18243 long_integer_type_node, pcvoid_type_node,
18244 NULL_TREE);
18245 tree v2df_ftype_long_pcvoid
18246 = build_function_type_list (V2DF_type_node,
18247 long_integer_type_node, pcvoid_type_node,
18248 NULL_TREE);
18249 tree v2di_ftype_long_pcvoid
18250 = build_function_type_list (V2DI_type_node,
18251 long_integer_type_node, pcvoid_type_node,
18252 NULL_TREE);
18253
18254 tree void_ftype_opaque_long_pvoid
18255 = build_function_type_list (void_type_node,
18256 opaque_V4SI_type_node, long_integer_type_node,
18257 pvoid_type_node, NULL_TREE);
18258 tree void_ftype_v4si_long_pvoid
18259 = build_function_type_list (void_type_node,
18260 V4SI_type_node, long_integer_type_node,
18261 pvoid_type_node, NULL_TREE);
18262 tree void_ftype_v16qi_long_pvoid
18263 = build_function_type_list (void_type_node,
18264 V16QI_type_node, long_integer_type_node,
18265 pvoid_type_node, NULL_TREE);
18266
18267 tree void_ftype_v16qi_pvoid_long
18268 = build_function_type_list (void_type_node,
18269 V16QI_type_node, pvoid_type_node,
18270 long_integer_type_node, NULL_TREE);
18271
18272 tree void_ftype_v8hi_long_pvoid
18273 = build_function_type_list (void_type_node,
18274 V8HI_type_node, long_integer_type_node,
18275 pvoid_type_node, NULL_TREE);
18276 tree void_ftype_v4sf_long_pvoid
18277 = build_function_type_list (void_type_node,
18278 V4SF_type_node, long_integer_type_node,
18279 pvoid_type_node, NULL_TREE);
18280 tree void_ftype_v2df_long_pvoid
18281 = build_function_type_list (void_type_node,
18282 V2DF_type_node, long_integer_type_node,
18283 pvoid_type_node, NULL_TREE);
18284 tree void_ftype_v2di_long_pvoid
18285 = build_function_type_list (void_type_node,
18286 V2DI_type_node, long_integer_type_node,
18287 pvoid_type_node, NULL_TREE);
18288 tree int_ftype_int_v8hi_v8hi
18289 = build_function_type_list (integer_type_node,
18290 integer_type_node, V8HI_type_node,
18291 V8HI_type_node, NULL_TREE);
18292 tree int_ftype_int_v16qi_v16qi
18293 = build_function_type_list (integer_type_node,
18294 integer_type_node, V16QI_type_node,
18295 V16QI_type_node, NULL_TREE);
18296 tree int_ftype_int_v4sf_v4sf
18297 = build_function_type_list (integer_type_node,
18298 integer_type_node, V4SF_type_node,
18299 V4SF_type_node, NULL_TREE);
18300 tree int_ftype_int_v2df_v2df
18301 = build_function_type_list (integer_type_node,
18302 integer_type_node, V2DF_type_node,
18303 V2DF_type_node, NULL_TREE);
18304 tree v2di_ftype_v2di
18305 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
18306 tree v4si_ftype_v4si
18307 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18308 tree v8hi_ftype_v8hi
18309 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18310 tree v16qi_ftype_v16qi
18311 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18312 tree v4sf_ftype_v4sf
18313 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18314 tree v2df_ftype_v2df
18315 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18316 tree void_ftype_pcvoid_int_int
18317 = build_function_type_list (void_type_node,
18318 pcvoid_type_node, integer_type_node,
18319 integer_type_node, NULL_TREE);
18320
18321 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
18322 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
18323 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
18324 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
18325 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
18326 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
18327 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
18328 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
18329 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
18330 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
18331 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
18332 ALTIVEC_BUILTIN_LVXL_V2DF);
18333 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
18334 ALTIVEC_BUILTIN_LVXL_V2DI);
18335 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
18336 ALTIVEC_BUILTIN_LVXL_V4SF);
18337 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
18338 ALTIVEC_BUILTIN_LVXL_V4SI);
18339 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
18340 ALTIVEC_BUILTIN_LVXL_V8HI);
18341 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
18342 ALTIVEC_BUILTIN_LVXL_V16QI);
18343 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
18344 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
18345 ALTIVEC_BUILTIN_LVX_V2DF);
18346 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
18347 ALTIVEC_BUILTIN_LVX_V2DI);
18348 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
18349 ALTIVEC_BUILTIN_LVX_V4SF);
18350 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
18351 ALTIVEC_BUILTIN_LVX_V4SI);
18352 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
18353 ALTIVEC_BUILTIN_LVX_V8HI);
18354 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
18355 ALTIVEC_BUILTIN_LVX_V16QI);
18356 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
18357 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
18358 ALTIVEC_BUILTIN_STVX_V2DF);
18359 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
18360 ALTIVEC_BUILTIN_STVX_V2DI);
18361 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
18362 ALTIVEC_BUILTIN_STVX_V4SF);
18363 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
18364 ALTIVEC_BUILTIN_STVX_V4SI);
18365 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
18366 ALTIVEC_BUILTIN_STVX_V8HI);
18367 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
18368 ALTIVEC_BUILTIN_STVX_V16QI);
18369 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
18370 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
18371 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
18372 ALTIVEC_BUILTIN_STVXL_V2DF);
18373 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
18374 ALTIVEC_BUILTIN_STVXL_V2DI);
18375 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
18376 ALTIVEC_BUILTIN_STVXL_V4SF);
18377 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
18378 ALTIVEC_BUILTIN_STVXL_V4SI);
18379 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
18380 ALTIVEC_BUILTIN_STVXL_V8HI);
18381 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
18382 ALTIVEC_BUILTIN_STVXL_V16QI);
18383 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
18384 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
18385 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
18386 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
18387 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
18388 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
18389 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
18390 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
18391 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
18392 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
18393 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
18394 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
18395 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
18396 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
18397 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
18398 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
18399
18400 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
18401 VSX_BUILTIN_LXVD2X_V2DF);
18402 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
18403 VSX_BUILTIN_LXVD2X_V2DI);
18404 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
18405 VSX_BUILTIN_LXVW4X_V4SF);
18406 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
18407 VSX_BUILTIN_LXVW4X_V4SI);
18408 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
18409 VSX_BUILTIN_LXVW4X_V8HI);
18410 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
18411 VSX_BUILTIN_LXVW4X_V16QI);
18412 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
18413 VSX_BUILTIN_STXVD2X_V2DF);
18414 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
18415 VSX_BUILTIN_STXVD2X_V2DI);
18416 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
18417 VSX_BUILTIN_STXVW4X_V4SF);
18418 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
18419 VSX_BUILTIN_STXVW4X_V4SI);
18420 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
18421 VSX_BUILTIN_STXVW4X_V8HI);
18422 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
18423 VSX_BUILTIN_STXVW4X_V16QI);
18424
18425 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
18426 VSX_BUILTIN_LD_ELEMREV_V2DF);
18427 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
18428 VSX_BUILTIN_LD_ELEMREV_V2DI);
18429 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
18430 VSX_BUILTIN_LD_ELEMREV_V4SF);
18431 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
18432 VSX_BUILTIN_LD_ELEMREV_V4SI);
18433 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
18434 VSX_BUILTIN_ST_ELEMREV_V2DF);
18435 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
18436 VSX_BUILTIN_ST_ELEMREV_V2DI);
18437 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
18438 VSX_BUILTIN_ST_ELEMREV_V4SF);
18439 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
18440 VSX_BUILTIN_ST_ELEMREV_V4SI);
18441
18442 if (TARGET_P9_VECTOR)
18443 {
18444 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
18445 VSX_BUILTIN_LD_ELEMREV_V8HI);
18446 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
18447 VSX_BUILTIN_LD_ELEMREV_V16QI);
18448 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
18449 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
18450 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
18451 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
18452 }
18453 else
18454 {
18455 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
18456 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
18457 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
18458 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
18459 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
18460 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
18461 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
18462 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
18463 }
18464
18465 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
18466 VSX_BUILTIN_VEC_LD);
18467 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
18468 VSX_BUILTIN_VEC_ST);
18469 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
18470 VSX_BUILTIN_VEC_XL);
18471 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
18472 VSX_BUILTIN_VEC_XST);
18473
18474 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
18475 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
18476 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
18477
18478 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
18479 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
18480 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
18481 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
18482 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
18483 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
18484 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
18485 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
18486 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
18487 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
18488 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
18489 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
18490
18491 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
18492 ALTIVEC_BUILTIN_VEC_ADDE);
18493 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
18494 ALTIVEC_BUILTIN_VEC_ADDEC);
18495 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
18496 ALTIVEC_BUILTIN_VEC_CMPNE);
18497 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
18498 ALTIVEC_BUILTIN_VEC_MUL);
18499
18500 /* Cell builtins. */
18501 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
18502 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
18503 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
18504 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
18505
18506 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
18507 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
18508 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
18509 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
18510
18511 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
18512 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
18513 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
18514 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
18515
18516 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
18517 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
18518 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
18519 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
18520
18521 if (TARGET_P9_VECTOR)
18522 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
18523 P9V_BUILTIN_STXVL);
18524
18525 /* Add the DST variants. */
18526 d = bdesc_dst;
18527 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
18528 {
18529 HOST_WIDE_INT mask = d->mask;
18530
18531 /* It is expected that these dst built-in functions may have
18532 d->icode equal to CODE_FOR_nothing. */
18533 if ((mask & builtin_mask) != mask)
18534 {
18535 if (TARGET_DEBUG_BUILTIN)
18536 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
18537 d->name);
18538 continue;
18539 }
18540 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
18541 }
18542
18543 /* Initialize the predicates. */
18544 d = bdesc_altivec_preds;
18545 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
18546 {
18547 machine_mode mode1;
18548 tree type;
18549 HOST_WIDE_INT mask = d->mask;
18550
18551 if ((mask & builtin_mask) != mask)
18552 {
18553 if (TARGET_DEBUG_BUILTIN)
18554 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
18555 d->name);
18556 continue;
18557 }
18558
18559 if (rs6000_overloaded_builtin_p (d->code))
18560 mode1 = VOIDmode;
18561 else
18562 {
18563 /* Cannot define builtin if the instruction is disabled. */
18564 gcc_assert (d->icode != CODE_FOR_nothing);
18565 mode1 = insn_data[d->icode].operand[1].mode;
18566 }
18567
18568 switch (mode1)
18569 {
18570 case E_VOIDmode:
18571 type = int_ftype_int_opaque_opaque;
18572 break;
18573 case E_V2DImode:
18574 type = int_ftype_int_v2di_v2di;
18575 break;
18576 case E_V4SImode:
18577 type = int_ftype_int_v4si_v4si;
18578 break;
18579 case E_V8HImode:
18580 type = int_ftype_int_v8hi_v8hi;
18581 break;
18582 case E_V16QImode:
18583 type = int_ftype_int_v16qi_v16qi;
18584 break;
18585 case E_V4SFmode:
18586 type = int_ftype_int_v4sf_v4sf;
18587 break;
18588 case E_V2DFmode:
18589 type = int_ftype_int_v2df_v2df;
18590 break;
18591 default:
18592 gcc_unreachable ();
18593 }
18594
18595 def_builtin (d->name, type, d->code);
18596 }
18597
18598 /* Initialize the abs* operators. */
18599 d = bdesc_abs;
18600 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
18601 {
18602 machine_mode mode0;
18603 tree type;
18604 HOST_WIDE_INT mask = d->mask;
18605
18606 if ((mask & builtin_mask) != mask)
18607 {
18608 if (TARGET_DEBUG_BUILTIN)
18609 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
18610 d->name);
18611 continue;
18612 }
18613
18614 /* Cannot define builtin if the instruction is disabled. */
18615 gcc_assert (d->icode != CODE_FOR_nothing);
18616 mode0 = insn_data[d->icode].operand[0].mode;
18617
18618 switch (mode0)
18619 {
18620 case E_V2DImode:
18621 type = v2di_ftype_v2di;
18622 break;
18623 case E_V4SImode:
18624 type = v4si_ftype_v4si;
18625 break;
18626 case E_V8HImode:
18627 type = v8hi_ftype_v8hi;
18628 break;
18629 case E_V16QImode:
18630 type = v16qi_ftype_v16qi;
18631 break;
18632 case E_V4SFmode:
18633 type = v4sf_ftype_v4sf;
18634 break;
18635 case E_V2DFmode:
18636 type = v2df_ftype_v2df;
18637 break;
18638 default:
18639 gcc_unreachable ();
18640 }
18641
18642 def_builtin (d->name, type, d->code);
18643 }
18644
18645 /* Initialize target builtin that implements
18646 targetm.vectorize.builtin_mask_for_load. */
18647
18648 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
18649 v16qi_ftype_long_pcvoid,
18650 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
18651 BUILT_IN_MD, NULL, NULL_TREE);
18652 TREE_READONLY (decl) = 1;
18653 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
18654 altivec_builtin_mask_for_load = decl;
18655
18656 /* Access to the vec_init patterns. */
18657 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
18658 integer_type_node, integer_type_node,
18659 integer_type_node, NULL_TREE);
18660 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
18661
18662 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
18663 short_integer_type_node,
18664 short_integer_type_node,
18665 short_integer_type_node,
18666 short_integer_type_node,
18667 short_integer_type_node,
18668 short_integer_type_node,
18669 short_integer_type_node, NULL_TREE);
18670 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
18671
18672 ftype = build_function_type_list (V16QI_type_node, char_type_node,
18673 char_type_node, char_type_node,
18674 char_type_node, char_type_node,
18675 char_type_node, char_type_node,
18676 char_type_node, char_type_node,
18677 char_type_node, char_type_node,
18678 char_type_node, char_type_node,
18679 char_type_node, char_type_node,
18680 char_type_node, NULL_TREE);
18681 def_builtin ("__builtin_vec_init_v16qi", ftype,
18682 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
18683
18684 ftype = build_function_type_list (V4SF_type_node, float_type_node,
18685 float_type_node, float_type_node,
18686 float_type_node, NULL_TREE);
18687 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
18688
18689 /* VSX builtins. */
18690 ftype = build_function_type_list (V2DF_type_node, double_type_node,
18691 double_type_node, NULL_TREE);
18692 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
18693
18694 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
18695 intDI_type_node, NULL_TREE);
18696 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
18697
18698 /* Access to the vec_set patterns. */
18699 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18700 intSI_type_node,
18701 integer_type_node, NULL_TREE);
18702 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
18703
18704 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18705 intHI_type_node,
18706 integer_type_node, NULL_TREE);
18707 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
18708
18709 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18710 intQI_type_node,
18711 integer_type_node, NULL_TREE);
18712 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
18713
18714 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18715 float_type_node,
18716 integer_type_node, NULL_TREE);
18717 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
18718
18719 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
18720 double_type_node,
18721 integer_type_node, NULL_TREE);
18722 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
18723
18724 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18725 intDI_type_node,
18726 integer_type_node, NULL_TREE);
18727 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
18728
18729 /* Access to the vec_extract patterns. */
18730 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18731 integer_type_node, NULL_TREE);
18732 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
18733
18734 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18735 integer_type_node, NULL_TREE);
18736 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
18737
18738 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18739 integer_type_node, NULL_TREE);
18740 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
18741
18742 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18743 integer_type_node, NULL_TREE);
18744 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
18745
18746 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18747 integer_type_node, NULL_TREE);
18748 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
18749
18750 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
18751 integer_type_node, NULL_TREE);
18752 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
18753
18754
18755 if (V1TI_type_node)
18756 {
18757 tree v1ti_ftype_long_pcvoid
18758 = build_function_type_list (V1TI_type_node,
18759 long_integer_type_node, pcvoid_type_node,
18760 NULL_TREE);
18761 tree void_ftype_v1ti_long_pvoid
18762 = build_function_type_list (void_type_node,
18763 V1TI_type_node, long_integer_type_node,
18764 pvoid_type_node, NULL_TREE);
18765 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
18766 VSX_BUILTIN_LXVD2X_V1TI);
18767 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
18768 VSX_BUILTIN_STXVD2X_V1TI);
18769 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
18770 NULL_TREE, NULL_TREE);
18771 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
18772 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
18773 intTI_type_node,
18774 integer_type_node, NULL_TREE);
18775 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
18776 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
18777 integer_type_node, NULL_TREE);
18778 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
18779 }
18780
18781 }
18782
18783 static void
18784 htm_init_builtins (void)
18785 {
18786 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18787 const struct builtin_description *d;
18788 size_t i;
18789
18790 d = bdesc_htm;
18791 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
18792 {
18793 tree op[MAX_HTM_OPERANDS], type;
18794 HOST_WIDE_INT mask = d->mask;
18795 unsigned attr = rs6000_builtin_info[d->code].attr;
18796 bool void_func = (attr & RS6000_BTC_VOID);
18797 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
18798 int nopnds = 0;
18799 tree gpr_type_node;
18800 tree rettype;
18801 tree argtype;
18802
18803 /* It is expected that these htm built-in functions may have
18804 d->icode equal to CODE_FOR_nothing. */
18805
18806 if (TARGET_32BIT && TARGET_POWERPC64)
18807 gpr_type_node = long_long_unsigned_type_node;
18808 else
18809 gpr_type_node = long_unsigned_type_node;
18810
18811 if (attr & RS6000_BTC_SPR)
18812 {
18813 rettype = gpr_type_node;
18814 argtype = gpr_type_node;
18815 }
18816 else if (d->code == HTM_BUILTIN_TABORTDC
18817 || d->code == HTM_BUILTIN_TABORTDCI)
18818 {
18819 rettype = unsigned_type_node;
18820 argtype = gpr_type_node;
18821 }
18822 else
18823 {
18824 rettype = unsigned_type_node;
18825 argtype = unsigned_type_node;
18826 }
18827
18828 if ((mask & builtin_mask) != mask)
18829 {
18830 if (TARGET_DEBUG_BUILTIN)
18831 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
18832 continue;
18833 }
18834
18835 if (d->name == 0)
18836 {
18837 if (TARGET_DEBUG_BUILTIN)
18838 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
18839 (long unsigned) i);
18840 continue;
18841 }
18842
18843 op[nopnds++] = (void_func) ? void_type_node : rettype;
18844
18845 if (attr_args == RS6000_BTC_UNARY)
18846 op[nopnds++] = argtype;
18847 else if (attr_args == RS6000_BTC_BINARY)
18848 {
18849 op[nopnds++] = argtype;
18850 op[nopnds++] = argtype;
18851 }
18852 else if (attr_args == RS6000_BTC_TERNARY)
18853 {
18854 op[nopnds++] = argtype;
18855 op[nopnds++] = argtype;
18856 op[nopnds++] = argtype;
18857 }
18858
18859 switch (nopnds)
18860 {
18861 case 1:
18862 type = build_function_type_list (op[0], NULL_TREE);
18863 break;
18864 case 2:
18865 type = build_function_type_list (op[0], op[1], NULL_TREE);
18866 break;
18867 case 3:
18868 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
18869 break;
18870 case 4:
18871 type = build_function_type_list (op[0], op[1], op[2], op[3],
18872 NULL_TREE);
18873 break;
18874 default:
18875 gcc_unreachable ();
18876 }
18877
18878 def_builtin (d->name, type, d->code);
18879 }
18880 }
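/* As an illustration (schematic, not tied to a particular bdesc_htm
   entry): an SPR accessor (RS6000_BTC_SPR) marked RS6000_BTC_UNARY on
   a 64-bit target collects op[0] = op[1] = long_unsigned_type_node,
   so nopnds == 2 and the switch above builds the equivalent of
     unsigned long f (unsigned long);
   via build_function_type_list (op[0], op[1], NULL_TREE).  */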
18881
18882 /* Hash function for builtin functions with up to 3 arguments and a return
18883 type. */
18884 hashval_t
18885 builtin_hasher::hash (builtin_hash_struct *bh)
18886 {
18887 unsigned ret = 0;
18888 int i;
18889
18890 for (i = 0; i < 4; i++)
18891 {
18892 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
18893 ret = (ret * 2) + bh->uns_p[i];
18894 }
18895
18896 return ret;
18897 }
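/* Schematic illustration: each iteration folds one (mode, signedness)
   pair into the running value as
     ret = (ret * MAX_MACHINE_MODE + mode[i]) * 2 + uns_p[i];
   so the four pairs act as digits in a fixed radix, and any change to
   a mode or a signedness flag yields a different hash (modulo
   unsigned wraparound).  */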
18898
18899 /* Compare builtin hash entries H1 and H2 for equivalence. */
18900 bool
18901 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
18902 {
18903 return ((p1->mode[0] == p2->mode[0])
18904 && (p1->mode[1] == p2->mode[1])
18905 && (p1->mode[2] == p2->mode[2])
18906 && (p1->mode[3] == p2->mode[3])
18907 && (p1->uns_p[0] == p2->uns_p[0])
18908 && (p1->uns_p[1] == p2->uns_p[1])
18909 && (p1->uns_p[2] == p2->uns_p[2])
18910 && (p1->uns_p[3] == p2->uns_p[3]));
18911 }
18912
18913 /* Map types for builtin functions with an explicit return type and up to 3
18914 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
18915 of the unused trailing arguments. */
18916 static tree
18917 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
18918 machine_mode mode_arg1, machine_mode mode_arg2,
18919 enum rs6000_builtins builtin, const char *name)
18920 {
18921 struct builtin_hash_struct h;
18922 struct builtin_hash_struct *h2;
18923 int num_args = 3;
18924 int i;
18925 tree ret_type = NULL_TREE;
18926 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18927
18928 /* Create builtin_hash_table. */
18929 if (builtin_hash_table == NULL)
18930 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18931
18932 h.type = NULL_TREE;
18933 h.mode[0] = mode_ret;
18934 h.mode[1] = mode_arg0;
18935 h.mode[2] = mode_arg1;
18936 h.mode[3] = mode_arg2;
18937 h.uns_p[0] = 0;
18938 h.uns_p[1] = 0;
18939 h.uns_p[2] = 0;
18940 h.uns_p[3] = 0;
18941
18942 /* If the builtin produces unsigned results or takes unsigned
18943 arguments, and it is returned as a decl to the vectorizer (such as
18944 widening multiplies or permute), make sure the arguments and return
18945 value are type correct. */
18946 switch (builtin)
18947 {
18948 /* unsigned 1 argument functions. */
18949 case CRYPTO_BUILTIN_VSBOX:
18950 case P8V_BUILTIN_VGBBD:
18951 case MISC_BUILTIN_CDTBCD:
18952 case MISC_BUILTIN_CBCDTD:
18953 h.uns_p[0] = 1;
18954 h.uns_p[1] = 1;
18955 break;
18956
18957 /* unsigned 2 argument functions. */
18958 case ALTIVEC_BUILTIN_VMULEUB:
18959 case ALTIVEC_BUILTIN_VMULEUH:
18960 case ALTIVEC_BUILTIN_VMULOUB:
18961 case ALTIVEC_BUILTIN_VMULOUH:
18962 case CRYPTO_BUILTIN_VCIPHER:
18963 case CRYPTO_BUILTIN_VCIPHERLAST:
18964 case CRYPTO_BUILTIN_VNCIPHER:
18965 case CRYPTO_BUILTIN_VNCIPHERLAST:
18966 case CRYPTO_BUILTIN_VPMSUMB:
18967 case CRYPTO_BUILTIN_VPMSUMH:
18968 case CRYPTO_BUILTIN_VPMSUMW:
18969 case CRYPTO_BUILTIN_VPMSUMD:
18970 case CRYPTO_BUILTIN_VPMSUM:
18971 case MISC_BUILTIN_ADDG6S:
18972 case MISC_BUILTIN_DIVWEU:
18973 case MISC_BUILTIN_DIVWEUO:
18974 case MISC_BUILTIN_DIVDEU:
18975 case MISC_BUILTIN_DIVDEUO:
18976 case VSX_BUILTIN_UDIV_V2DI:
18977 h.uns_p[0] = 1;
18978 h.uns_p[1] = 1;
18979 h.uns_p[2] = 1;
18980 break;
18981
18982 /* unsigned 3 argument functions. */
18983 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18984 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18985 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18986 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18987 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18988 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18989 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18990 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18991 case VSX_BUILTIN_VPERM_16QI_UNS:
18992 case VSX_BUILTIN_VPERM_8HI_UNS:
18993 case VSX_BUILTIN_VPERM_4SI_UNS:
18994 case VSX_BUILTIN_VPERM_2DI_UNS:
18995 case VSX_BUILTIN_XXSEL_16QI_UNS:
18996 case VSX_BUILTIN_XXSEL_8HI_UNS:
18997 case VSX_BUILTIN_XXSEL_4SI_UNS:
18998 case VSX_BUILTIN_XXSEL_2DI_UNS:
18999 case CRYPTO_BUILTIN_VPERMXOR:
19000 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
19001 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
19002 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
19003 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
19004 case CRYPTO_BUILTIN_VSHASIGMAW:
19005 case CRYPTO_BUILTIN_VSHASIGMAD:
19006 case CRYPTO_BUILTIN_VSHASIGMA:
19007 h.uns_p[0] = 1;
19008 h.uns_p[1] = 1;
19009 h.uns_p[2] = 1;
19010 h.uns_p[3] = 1;
19011 break;
19012
19013 /* signed permute functions with unsigned char mask. */
19014 case ALTIVEC_BUILTIN_VPERM_16QI:
19015 case ALTIVEC_BUILTIN_VPERM_8HI:
19016 case ALTIVEC_BUILTIN_VPERM_4SI:
19017 case ALTIVEC_BUILTIN_VPERM_4SF:
19018 case ALTIVEC_BUILTIN_VPERM_2DI:
19019 case ALTIVEC_BUILTIN_VPERM_2DF:
19020 case VSX_BUILTIN_VPERM_16QI:
19021 case VSX_BUILTIN_VPERM_8HI:
19022 case VSX_BUILTIN_VPERM_4SI:
19023 case VSX_BUILTIN_VPERM_4SF:
19024 case VSX_BUILTIN_VPERM_2DI:
19025 case VSX_BUILTIN_VPERM_2DF:
19026 h.uns_p[3] = 1;
19027 break;
19028
19029 /* unsigned args, signed return. */
19030 case VSX_BUILTIN_XVCVUXDSP:
19031 case VSX_BUILTIN_XVCVUXDDP_UNS:
19032 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
19033 h.uns_p[1] = 1;
19034 break;
19035
19036 /* signed args, unsigned return. */
19037 case VSX_BUILTIN_XVCVDPUXDS_UNS:
19038 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
19039 case MISC_BUILTIN_UNPACK_TD:
19040 case MISC_BUILTIN_UNPACK_V1TI:
19041 h.uns_p[0] = 1;
19042 break;
19043
19044 /* unsigned arguments for 128-bit pack instructions. */
19045 case MISC_BUILTIN_PACK_TD:
19046 case MISC_BUILTIN_PACK_V1TI:
19047 h.uns_p[1] = 1;
19048 h.uns_p[2] = 1;
19049 break;
19050
19051 default:
19052 break;
19053 }
19054
19055 /* Figure out how many args are present. */
19056 while (num_args > 0 && h.mode[num_args] == VOIDmode)
19057 num_args--;
19058
19059 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
19060 if (!ret_type && h.uns_p[0])
19061 ret_type = builtin_mode_to_type[h.mode[0]][0];
19062
19063 if (!ret_type)
19064 fatal_error (input_location,
19065 "internal error: builtin function %s had an unexpected "
19066 "return type %s", name, GET_MODE_NAME (h.mode[0]));
19067
19068 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
19069 arg_type[i] = NULL_TREE;
19070
19071 for (i = 0; i < num_args; i++)
19072 {
19073 int m = (int) h.mode[i+1];
19074 int uns_p = h.uns_p[i+1];
19075
19076 arg_type[i] = builtin_mode_to_type[m][uns_p];
19077 if (!arg_type[i] && uns_p)
19078 arg_type[i] = builtin_mode_to_type[m][0];
19079
19080 if (!arg_type[i])
19081 fatal_error (input_location,
19082 "internal error: builtin function %s, argument %d "
19083 "had unexpected argument type %s", name, i,
19084 GET_MODE_NAME (m));
19085 }
19086
19087 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
19088 if (*found == NULL)
19089 {
19090 h2 = ggc_alloc<builtin_hash_struct> ();
19091 *h2 = h;
19092 *found = h2;
19093
19094 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
19095 arg_type[2], NULL_TREE);
19096 }
19097
19098 return (*found)->type;
19099 }
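/* Example use (hypothetical modes, not a specific table entry):
   calling builtin_function_type (V4SImode, V4SImode, V4SImode,
   VOIDmode, code, name) trims num_args from 3 to 2 because the last
   argument mode is VOIDmode, maps the remaining modes through
   builtin_mode_to_type, and caches the resulting
     v4si f (v4si, v4si)
   type tree; a later request with identical modes and signedness
   returns the same node from builtin_hash_table.  */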
19100
19101 static void
19102 rs6000_common_init_builtins (void)
19103 {
19104 const struct builtin_description *d;
19105 size_t i;
19106
19107 tree opaque_ftype_opaque = NULL_TREE;
19108 tree opaque_ftype_opaque_opaque = NULL_TREE;
19109 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
19110 tree v2si_ftype = NULL_TREE;
19111 tree v2si_ftype_qi = NULL_TREE;
19112 tree v2si_ftype_v2si_qi = NULL_TREE;
19113 tree v2si_ftype_int_qi = NULL_TREE;
19114 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
19115
19116 if (!TARGET_PAIRED_FLOAT)
19117 {
19118 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
19119 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
19120 }
19121
19122 /* Paired and SPE builtins are only available if you build a compiler with
19123 the appropriate options, so only create those builtins with the
19124 appropriate compiler option. Create Altivec and VSX builtins on machines
19125 with at least the general purpose extensions (970 and newer) to allow the
19126 use of the target attribute. */
19127
19128 if (TARGET_EXTRA_BUILTINS)
19129 builtin_mask |= RS6000_BTM_COMMON;
19130
19131 /* Add the ternary operators. */
19132 d = bdesc_3arg;
19133 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
19134 {
19135 tree type;
19136 HOST_WIDE_INT mask = d->mask;
19137
19138 if ((mask & builtin_mask) != mask)
19139 {
19140 if (TARGET_DEBUG_BUILTIN)
19141 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
19142 continue;
19143 }
19144
19145 if (rs6000_overloaded_builtin_p (d->code))
19146 {
19147 if (! (type = opaque_ftype_opaque_opaque_opaque))
19148 type = opaque_ftype_opaque_opaque_opaque
19149 = build_function_type_list (opaque_V4SI_type_node,
19150 opaque_V4SI_type_node,
19151 opaque_V4SI_type_node,
19152 opaque_V4SI_type_node,
19153 NULL_TREE);
19154 }
19155 else
19156 {
19157 enum insn_code icode = d->icode;
19158 if (d->name == 0)
19159 {
19160 if (TARGET_DEBUG_BUILTIN)
19161 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
19162 (long unsigned)i);
19163
19164 continue;
19165 }
19166
19167 if (icode == CODE_FOR_nothing)
19168 {
19169 if (TARGET_DEBUG_BUILTIN)
19170 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
19171 d->name);
19172
19173 continue;
19174 }
19175
19176 type = builtin_function_type (insn_data[icode].operand[0].mode,
19177 insn_data[icode].operand[1].mode,
19178 insn_data[icode].operand[2].mode,
19179 insn_data[icode].operand[3].mode,
19180 d->code, d->name);
19181 }
19182
19183 def_builtin (d->name, type, d->code);
19184 }
19185
19186 /* Add the binary operators. */
19187 d = bdesc_2arg;
19188 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19189 {
19190 machine_mode mode0, mode1, mode2;
19191 tree type;
19192 HOST_WIDE_INT mask = d->mask;
19193
19194 if ((mask & builtin_mask) != mask)
19195 {
19196 if (TARGET_DEBUG_BUILTIN)
19197 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
19198 continue;
19199 }
19200
19201 if (rs6000_overloaded_builtin_p (d->code))
19202 {
19203 if (! (type = opaque_ftype_opaque_opaque))
19204 type = opaque_ftype_opaque_opaque
19205 = build_function_type_list (opaque_V4SI_type_node,
19206 opaque_V4SI_type_node,
19207 opaque_V4SI_type_node,
19208 NULL_TREE);
19209 }
19210 else
19211 {
19212 enum insn_code icode = d->icode;
19213 if (d->name == 0)
19214 {
19215 if (TARGET_DEBUG_BUILTIN)
19216 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
19217 (long unsigned)i);
19218
19219 continue;
19220 }
19221
19222 if (icode == CODE_FOR_nothing)
19223 {
19224 if (TARGET_DEBUG_BUILTIN)
19225 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
19226 d->name);
19227
19228 continue;
19229 }
19230
19231 mode0 = insn_data[icode].operand[0].mode;
19232 mode1 = insn_data[icode].operand[1].mode;
19233 mode2 = insn_data[icode].operand[2].mode;
19234
19235 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
19236 {
19237 if (! (type = v2si_ftype_v2si_qi))
19238 type = v2si_ftype_v2si_qi
19239 = build_function_type_list (opaque_V2SI_type_node,
19240 opaque_V2SI_type_node,
19241 char_type_node,
19242 NULL_TREE);
19243 }
19244
19245 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
19246 && mode2 == QImode)
19247 {
19248 if (! (type = v2si_ftype_int_qi))
19249 type = v2si_ftype_int_qi
19250 = build_function_type_list (opaque_V2SI_type_node,
19251 integer_type_node,
19252 char_type_node,
19253 NULL_TREE);
19254 }
19255
19256 else
19257 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
19258 d->code, d->name);
19259 }
19260
19261 def_builtin (d->name, type, d->code);
19262 }
19263
19264 /* Add the simple unary operators. */
19265 d = bdesc_1arg;
19266 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19267 {
19268 machine_mode mode0, mode1;
19269 tree type;
19270 HOST_WIDE_INT mask = d->mask;
19271
19272 if ((mask & builtin_mask) != mask)
19273 {
19274 if (TARGET_DEBUG_BUILTIN)
19275 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
19276 continue;
19277 }
19278
19279 if (rs6000_overloaded_builtin_p (d->code))
19280 {
19281 if (! (type = opaque_ftype_opaque))
19282 type = opaque_ftype_opaque
19283 = build_function_type_list (opaque_V4SI_type_node,
19284 opaque_V4SI_type_node,
19285 NULL_TREE);
19286 }
19287 else
19288 {
19289 enum insn_code icode = d->icode;
19290 if (d->name == 0)
19291 {
19292 if (TARGET_DEBUG_BUILTIN)
19293 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
19294 (long unsigned)i);
19295
19296 continue;
19297 }
19298
19299 if (icode == CODE_FOR_nothing)
19300 {
19301 if (TARGET_DEBUG_BUILTIN)
19302 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
19303 d->name);
19304
19305 continue;
19306 }
19307
19308 mode0 = insn_data[icode].operand[0].mode;
19309 mode1 = insn_data[icode].operand[1].mode;
19310
19311 if (mode0 == V2SImode && mode1 == QImode)
19312 {
19313 if (! (type = v2si_ftype_qi))
19314 type = v2si_ftype_qi
19315 = build_function_type_list (opaque_V2SI_type_node,
19316 char_type_node,
19317 NULL_TREE);
19318 }
19319
19320 else
19321 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
19322 d->code, d->name);
19323 }
19324
19325 def_builtin (d->name, type, d->code);
19326 }
19327
19328 /* Add the simple no-argument operators. */
19329 d = bdesc_0arg;
19330 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
19331 {
19332 machine_mode mode0;
19333 tree type;
19334 HOST_WIDE_INT mask = d->mask;
19335
19336 if ((mask & builtin_mask) != mask)
19337 {
19338 if (TARGET_DEBUG_BUILTIN)
19339 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
19340 continue;
19341 }
19342 if (rs6000_overloaded_builtin_p (d->code))
19343 {
19344 if (!opaque_ftype_opaque)
19345 opaque_ftype_opaque
19346 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
19347 type = opaque_ftype_opaque;
19348 }
19349 else
19350 {
19351 enum insn_code icode = d->icode;
19352 if (d->name == 0)
19353 {
19354 if (TARGET_DEBUG_BUILTIN)
19355 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
19356 (long unsigned) i);
19357 continue;
19358 }
19359 if (icode == CODE_FOR_nothing)
19360 {
19361 if (TARGET_DEBUG_BUILTIN)
19362 fprintf (stderr,
19363 "rs6000_builtin, skip no-argument %s (no code)\n",
19364 d->name);
19365 continue;
19366 }
19367 mode0 = insn_data[icode].operand[0].mode;
19368 if (mode0 == V2SImode)
19369 {
19370 /* Code for SPE. */
19371 if (! (type = v2si_ftype))
19372 {
19373 v2si_ftype
19374 = build_function_type_list (opaque_V2SI_type_node,
19375 NULL_TREE);
19376 type = v2si_ftype;
19377 }
19378 }
19379 else
19380 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
19381 d->code, d->name);
19382 }
19383 def_builtin (d->name, type, d->code);
19384 }
19385 }
19386
19387 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
19388 static void
19389 init_float128_ibm (machine_mode mode)
19390 {
19391 if (!TARGET_XL_COMPAT)
19392 {
19393 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
19394 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
19395 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
19396 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
19397
19398 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
19399 {
19400 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
19401 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
19402 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
19403 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
19404 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
19405 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
19406 set_optab_libfunc (le_optab, mode, "__gcc_qle");
19407
19408 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
19409 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
19410 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
19411 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
19412 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
19413 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
19414 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
19415 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
19416 }
19417
19418 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
19419 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
19420 }
19421 else
19422 {
19423 set_optab_libfunc (add_optab, mode, "_xlqadd");
19424 set_optab_libfunc (sub_optab, mode, "_xlqsub");
19425 set_optab_libfunc (smul_optab, mode, "_xlqmul");
19426 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
19427 }
19428
19429 /* Add various conversions for IFmode to use the traditional TFmode
19430 names. */
19431 if (mode == IFmode)
19432 {
19433 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
19434 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
19435 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
19436 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
19437 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
19438 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
19439
19440 if (TARGET_POWERPC64)
19441 {
19442 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
19443 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
19444 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
19445 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
19446 }
19447 }
19448 }
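/* So, for example, outside of XL compatibility mode an IFmode (IBM
   double-double) add that is not handled by an insn pattern expands
   to a call to __gcc_qadd, while with -mxl-compat the same operation
   calls _xlqadd instead.  */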
19449
19450 /* Set up IEEE 128-bit floating point routines. Use different names if the
19451 arguments can be passed in a vector register. The historical PowerPC
19452 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
19453 continue to use that if we aren't using vector registers to pass IEEE
19454 128-bit floating point. */
19455
19456 static void
19457 init_float128_ieee (machine_mode mode)
19458 {
19459 if (FLOAT128_VECTOR_P (mode))
19460 {
19461 set_optab_libfunc (add_optab, mode, "__addkf3");
19462 set_optab_libfunc (sub_optab, mode, "__subkf3");
19463 set_optab_libfunc (neg_optab, mode, "__negkf2");
19464 set_optab_libfunc (smul_optab, mode, "__mulkf3");
19465 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
19466 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
19467 set_optab_libfunc (abs_optab, mode, "__abskf2");
19468
19469 set_optab_libfunc (eq_optab, mode, "__eqkf2");
19470 set_optab_libfunc (ne_optab, mode, "__nekf2");
19471 set_optab_libfunc (gt_optab, mode, "__gtkf2");
19472 set_optab_libfunc (ge_optab, mode, "__gekf2");
19473 set_optab_libfunc (lt_optab, mode, "__ltkf2");
19474 set_optab_libfunc (le_optab, mode, "__lekf2");
19475 set_optab_libfunc (unord_optab, mode, "__unordkf2");
19476
19477 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
19478 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
19479 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
19480 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
19481
19482 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
19483 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19484 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
19485
19486 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
19487 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
19488 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
19489
19490 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
19491 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
19492 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
19493 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
19494 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
19495 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
19496
19497 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
19498 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
19499 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
19500 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
19501
19502 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
19503 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
19504 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
19505 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
19506
19507 if (TARGET_POWERPC64)
19508 {
19509 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
19510 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
19511 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
19512 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
19513 }
19514 }
19515
19516 else
19517 {
19518 set_optab_libfunc (add_optab, mode, "_q_add");
19519 set_optab_libfunc (sub_optab, mode, "_q_sub");
19520 set_optab_libfunc (neg_optab, mode, "_q_neg");
19521 set_optab_libfunc (smul_optab, mode, "_q_mul");
19522 set_optab_libfunc (sdiv_optab, mode, "_q_div");
19523 if (TARGET_PPC_GPOPT)
19524 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
19525
19526 set_optab_libfunc (eq_optab, mode, "_q_feq");
19527 set_optab_libfunc (ne_optab, mode, "_q_fne");
19528 set_optab_libfunc (gt_optab, mode, "_q_fgt");
19529 set_optab_libfunc (ge_optab, mode, "_q_fge");
19530 set_optab_libfunc (lt_optab, mode, "_q_flt");
19531 set_optab_libfunc (le_optab, mode, "_q_fle");
19532
19533 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
19534 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
19535 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
19536 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
19537 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
19538 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
19539 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
19540 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
19541 }
19542 }
19543
19544 static void
19545 rs6000_init_libfuncs (void)
19546 {
19547 /* __float128 support. */
19548 if (TARGET_FLOAT128_TYPE)
19549 {
19550 init_float128_ibm (IFmode);
19551 init_float128_ieee (KFmode);
19552 }
19553
19554 /* AIX/Darwin/64-bit Linux quad floating point routines. */
19555 if (TARGET_LONG_DOUBLE_128)
19556 {
19557 if (!TARGET_IEEEQUAD)
19558 init_float128_ibm (TFmode);
19559
19560 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
19561 else
19562 init_float128_ieee (TFmode);
19563 }
19564 }
19565
19566 \f
19567 /* Expand a block clear operation, and return 1 if successful. Return 0
19568 if we should let the compiler generate normal code.
19569
19570 operands[0] is the destination
19571 operands[1] is the length
19572 operands[3] is the alignment */
19573
19574 int
19575 expand_block_clear (rtx operands[])
19576 {
19577 rtx orig_dest = operands[0];
19578 rtx bytes_rtx = operands[1];
19579 rtx align_rtx = operands[3];
19580 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
19581 HOST_WIDE_INT align;
19582 HOST_WIDE_INT bytes;
19583 int offset;
19584 int clear_bytes;
19585 int clear_step;
19586
19587 /* If this is not a fixed size clear, just call memset. */
19588 if (! constp)
19589 return 0;
19590
19591 /* This must be a fixed size alignment. */
19592 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
19593 align = INTVAL (align_rtx) * BITS_PER_UNIT;
19594
19595 /* Anything to clear? */
19596 bytes = INTVAL (bytes_rtx);
19597 if (bytes <= 0)
19598 return 1;
19599
19600 /* Use the builtin memset after a point, to avoid huge code bloat.
19601 When optimize_size, avoid any significant code bloat; calling
19602 memset is about 4 instructions, so allow for one instruction to
19603 load zero and three to do clearing. */
19604 if (TARGET_ALTIVEC && align >= 128)
19605 clear_step = 16;
19606 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
19607 clear_step = 8;
19608 else if (TARGET_SPE && align >= 64)
19609 clear_step = 8;
19610 else
19611 clear_step = 4;
19612
19613 if (optimize_size && bytes > 3 * clear_step)
19614 return 0;
19615 if (! optimize_size && bytes > 8 * clear_step)
19616 return 0;
19617
19618 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
19619 {
19620 machine_mode mode = BLKmode;
19621 rtx dest;
19622
19623 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
19624 {
19625 clear_bytes = 16;
19626 mode = V4SImode;
19627 }
19628 else if (bytes >= 8 && TARGET_SPE && align >= 64)
19629 {
19630 clear_bytes = 8;
19631 mode = V2SImode;
19632 }
19633 else if (bytes >= 8 && TARGET_POWERPC64
19634 && (align >= 64 || !STRICT_ALIGNMENT))
19635 {
19636 clear_bytes = 8;
19637 mode = DImode;
19638 if (offset == 0 && align < 64)
19639 {
19640 rtx addr;
19641
19642 /* If the address form is reg+offset with offset not a
19643 multiple of four, reload into reg indirect form here
19644 rather than waiting for reload. This way we get one
19645 reload, not one per store. */
19646 addr = XEXP (orig_dest, 0);
19647 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
19648 && GET_CODE (XEXP (addr, 1)) == CONST_INT
19649 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
19650 {
19651 addr = copy_addr_to_reg (addr);
19652 orig_dest = replace_equiv_address (orig_dest, addr);
19653 }
19654 }
19655 }
19656 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
19657 { /* move 4 bytes */
19658 clear_bytes = 4;
19659 mode = SImode;
19660 }
19661 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
19662 { /* move 2 bytes */
19663 clear_bytes = 2;
19664 mode = HImode;
19665 }
19666 else /* move 1 byte at a time */
19667 {
19668 clear_bytes = 1;
19669 mode = QImode;
19670 }
19671
19672 dest = adjust_address (orig_dest, mode, offset);
19673
19674 emit_move_insn (dest, CONST0_RTX (mode));
19675 }
19676
19677 return 1;
19678 }
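/* Illustrative expansion (assuming a 32-bit target without AltiVec or
   SPE and 4-byte alignment): clearing 10 bytes uses clear_step == 4
   and emits SImode, SImode, HImode stores of zero at offsets 0, 4 and
   8, roughly
     li 9,0
     stw 9,0(3)
     stw 9,4(3)
     sth 9,8(3)
   which stays under the 8 * clear_step size cutoff above.  */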
19679
19680 /* Emit a potentially record-form instruction, setting DST from SRC.
19681 If DOT is 0, that is all; otherwise, set CCREG to the result of the
19682 signed comparison of DST with zero. If DOT is 1, the generated RTL
19683 doesn't care about the DST result; if DOT is 2, it does. If CCREG
19684 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
19685 a separate COMPARE. */
19686
19687 static void
19688 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
19689 {
19690 if (dot == 0)
19691 {
19692 emit_move_insn (dst, src);
19693 return;
19694 }
19695
19696 if (cc_reg_not_cr0_operand (ccreg, CCmode))
19697 {
19698 emit_move_insn (dst, src);
19699 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
19700 return;
19701 }
19702
19703 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
19704 if (dot == 1)
19705 {
19706 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
19707 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
19708 }
19709 else
19710 {
19711 rtx set = gen_rtx_SET (dst, src);
19712 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
19713 }
19714 }
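/* Schematic illustration of the two record-form shapes: with CCREG
   being CR0, DOT == 1 emits
     (parallel [(set ccreg (compare src 0)) (clobber dst)])
   and DOT == 2 emits
     (parallel [(set ccreg (compare src 0)) (set dst src)])
   which match the dot-form patterns in the machine description.  */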
19715
19716 /* Figure out the correct instructions to generate to load data for
19717 block compare. MODE is used for the read from memory, and
19718 data is zero extended if REG is wider than MODE. If LE code
19719 is being generated, bswap loads are used.
19720
19721 REG is the destination register to move the data into.
19722 MEM is the memory block being read.
19723 MODE is the mode of memory to use for the read. */
19724 static void
19725 do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
19726 {
19727 switch (GET_MODE (reg))
19728 {
19729 case E_DImode:
19730 switch (mode)
19731 {
19732 case E_QImode:
19733 emit_insn (gen_zero_extendqidi2 (reg, mem));
19734 break;
19735 case E_HImode:
19736 {
19737 rtx src = mem;
19738 if (!BYTES_BIG_ENDIAN)
19739 {
19740 src = gen_reg_rtx (HImode);
19741 emit_insn (gen_bswaphi2 (src, mem));
19742 }
19743 emit_insn (gen_zero_extendhidi2 (reg, src));
19744 break;
19745 }
19746 case E_SImode:
19747 {
19748 rtx src = mem;
19749 if (!BYTES_BIG_ENDIAN)
19750 {
19751 src = gen_reg_rtx (SImode);
19752 emit_insn (gen_bswapsi2 (src, mem));
19753 }
19754 emit_insn (gen_zero_extendsidi2 (reg, src));
19755 }
19756 break;
19757 case E_DImode:
19758 if (!BYTES_BIG_ENDIAN)
19759 emit_insn (gen_bswapdi2 (reg, mem));
19760 else
19761 emit_insn (gen_movdi (reg, mem));
19762 break;
19763 default:
19764 gcc_unreachable ();
19765 }
19766 break;
19767
19768 case E_SImode:
19769 switch (mode)
19770 {
19771 case E_QImode:
19772 emit_insn (gen_zero_extendqisi2 (reg, mem));
19773 break;
19774 case E_HImode:
19775 {
19776 rtx src = mem;
19777 if (!BYTES_BIG_ENDIAN)
19778 {
19779 src = gen_reg_rtx (HImode);
19780 emit_insn (gen_bswaphi2 (src, mem));
19781 }
19782 emit_insn (gen_zero_extendhisi2 (reg, src));
19783 break;
19784 }
19785 case E_SImode:
19786 if (!BYTES_BIG_ENDIAN)
19787 emit_insn (gen_bswapsi2 (reg, mem));
19788 else
19789 emit_insn (gen_movsi (reg, mem));
19790 break;
19791 case E_DImode:
19792 /* DImode is larger than the destination reg so is not expected. */
19793 gcc_unreachable ();
19794 break;
19795 default:
19796 gcc_unreachable ();
19797 }
19798 break;
19799 default:
19800 gcc_unreachable ();
19801 break;
19802 }
19803 }
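/* For example (little-endian, word_mode == DImode): an HImode chunk
   destined for a DImode REG is loaded byte-reversed into an HImode
   temporary via gen_bswaphi2 and then widened with
   gen_zero_extendhidi2; at the assembly level this typically becomes
   a single lhbrx.  */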
19804
19805 /* Select the mode to be used for reading the next chunk of bytes
19806 in the compare.
19807
19808 OFFSET is the current read offset from the beginning of the block.
19809 BYTES is the number of bytes remaining to be read.
19810 ALIGN is the minimum alignment of the memory blocks being compared in bytes.
19811 WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
19812 the largest allowable mode. */
19813 static machine_mode
19814 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
19815 unsigned HOST_WIDE_INT bytes,
19816 unsigned HOST_WIDE_INT align, bool word_mode_ok)
19817 {
19818 /* First see if we can do a whole load unit
19819 as that will be more efficient than a larger load + shift. */
19820
19821 /* If big, use biggest chunk.
19822 If exactly chunk size, use that size.
19823 If remainder can be done in one piece with shifting, do that.
19824 Do largest chunk possible without violating alignment rules. */
19825
19826 /* The most we can read without potential page crossing. */
19827 unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
19828
19829 if (word_mode_ok && bytes >= UNITS_PER_WORD)
19830 return word_mode;
19831 else if (bytes == GET_MODE_SIZE (SImode))
19832 return SImode;
19833 else if (bytes == GET_MODE_SIZE (HImode))
19834 return HImode;
19835 else if (bytes == GET_MODE_SIZE (QImode))
19836 return QImode;
19837 else if (bytes < GET_MODE_SIZE (SImode)
19838 && offset >= GET_MODE_SIZE (SImode) - bytes)
19839 /* This matches the case where we have SImode and 3 bytes
19840 and offset >= 1 and permits us to move back one and overlap
19841 with the previous read, thus avoiding having to shift
19842 unwanted bytes off the input. */
19843 return SImode;
19844 else if (word_mode_ok && bytes < UNITS_PER_WORD
19845 && offset >= UNITS_PER_WORD - bytes)
19846 /* Similarly, if we can use DImode it will get matched here and
19847 can do an overlapping read that ends at the end of the block. */
19848 return word_mode;
19849 else if (word_mode_ok && maxread >= UNITS_PER_WORD)
19850 /* It is safe to do all remaining in one load of largest size,
19851 possibly with a shift to get rid of unwanted bytes. */
19852 return word_mode;
19853 else if (maxread >= GET_MODE_SIZE (SImode))
19854 /* It is safe to do all remaining in one SImode load,
19855 possibly with a shift to get rid of unwanted bytes. */
19856 return SImode;
19857 else if (bytes > GET_MODE_SIZE (SImode))
19858 return SImode;
19859 else if (bytes > GET_MODE_SIZE (HImode))
19860 return HImode;
19861
19862 /* Final fallback is to do one byte at a time. */
19863 return QImode;
19864 }
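/* Worked example (illustrative): with word_mode == DImode usable,
   bytes == 18 and align == 1, the first two selections return DImode
   (bytes >= UNITS_PER_WORD), and the final one, with bytes == 2,
   returns HImode, giving the 8 + 8 + 2 split seen in the generated
   code example inside expand_block_compare below.  */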
19865
19866 /* Compute the alignment of pointer+OFFSET where the original alignment
19867 of pointer was BASE_ALIGN. */
19868 static unsigned HOST_WIDE_INT
19869 compute_current_alignment (unsigned HOST_WIDE_INT base_align,
19870 unsigned HOST_WIDE_INT offset)
19871 {
19872 if (offset == 0)
19873 return base_align;
19874 return min (base_align, offset & -offset);
19875 }
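/* offset & -offset isolates the lowest set bit of OFFSET, i.e. the
   largest power of two that divides it.  For example, base_align == 8
   and offset == 12 gives min (8, 12 & -12) == min (8, 4) == 4: a
   pointer known to be 8-byte aligned is only guaranteed 4-byte
   alignment 12 bytes in.  */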
19876
19877 /* Expand a block compare operation, and return true if successful.
19878 Return false if we should let the compiler generate normal code,
19879 probably a memcmp call.
19880
19881 OPERANDS[0] is the target (result).
19882 OPERANDS[1] is the first source.
19883 OPERANDS[2] is the second source.
19884 OPERANDS[3] is the length.
19885 OPERANDS[4] is the alignment. */
19886 bool
19887 expand_block_compare (rtx operands[])
19888 {
19889 rtx target = operands[0];
19890 rtx orig_src1 = operands[1];
19891 rtx orig_src2 = operands[2];
19892 rtx bytes_rtx = operands[3];
19893 rtx align_rtx = operands[4];
19894 HOST_WIDE_INT cmp_bytes = 0;
19895 rtx src1 = orig_src1;
19896 rtx src2 = orig_src2;
19897
19898 /* This case is complicated to handle because the subtract
19899 with carry instructions do not generate the 64-bit
19900 carry and so we must emit code to calculate it ourselves.
19901 We choose not to implement this yet. */
19902 if (TARGET_32BIT && TARGET_POWERPC64)
19903 return false;
19904
19905 /* If this is not a fixed size compare, just call memcmp. */
19906 if (!CONST_INT_P (bytes_rtx))
19907 return false;
19908
19909 /* This must be a fixed size alignment. */
19910 if (!CONST_INT_P (align_rtx))
19911 return false;
19912
19913 unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
19914
19915 /* SLOW_UNALIGNED_ACCESS -- don't do unaligned stuff. */
19916 if (SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src1))
19917 || SLOW_UNALIGNED_ACCESS (word_mode, MEM_ALIGN (orig_src2)))
19918 return false;
19919
19920 gcc_assert (GET_MODE (target) == SImode);
19921
19922 /* Anything to move? */
19923 unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx);
19924 if (bytes == 0)
19925 return true;
19926
19927 /* The code generated for P7 and older is not faster than glibc
19928 memcmp if alignment is small and length is not short, so bail
19929 out to avoid those conditions. */
19930 if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
19931 && ((base_align == 1 && bytes > 16)
19932 || (base_align == 2 && bytes > 32)))
19933 return false;
19934
19935 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
19936 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
19937 /* P7/P8 code uses cond for subfc., but P9 uses
19938 it for cmpld, which needs CCUNSmode. */
19939 rtx cond;
19940 if (TARGET_P9_MISC)
19941 cond = gen_reg_rtx (CCUNSmode);
19942 else
19943 cond = gen_reg_rtx (CCmode);
19944
19945 /* If we have an LE target without ldbrx and word_mode is DImode,
19946 then we must avoid using word_mode. */
19947 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
19948 && word_mode == DImode);
19949
19950 /* Strategy phase. How many ops will this take and should we expand it? */
19951
19952 unsigned HOST_WIDE_INT offset = 0;
19953 machine_mode load_mode =
19954 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
19955 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
19956
19957 /* We don't want to generate too much code. */
19958 unsigned HOST_WIDE_INT max_bytes =
19959 load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit;
19960 if (!IN_RANGE (bytes, 1, max_bytes))
19961 return false;
19962
19963 bool generate_6432_conversion = false;
19964 rtx convert_label = NULL;
19965 rtx final_label = NULL;
19966
19967 /* Example of generated code for an 18-byte compare with 1-byte alignment.
19968 Compiled with -fno-reorder-blocks for clarity.
19969 ldbrx 10,31,8
19970 ldbrx 9,7,8
19971 subfc. 9,9,10
19972 bne 0,.L6487
19973 addi 9,12,8
19974 addi 5,11,8
19975 ldbrx 10,0,9
19976 ldbrx 9,0,5
19977 subfc. 9,9,10
19978 bne 0,.L6487
19979 addi 9,12,16
19980 lhbrx 10,0,9
19981 addi 9,11,16
19982 lhbrx 9,0,9
19983 subf 9,9,10
19984 b .L6488
19985 .p2align 4,,15
19986 .L6487: #convert_label
19987 popcntd 9,9
19988 subfe 10,10,10
19989 or 9,9,10
19990 .L6488: #final_label
19991 extsw 10,9
19992
19993 We start off with DImode for two blocks that jump to the DI->SI conversion
19994 if the difference is found there, then a final block of HImode that skips
19995 the DI->SI conversion. */
19996
19997 while (bytes > 0)
19998 {
19999 unsigned int align = compute_current_alignment (base_align, offset);
20000 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20001 load_mode = select_block_compare_mode (offset, bytes, align,
20002 word_mode_ok);
20003 else
20004 load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
20005 load_mode_size = GET_MODE_SIZE (load_mode);
20006 if (bytes >= load_mode_size)
20007 cmp_bytes = load_mode_size;
20008 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20009 {
20010 /* Move this load back so it doesn't go past the end.
20011 P8/P9 can do this efficiently. */
20012 unsigned int extra_bytes = load_mode_size - bytes;
20013 cmp_bytes = bytes;
20014 if (extra_bytes < offset)
20015 {
20016 offset -= extra_bytes;
20017 cmp_bytes = load_mode_size;
20018 bytes = cmp_bytes;
20019 }
20020 }
20021 else
20022 /* P7 and earlier can't do the overlapping load trick fast,
20023 so this forces a non-overlapping load and a shift to get
20024 rid of the extra bytes. */
20025 cmp_bytes = bytes;
20026
20027 src1 = adjust_address (orig_src1, load_mode, offset);
20028 src2 = adjust_address (orig_src2, load_mode, offset);
20029
20030 if (!REG_P (XEXP (src1, 0)))
20031 {
20032 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20033 src1 = replace_equiv_address (src1, src1_reg);
20034 }
20035 set_mem_size (src1, cmp_bytes);
20036
20037 if (!REG_P (XEXP (src2, 0)))
20038 {
20039 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20040 src2 = replace_equiv_address (src2, src2_reg);
20041 }
20042 set_mem_size (src2, cmp_bytes);
20043
20044 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20045 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20046
20047 if (cmp_bytes < load_mode_size)
20048 {
20049 /* Shift unneeded bytes off. */
20050 rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes));
20051 if (word_mode == DImode)
20052 {
20053 emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh));
20054 emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh));
20055 }
20056 else
20057 {
20058 emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20059 emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20060 }
20061 }
20062
20063 int remain = bytes - cmp_bytes;
20064 if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode))
20065 {
20066 /* Target is larger than load size so we don't need to
20067 reduce result size. */
20068
20069 /* We previously did a block that needed 64->32 conversion but
20070 the current block does not, so a label is needed to jump
20071 to the end. */
20072 if (generate_6432_conversion && !final_label)
20073 final_label = gen_label_rtx ();
20074
20075 if (remain > 0)
20076 {
20077 /* This is not the last block, branch to the end if the result
20078 of this subtract is not zero. */
20079 if (!final_label)
20080 final_label = gen_label_rtx ();
20081 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20082 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20083 rtx cr = gen_reg_rtx (CCmode);
20084 rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr);
20085 emit_insn (gen_movsi (target,
20086 gen_lowpart (SImode, tmp_reg_src2)));
20087 rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
20088 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20089 fin_ref, pc_rtx);
20090 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20091 JUMP_LABEL (j) = final_label;
20092 LABEL_NUSES (final_label) += 1;
20093 }
20094 else
20095 {
20096 if (word_mode == DImode)
20097 {
20098 emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1,
20099 tmp_reg_src2));
20100 emit_insn (gen_movsi (target,
20101 gen_lowpart (SImode, tmp_reg_src2)));
20102 }
20103 else
20104 emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2));
20105
20106 if (final_label)
20107 {
20108 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20109 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20110 JUMP_LABEL (j) = final_label;
20111 LABEL_NUSES (final_label) += 1;
20112 emit_barrier ();
20113 }
20114 }
20115 }
20116 else
20117 {
20118 /* Do we need a 64->32 conversion block? We need the 64->32
20119 conversion even if target size == load_mode size because
20120 the subtract generates one extra bit. */
20121 generate_6432_conversion = true;
20122
20123 if (remain > 0)
20124 {
20125 if (!convert_label)
20126 convert_label = gen_label_rtx ();
20127
20128 /* Compare to zero and branch to convert_label if not zero. */
20129 rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label);
20130 if (TARGET_P9_MISC)
20131 {
20132 /* Generate a compare, and convert with a setb later. */
20133 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20134 tmp_reg_src2);
20135 emit_insn (gen_rtx_SET (cond, cmp));
20136 }
20137 else
20138 /* Generate a subfc. and use the longer
20139 sequence for conversion. */
20140 if (TARGET_64BIT)
20141 emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20142 tmp_reg_src1, cond));
20143 else
20144 emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2,
20145 tmp_reg_src1, cond));
20146 rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20147 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
20148 cvt_ref, pc_rtx);
20149 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20150 JUMP_LABEL (j) = convert_label;
20151 LABEL_NUSES (convert_label) += 1;
20152 }
20153 else
20154 {
20155 /* Just do the subtract/compare. Since this is the last block
20156 the convert code will be generated immediately following. */
20157 if (TARGET_P9_MISC)
20158 {
20159 rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1,
20160 tmp_reg_src2);
20161 emit_insn (gen_rtx_SET (cond, cmp));
20162 }
20163 else
20164 if (TARGET_64BIT)
20165 emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2,
20166 tmp_reg_src1));
20167 else
20168 emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2,
20169 tmp_reg_src1));
20170 }
20171 }
20172
20173 offset += cmp_bytes;
20174 bytes -= cmp_bytes;
20175 }
20176
20177 if (generate_6432_conversion)
20178 {
20179 if (convert_label)
20180 emit_label (convert_label);
20181
20182 /* We need to produce DI result from sub, then convert to target SI
20183 while maintaining <0 / ==0 / >0 properties. This sequence works:
20184 subfc L,A,B
20185 subfe H,H,H
20186 popcntd L,L
20187 rldimi L,H,6,0
20188
20189 This is an alternate one Segher cooked up if somebody
20190 wants to expand this for something that doesn't have popcntd:
20191 subfc L,a,b
20192 subfe H,x,x
20193 addic t,L,-1
20194 subfe v,t,L
20195 or z,v,H
20196
20197 And finally, P9 can just do this:
20198 cmpld A,B
20199 setb r */
20200
20201 if (TARGET_P9_MISC)
20202 {
20203 emit_insn (gen_setb_unsigned (target, cond));
20204 }
20205 else
20206 {
20207 if (TARGET_64BIT)
20208 {
20209 rtx tmp_reg_ca = gen_reg_rtx (DImode);
20210 emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
20211 emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2));
20212 emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca));
20213 emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2)));
20214 }
20215 else
20216 {
20217 rtx tmp_reg_ca = gen_reg_rtx (SImode);
20218 emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
20219 emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2));
20220 emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca));
20221 }
20222 }
20223 }
20224
20225 if (final_label)
20226 emit_label (final_label);
20227
20228 gcc_assert (bytes == 0);
20229 return true;
20230 }
20231
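/* A minimal standalone C model of the subfc/subfe/popcntd/rldimi
   narrowing used above, for illustration only (the function name is
   invented, and two's-complement conversion to int is assumed): the
   popcount of the difference is nonzero exactly when the chunks differ,
   and OR-ing in the borrow word (0 or -1) forces the result negative
   exactly when A < B, preserving the <0 / ==0 / >0 properties in a
   32-bit result. */

static int
narrow_cmp_result_model (unsigned long long a, unsigned long long b)
{
  unsigned long long diff = a - b;                        /* subfc */
  unsigned long long borrow = (a < b) ? -1ULL : 0;        /* subfe H,H,H */
  unsigned long long count = __builtin_popcountll (diff); /* popcntd */
  return (int) (count | borrow);                          /* rldimi / or */
}
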
20232 /* Generate alignment check and branch code to set up for
20233 strncmp when we don't have DI alignment.
20234 STRNCMP_LABEL is the label to branch if there is a page crossing.
20235 SRC is the string pointer to be examined.
20236 BYTES is the max number of bytes to compare. */
20237 static void
20238 expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
20239 {
20240 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
20241 rtx src_check = copy_addr_to_reg (XEXP (src, 0));
20242 if (GET_MODE (src_check) == SImode)
20243 emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
20244 else
20245 emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
20246 rtx cond = gen_reg_rtx (CCmode);
20247 emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
20248 GEN_INT (4096 - bytes)));
20249
20250 rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx);
20251
20252 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20253 pc_rtx, lab_ref);
20254 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20255 JUMP_LABEL (j) = strncmp_label;
20256 LABEL_NUSES (strncmp_label) += 1;
20257 }
20258
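/* A standalone sketch of the test emitted above (illustrative name and
   model, not the generated RTL): with 4 KiB pages, an inline load of
   BYTES starting at ADDR cannot run off the page when the offset within
   the page is below 4096 - BYTES; otherwise we branch to the library
   call. */

static int
page_cross_possible_model (unsigned long long addr, unsigned long long bytes)
{
  return (addr & 0xfff) >= 4096 - bytes;   /* rldicl; cmpldi; branch */
}
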
20259 /* Expand a string compare operation with length, and return
20260 true if successful. Return false if we should let the
20261 compiler generate normal code, probably a strncmp call.
20262
20263 OPERANDS[0] is the target (result).
20264 OPERANDS[1] is the first source.
20265 OPERANDS[2] is the second source.
20266 If NO_LENGTH is zero, then:
20267 OPERANDS[3] is the length.
20268 OPERANDS[4] is the alignment in bytes.
20269 If NO_LENGTH is nonzero, then:
20270 OPERANDS[3] is the alignment in bytes. */
20271 bool
20272 expand_strn_compare (rtx operands[], int no_length)
20273 {
20274 rtx target = operands[0];
20275 rtx orig_src1 = operands[1];
20276 rtx orig_src2 = operands[2];
20277 rtx bytes_rtx, align_rtx;
20278 if (no_length)
20279 {
20280 bytes_rtx = NULL;
20281 align_rtx = operands[3];
20282 }
20283 else
20284 {
20285 bytes_rtx = operands[3];
20286 align_rtx = operands[4];
20287 }
20288 unsigned HOST_WIDE_INT cmp_bytes = 0;
20289 rtx src1 = orig_src1;
20290 rtx src2 = orig_src2;
20291
20292 /* If we have a length, it must be constant. This simplifies things
20293 a bit as we don't have to generate code to check if we've exceeded
20294 the length. This could later be extended to handle a variable length. */
20295 if (!no_length && !CONST_INT_P (bytes_rtx))
20296 return false;
20297
20298 /* This must be a fixed size alignment. */
20299 if (!CONST_INT_P (align_rtx))
20300 return false;
20301
20302 unsigned int base_align = UINTVAL (align_rtx);
20303 int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
20304 int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
20305
20306 /* If unaligned accesses are slow for word_mode (SLOW_UNALIGNED_ACCESS), don't use them. */
20307 if (SLOW_UNALIGNED_ACCESS (word_mode, align1)
20308 || SLOW_UNALIGNED_ACCESS (word_mode, align2))
20309 return false;
20310
20311 gcc_assert (GET_MODE (target) == SImode);
20312
20313 /* If we have an LE target without ldbrx and word_mode is DImode,
20314 then we must avoid using word_mode. */
20315 int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
20316 && word_mode == DImode);
20317
20318 unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
20319
20320 unsigned HOST_WIDE_INT offset = 0;
20321 unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available. */
20322 unsigned HOST_WIDE_INT compare_length; /* How much to compare inline. */
20323 if (no_length)
20324 /* Use this as a stand-in to determine the mode to use. */
20325 bytes = rs6000_string_compare_inline_limit * word_mode_size;
20326 else
20327 bytes = UINTVAL (bytes_rtx);
20328
20329 machine_mode load_mode =
20330 select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
20331 unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
20332 compare_length = rs6000_string_compare_inline_limit * load_mode_size;
20333
20334 /* If we have equality at the end of the last compare and we have not
20335 found the end of the string, we need to call strcmp/strncmp to
20336 compare the remainder. */
20337 bool equality_compare_rest = false;
20338
20339 if (no_length)
20340 {
20341 bytes = compare_length;
20342 equality_compare_rest = true;
20343 }
20344 else
20345 {
20346 if (bytes <= compare_length)
20347 compare_length = bytes;
20348 else
20349 equality_compare_rest = true;
20350 }
20351
20352 rtx result_reg = gen_reg_rtx (word_mode);
20353 rtx final_move_label = gen_label_rtx ();
20354 rtx final_label = gen_label_rtx ();
20355 rtx begin_compare_label = NULL;
20356
20357 if (base_align < 8)
20358 {
20359 /* Generate code that checks distance to 4k boundary for this case. */
20360 begin_compare_label = gen_label_rtx ();
20361 rtx strncmp_label = gen_label_rtx ();
20362 rtx jmp;
20363
20364 /* Strncmp for power8 in glibc does this:
20365 rldicl r8,r3,0,52
20366 cmpldi cr7,r8,4096-16
20367 bgt cr7,L(pagecross) */
20368
20369 /* Make sure that the length we use for the alignment test and
20370 the subsequent code generation are in agreement so we do not
20371 go past the length we tested for a 4k boundary crossing. */
20372 unsigned HOST_WIDE_INT align_test = compare_length;
20373 if (align_test < 8)
20374 {
20375 align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
20376 base_align = align_test;
20377 }
20378 else
20379 {
20380 align_test = ROUND_UP (align_test, 8);
20381 base_align = 8;
20382 }
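/* For example (illustrative values): compare_length 3 rounds align_test
   up to 4 with base_align 4, and compare_length 12 rounds align_test up
   to 16 with base_align 8, so the 4k test covers every byte the inline
   comparison may touch. */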
20383
20384 if (align1 < 8)
20385 expand_strncmp_align_check (strncmp_label, src1, align_test);
20386 if (align2 < 8)
20387 expand_strncmp_align_check (strncmp_label, src2, align_test);
20388
20389 /* Now generate the following sequence:
20390 - branch to begin_compare
20391 - strncmp_label
20392 - call to strncmp
20393 - branch to final_label
20394 - begin_compare_label */
20395
20396 rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
20397 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
20398 JUMP_LABEL (jmp) = begin_compare_label;
20399 LABEL_NUSES (begin_compare_label) += 1;
20400 emit_barrier ();
20401
20402 emit_label (strncmp_label);
20403
20404 if (!REG_P (XEXP (src1, 0)))
20405 {
20406 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20407 src1 = replace_equiv_address (src1, src1_reg);
20408 }
20409
20410 if (!REG_P (XEXP (src2, 0)))
20411 {
20412 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20413 src2 = replace_equiv_address (src2, src2_reg);
20414 }
20415
20416 if (no_length)
20417 {
20418 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20419 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20420 target, LCT_NORMAL, GET_MODE (target),
20421 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20422 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20423 }
20424 else
20425 {
20426 /* -m32 -mpowerpc64 results in word_mode being DImode even
20427 though otherwise it is 32-bit. The length arg to strncmp
20428 is a size_t which will be the same size as pointers. */
20429 rtx len_rtx;
20430 if (TARGET_64BIT)
20431 len_rtx = gen_reg_rtx (DImode);
20432 else
20433 len_rtx = gen_reg_rtx (SImode);
20434
20435 emit_move_insn (len_rtx, bytes_rtx);
20436
20437 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20438 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20439 target, LCT_NORMAL, GET_MODE (target),
20440 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20441 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20442 len_rtx, GET_MODE (len_rtx));
20443 }
20444
20445 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20446 jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20447 JUMP_LABEL (jmp) = final_label;
20448 LABEL_NUSES (final_label) += 1;
20449 emit_barrier ();
20450 emit_label (begin_compare_label);
20451 }
20452
20453 rtx cleanup_label = NULL;
20454 rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
20455 rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
20456
20457 /* Generate a sequence of ld/ldbrx and cmpb insns to compare out
20458 to the length specified. */
20459 unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
20460 while (bytes_to_compare > 0)
20461 {
20462 /* Compare sequence:
20463 check each 8B with: ld/ld cmpd bne
20464 If equal, use rldicr/cmpb to check for zero byte.
20465 cleanup code at end:
20466 cmpb get byte that differs
20467 cmpb look for zero byte
20468 orc combine
20469 cntlzd get bit of first zero/diff byte
20470 subfic convert for rldcl use
20471 rldcl rldcl extract diff/zero byte
20472 subf subtract for final result
20473
20474 The last compare can branch around the cleanup code if the
20475 result is zero because the strings are exactly equal. */
20476 unsigned int align = compute_current_alignment (base_align, offset);
20477 if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20478 load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
20479 word_mode_ok);
20480 else
20481 load_mode = select_block_compare_mode (0, bytes_to_compare, align,
20482 word_mode_ok);
20483 load_mode_size = GET_MODE_SIZE (load_mode);
20484 if (bytes_to_compare >= load_mode_size)
20485 cmp_bytes = load_mode_size;
20486 else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
20487 {
20488 /* Move this load back so it doesn't go past the end.
20489 P8/P9 can do this efficiently. */
20490 unsigned int extra_bytes = load_mode_size - bytes_to_compare;
20491 cmp_bytes = bytes_to_compare;
20492 if (extra_bytes < offset)
20493 {
20494 offset -= extra_bytes;
20495 cmp_bytes = load_mode_size;
20496 bytes_to_compare = cmp_bytes;
20497 }
20498 }
20499 else
20500 /* P7 and earlier can't do the overlapping load trick fast,
20501 so this forces a non-overlapping load and a shift to get
20502 rid of the extra bytes. */
20503 cmp_bytes = bytes_to_compare;
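/* Worked example (illustrative values): with load_mode_size 8,
   bytes_to_compare 5 and offset 16, extra_bytes is 3, so the load
   moves back to offset 13 and compares a full 8 bytes, the first 3
   of which overlap bytes already compared. */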
20504
20505 src1 = adjust_address (orig_src1, load_mode, offset);
20506 src2 = adjust_address (orig_src2, load_mode, offset);
20507
20508 if (!REG_P (XEXP (src1, 0)))
20509 {
20510 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20511 src1 = replace_equiv_address (src1, src1_reg);
20512 }
20513 set_mem_size (src1, cmp_bytes);
20514
20515 if (!REG_P (XEXP (src2, 0)))
20516 {
20517 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20518 src2 = replace_equiv_address (src2, src2_reg);
20519 }
20520 set_mem_size (src2, cmp_bytes);
20521
20522 do_load_for_compare (tmp_reg_src1, src1, load_mode);
20523 do_load_for_compare (tmp_reg_src2, src2, load_mode);
20524
20525 /* We must always left-align the data we read, and
20526 clear any bytes to the right that are beyond the string.
20527 Otherwise the cmpb sequence won't produce the correct
20528 results. The beginning of the compare will be done
20529 with word_mode so it will not need any extra shifts or
20530 clear-right operations. */
20531
20532 if (load_mode_size < word_mode_size)
20533 {
20534 /* Rotate left first. */
20535 rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
20536 if (word_mode == DImode)
20537 {
20538 emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
20539 emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
20540 }
20541 else
20542 {
20543 emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
20544 emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
20545 }
20546 }
20547
20548 if (cmp_bytes < word_mode_size)
20549 {
20550 /* Now clear right. This plus the rotate can be
20551 turned into a rldicr instruction. */
20552 HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20553 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20554 if (word_mode == DImode)
20555 {
20556 emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20557 emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20558 }
20559 else
20560 {
20561 emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
20562 emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
20563 }
20564 }
20565
20566 /* Cases to handle. A and B are chunks of the two strings.
20567 1: Not end of comparison:
20568 A != B: branch to cleanup code to compute result.
20569 A == B: check for 0 byte, next block if not found.
20570 2: End of the inline comparison:
20571 A != B: branch to cleanup code to compute result.
20572 A == B: check for 0 byte, call strcmp/strncmp
20573 3: Compared the requested N bytes:
20574 A == B: branch to result 0.
20575 A != B: cleanup code to compute result. */
20576
20577 unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
20578
20579 rtx dst_label;
20580 if (remain > 0 || equality_compare_rest)
20581 {
20582 /* Branch to cleanup code, otherwise fall through to do
20583 more compares. */
20584 if (!cleanup_label)
20585 cleanup_label = gen_label_rtx ();
20586 dst_label = cleanup_label;
20587 }
20588 else
20589 /* Branch to end and produce result of 0. */
20590 dst_label = final_move_label;
20591
20592 rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
20593 rtx cond = gen_reg_rtx (CCmode);
20594
20595 /* Always produce the 0 result; it is needed if
20596 cmpb finds a 0 byte in this chunk. */
20597 rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
20598 rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
20599
20600 rtx cmp_rtx;
20601 if (remain == 0 && !equality_compare_rest)
20602 cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
20603 else
20604 cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20605
20606 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
20607 lab_ref, pc_rtx);
20608 rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20609 JUMP_LABEL (j) = dst_label;
20610 LABEL_NUSES (dst_label) += 1;
20611
20612 if (remain > 0 || equality_compare_rest)
20613 {
20614 /* Generate a cmpb to test for a 0 byte and branch
20615 to final result if found. */
20616 rtx cmpb_zero = gen_reg_rtx (word_mode);
20617 rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
20618 rtx condz = gen_reg_rtx (CCmode);
20619 rtx zero_reg = gen_reg_rtx (word_mode);
20620 if (word_mode == SImode)
20621 {
20622 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20623 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20624 if (cmp_bytes < word_mode_size)
20625 {
20626 /* Don't want to look at zero bytes past end. */
20627 HOST_WIDE_INT mb =
20628 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20629 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20630 emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
20631 }
20632 }
20633 else
20634 {
20635 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20636 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20637 if (cmp_bytes < word_mode_size)
20638 {
20639 /* Don't want to look at zero bytes past end. */
20640 HOST_WIDE_INT mb =
20641 BITS_PER_UNIT * (word_mode_size - cmp_bytes);
20642 rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
20643 emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
20644 }
20645 }
20646
20647 emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
20648 rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
20649 rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
20650 lab_ref_fin, pc_rtx);
20651 rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
20652 JUMP_LABEL (j2) = final_move_label;
20653 LABEL_NUSES (final_move_label) += 1;
20655 }
20656
20657 offset += cmp_bytes;
20658 bytes_to_compare -= cmp_bytes;
20659 }
20660
20661 if (equality_compare_rest)
20662 {
20663 /* Update pointers past what has been compared already. */
20664 src1 = adjust_address (orig_src1, load_mode, offset);
20665 src2 = adjust_address (orig_src2, load_mode, offset);
20666
20667 if (!REG_P (XEXP (src1, 0)))
20668 {
20669 rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
20670 src1 = replace_equiv_address (src1, src1_reg);
20671 }
20672 set_mem_size (src1, cmp_bytes);
20673
20674 if (!REG_P (XEXP (src2, 0)))
20675 {
20676 rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
20677 src2 = replace_equiv_address (src2, src2_reg);
20678 }
20679 set_mem_size (src2, cmp_bytes);
20680
20681 /* Construct call to strcmp/strncmp to compare the rest of the string. */
20682 if (no_length)
20683 {
20684 tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
20685 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20686 target, LCT_NORMAL, GET_MODE (target),
20687 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20688 force_reg (Pmode, XEXP (src2, 0)), Pmode);
20689 }
20690 else
20691 {
20692 rtx len_rtx;
20693 if (TARGET_64BIT)
20694 len_rtx = gen_reg_rtx (DImode);
20695 else
20696 len_rtx = gen_reg_rtx (SImode);
20697
20698 emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
20699 tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
20700 emit_library_call_value (XEXP (DECL_RTL (fun), 0),
20701 target, LCT_NORMAL, GET_MODE (target),
20702 force_reg (Pmode, XEXP (src1, 0)), Pmode,
20703 force_reg (Pmode, XEXP (src2, 0)), Pmode,
20704 len_rtx, GET_MODE (len_rtx));
20705 }
20706
20707 rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
20708 rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
20709 JUMP_LABEL (jmp) = final_label;
20710 LABEL_NUSES (final_label) += 1;
20711 emit_barrier ();
20712 }
20713
20714 if (cleanup_label)
20715 emit_label (cleanup_label);
20716
20717 /* Generate the final sequence that identifies the differing
20718 byte and generates the final result, taking into account
20719 zero bytes:
20720
20721 cmpb cmpb_result1, src1, src2
20722 cmpb cmpb_result2, src1, zero
20723 orc cmpb_result1, cmpb_result1, cmpb_result2
20724 cntlzd get bit of first zero/diff byte
20725 addi convert for rldcl use
20726 rldcl rldcl extract diff/zero byte
20727 subf subtract for final result
20728 */
20729
20730 rtx cmpb_diff = gen_reg_rtx (word_mode);
20731 rtx cmpb_zero = gen_reg_rtx (word_mode);
20732 rtx rot_amt = gen_reg_rtx (word_mode);
20733 rtx zero_reg = gen_reg_rtx (word_mode);
20734
20735 rtx rot1_1 = gen_reg_rtx (word_mode);
20736 rtx rot1_2 = gen_reg_rtx (word_mode);
20737 rtx rot2_1 = gen_reg_rtx (word_mode);
20738 rtx rot2_2 = gen_reg_rtx (word_mode);
20739
20740 if (word_mode == SImode)
20741 {
20742 emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20743 emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
20744 emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20745 emit_insn (gen_one_cmplsi2 (cmpb_diff, cmpb_diff));
20746 emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20747 emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
20748 emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
20749 emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
20750 gen_lowpart (SImode, rot_amt)));
20751 emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20752 emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
20753 gen_lowpart (SImode, rot_amt)));
20754 emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20755 emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
20756 }
20757 else
20758 {
20759 emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
20760 emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
20761 emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
20762 emit_insn (gen_one_cmpldi2 (cmpb_diff, cmpb_diff));
20763 emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
20764 emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
20765 emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
20766 emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
20767 gen_lowpart (SImode, rot_amt)));
20768 emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
20769 emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
20770 gen_lowpart (SImode, rot_amt)));
20771 emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
20772 emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
20773 }
20774
20775 emit_label (final_move_label);
20776 emit_insn (gen_movsi (target,
20777 gen_lowpart (SImode, result_reg)));
20778 emit_label (final_label);
20779 return true;
20780 }
20781
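/* A minimal standalone C model of the cmpb cleanup sequence above, for
   illustration only (the function name is invented and GCC builtins are
   assumed): cmpb sets a result byte to 0xff where the operand bytes are
   equal and 0x00 where they differ, so combining the "differs" and
   "is zero" maps locates the first interesting byte of two big-endian
   chunks. */

static long long
cmpb_cleanup_model (unsigned long long a, unsigned long long b)
{
  unsigned long long eq = 0, zero = 0;
  for (int i = 0; i < 64; i += 8)
    {
      unsigned long long am = (a >> (56 - i)) & 0xff;
      unsigned long long bm = (b >> (56 - i)) & 0xff;
      eq |= (am == bm ? 0xffULL : 0) << (56 - i);        /* cmpb a,b */
      zero |= (am == 0 ? 0xffULL : 0) << (56 - i);       /* cmpb a,0 */
    }
  unsigned long long mark = ~eq | zero;                  /* orc */
  if (mark == 0)
    return 0;                     /* equal, no terminating zero byte */
  int rot = (__builtin_clzll (mark) + 8) & 63;           /* cntlzd; addi */
  unsigned long long r1 = rot ? (a << rot) | (a >> (64 - rot)) : a;
  unsigned long long r2 = rot ? (b << rot) | (b >> (64 - rot)) : b;
  return (long long) ((r1 & 0xff) - (r2 & 0xff));        /* rldcl; subf */
}
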
20782 /* Expand a block move operation, and return 1 if successful. Return 0
20783 if we should let the compiler generate normal code.
20784
20785 operands[0] is the destination
20786 operands[1] is the source
20787 operands[2] is the length
20788 operands[3] is the alignment */
20789
20790 #define MAX_MOVE_REG 4
20791
20792 int
20793 expand_block_move (rtx operands[])
20794 {
20795 rtx orig_dest = operands[0];
20796 rtx orig_src = operands[1];
20797 rtx bytes_rtx = operands[2];
20798 rtx align_rtx = operands[3];
20799 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
20800 int align;
20801 int bytes;
20802 int offset;
20803 int move_bytes;
20804 rtx stores[MAX_MOVE_REG];
20805 int num_reg = 0;
20806
20807 /* If this is not a fixed size move, just call memcpy. */
20808 if (! constp)
20809 return 0;
20810
20811 /* This must be a fixed size alignment. */
20812 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
20813 align = INTVAL (align_rtx) * BITS_PER_UNIT;
20814
20815 /* Anything to move? */
20816 bytes = INTVAL (bytes_rtx);
20817 if (bytes <= 0)
20818 return 1;
20819
20820 if (bytes > rs6000_block_move_inline_limit)
20821 return 0;
20822
20823 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
20824 {
20825 union {
20826 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
20827 rtx (*mov) (rtx, rtx);
20828 } gen_func;
20829 machine_mode mode = BLKmode;
20830 rtx src, dest;
20831
20832 /* Altivec first, since it will be faster than a string move
20833 when it applies, and usually not significantly larger. */
20834 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
20835 {
20836 move_bytes = 16;
20837 mode = V4SImode;
20838 gen_func.mov = gen_movv4si;
20839 }
20840 else if (TARGET_SPE && bytes >= 8 && align >= 64)
20841 {
20842 move_bytes = 8;
20843 mode = V2SImode;
20844 gen_func.mov = gen_movv2si;
20845 }
20846 else if (TARGET_STRING
20847 && bytes > 24 /* move up to 32 bytes at a time */
20848 && ! fixed_regs[5]
20849 && ! fixed_regs[6]
20850 && ! fixed_regs[7]
20851 && ! fixed_regs[8]
20852 && ! fixed_regs[9]
20853 && ! fixed_regs[10]
20854 && ! fixed_regs[11]
20855 && ! fixed_regs[12])
20856 {
20857 move_bytes = (bytes > 32) ? 32 : bytes;
20858 gen_func.movmemsi = gen_movmemsi_8reg;
20859 }
20860 else if (TARGET_STRING
20861 && bytes > 16 /* move up to 24 bytes at a time */
20862 && ! fixed_regs[5]
20863 && ! fixed_regs[6]
20864 && ! fixed_regs[7]
20865 && ! fixed_regs[8]
20866 && ! fixed_regs[9]
20867 && ! fixed_regs[10])
20868 {
20869 move_bytes = (bytes > 24) ? 24 : bytes;
20870 gen_func.movmemsi = gen_movmemsi_6reg;
20871 }
20872 else if (TARGET_STRING
20873 && bytes > 8 /* move up to 16 bytes at a time */
20874 && ! fixed_regs[5]
20875 && ! fixed_regs[6]
20876 && ! fixed_regs[7]
20877 && ! fixed_regs[8])
20878 {
20879 move_bytes = (bytes > 16) ? 16 : bytes;
20880 gen_func.movmemsi = gen_movmemsi_4reg;
20881 }
20882 else if (bytes >= 8 && TARGET_POWERPC64
20883 && (align >= 64 || !STRICT_ALIGNMENT))
20884 {
20885 move_bytes = 8;
20886 mode = DImode;
20887 gen_func.mov = gen_movdi;
20888 if (offset == 0 && align < 64)
20889 {
20890 rtx addr;
20891
20892 /* If the address form is reg+offset with offset not a
20893 multiple of four, reload into reg indirect form here
20894 rather than waiting for reload. This way we get one
20895 reload, not one per load and/or store. */
20896 addr = XEXP (orig_dest, 0);
20897 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
20898 && GET_CODE (XEXP (addr, 1)) == CONST_INT
20899 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
20900 {
20901 addr = copy_addr_to_reg (addr);
20902 orig_dest = replace_equiv_address (orig_dest, addr);
20903 }
20904 addr = XEXP (orig_src, 0);
20905 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
20906 && GET_CODE (XEXP (addr, 1)) == CONST_INT
20907 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
20908 {
20909 addr = copy_addr_to_reg (addr);
20910 orig_src = replace_equiv_address (orig_src, addr);
20911 }
20912 }
20913 }
20914 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
20915 { /* move up to 8 bytes at a time */
20916 move_bytes = (bytes > 8) ? 8 : bytes;
20917 gen_func.movmemsi = gen_movmemsi_2reg;
20918 }
20919 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
20920 { /* move 4 bytes */
20921 move_bytes = 4;
20922 mode = SImode;
20923 gen_func.mov = gen_movsi;
20924 }
20925 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
20926 { /* move 2 bytes */
20927 move_bytes = 2;
20928 mode = HImode;
20929 gen_func.mov = gen_movhi;
20930 }
20931 else if (TARGET_STRING && bytes > 1)
20932 { /* move up to 4 bytes at a time */
20933 move_bytes = (bytes > 4) ? 4 : bytes;
20934 gen_func.movmemsi = gen_movmemsi_1reg;
20935 }
20936 else /* move 1 byte at a time */
20937 {
20938 move_bytes = 1;
20939 mode = QImode;
20940 gen_func.mov = gen_movqi;
20941 }
20942
20943 src = adjust_address (orig_src, mode, offset);
20944 dest = adjust_address (orig_dest, mode, offset);
20945
20946 if (mode != BLKmode)
20947 {
20948 rtx tmp_reg = gen_reg_rtx (mode);
20949
20950 emit_insn ((*gen_func.mov) (tmp_reg, src));
20951 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
20952 }
20953
20954 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
20955 {
20956 int i;
20957 for (i = 0; i < num_reg; i++)
20958 emit_insn (stores[i]);
20959 num_reg = 0;
20960 }
20961
20962 if (mode == BLKmode)
20963 {
20964 /* Move the address into scratch registers. The movmemsi
20965 patterns require zero offset. */
20966 if (!REG_P (XEXP (src, 0)))
20967 {
20968 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
20969 src = replace_equiv_address (src, src_reg);
20970 }
20971 set_mem_size (src, move_bytes);
20972
20973 if (!REG_P (XEXP (dest, 0)))
20974 {
20975 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
20976 dest = replace_equiv_address (dest, dest_reg);
20977 }
20978 set_mem_size (dest, move_bytes);
20979
20980 emit_insn ((*gen_func.movmemsi) (dest, src,
20981 GEN_INT (move_bytes & 31),
20982 align_rtx));
20983 }
20984 }
20985
20986 return 1;
20987 }
20988
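/* A standalone sketch of the chunk-size ladder above, assuming a
   64-bit-capable target and ignoring the vector and string-instruction
   cases (the function name is invented): pick the widest piece that the
   remaining byte count and the alignment, in bits, permit. */

static int
next_move_bytes_model (int bytes, int align, int strict_alignment)
{
  if (bytes >= 8 && (align >= 64 || !strict_alignment))
    return 8;                                   /* DImode move */
  if (bytes >= 4 && (align >= 32 || !strict_alignment))
    return 4;                                   /* SImode move */
  if (bytes >= 2 && (align >= 16 || !strict_alignment))
    return 2;                                   /* HImode move */
  return 1;                                     /* QImode move */
}
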
20989 \f
20990 /* Return a string to perform a load_multiple operation.
20991 operands[0] is the vector.
20992 operands[1] is the source address.
20993 operands[2] is the first destination register. */
20994
20995 const char *
20996 rs6000_output_load_multiple (rtx operands[3])
20997 {
20998 /* We have to handle the case where the pseudo used to contain the address
20999 is assigned to one of the output registers. */
21000 int i, j;
21001 int words = XVECLEN (operands[0], 0);
21002 rtx xop[10];
21003
21004 if (XVECLEN (operands[0], 0) == 1)
21005 return "lwz %2,0(%1)";
21006
21007 for (i = 0; i < words; i++)
21008 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
21009 {
21010 if (i == words-1)
21011 {
21012 xop[0] = GEN_INT (4 * (words-1));
21013 xop[1] = operands[1];
21014 xop[2] = operands[2];
21015 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
21016 return "";
21017 }
21018 else if (i == 0)
21019 {
21020 xop[0] = GEN_INT (4 * (words-1));
21021 xop[1] = operands[1];
21022 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
21023 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
21024 return "";
21025 }
21026 else
21027 {
21028 for (j = 0; j < words; j++)
21029 if (j != i)
21030 {
21031 xop[0] = GEN_INT (j * 4);
21032 xop[1] = operands[1];
21033 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
21034 output_asm_insn ("lwz %2,%0(%1)", xop);
21035 }
21036 xop[0] = GEN_INT (i * 4);
21037 xop[1] = operands[1];
21038 output_asm_insn ("lwz %1,%0(%1)", xop);
21039 return "";
21040 }
21041 }
21042
21043 return "lswi %2,%1,%N0";
21044 }
21045
21046 \f
21047 /* A validation routine: say whether CODE, a condition code, and MODE
21048 match. The other alternatives either don't make sense or should
21049 never be generated. */
21050
21051 void
21052 validate_condition_mode (enum rtx_code code, machine_mode mode)
21053 {
21054 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
21055 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
21056 && GET_MODE_CLASS (mode) == MODE_CC);
21057
21058 /* These don't make sense. */
21059 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
21060 || mode != CCUNSmode);
21061
21062 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
21063 || mode == CCUNSmode);
21064
21065 gcc_assert (mode == CCFPmode
21066 || (code != ORDERED && code != UNORDERED
21067 && code != UNEQ && code != LTGT
21068 && code != UNGT && code != UNLT
21069 && code != UNGE && code != UNLE));
21070
21071 /* These should never be generated except for
21072 flag_finite_math_only. */
21073 gcc_assert (mode != CCFPmode
21074 || flag_finite_math_only
21075 || (code != LE && code != GE
21076 && code != UNEQ && code != LTGT
21077 && code != UNGT && code != UNLT));
21078
21079 /* These are invalid; the information is not there. */
21080 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
21081 }
21082
21083 \f
21084 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
21085 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
21086 not zero, store there the bit offset (counted from the right) where
21087 the single stretch of 1 bits begins; and similarly for B, the bit
21088 offset where it ends. */
21089
21090 bool
21091 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
21092 {
21093 unsigned HOST_WIDE_INT val = INTVAL (mask);
21094 unsigned HOST_WIDE_INT bit;
21095 int nb, ne;
21096 int n = GET_MODE_PRECISION (mode);
21097
21098 if (mode != DImode && mode != SImode)
21099 return false;
21100
21101 if (INTVAL (mask) >= 0)
21102 {
21103 bit = val & -val;
21104 ne = exact_log2 (bit);
21105 nb = exact_log2 (val + bit);
21106 }
21107 else if (val + 1 == 0)
21108 {
21109 nb = n;
21110 ne = 0;
21111 }
21112 else if (val & 1)
21113 {
21114 val = ~val;
21115 bit = val & -val;
21116 nb = exact_log2 (bit);
21117 ne = exact_log2 (val + bit);
21118 }
21119 else
21120 {
21121 bit = val & -val;
21122 ne = exact_log2 (bit);
21123 if (val + bit == 0)
21124 nb = n;
21125 else
21126 nb = 0;
21127 }
21128
21129 nb--;
21130
21131 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
21132 return false;
21133
21134 if (b)
21135 *b = nb;
21136 if (e)
21137 *e = ne;
21138
21139 return true;
21140 }
21141
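/* A standalone model of the common (non-wrapping) case accepted above,
   for illustration only; unlike rs6000_is_valid_mask it rejects the
   wrap-around masks that are valid for rotates. E.g. 0x00ffff00 yields
   *e == 8 (where the run of ones begins) and *b == 23 (where it ends). */

static int
single_run_of_ones_model (unsigned long long val, int *b, int *e)
{
  if (val == 0)
    return 0;
  int lo = __builtin_ctzll (val);       /* first 1 bit from the right */
  unsigned long long run = val >> lo;
  if ((run & (run + 1)) != 0)           /* a shifted-down run is 2^n - 1 */
    return 0;
  *e = lo;
  *b = lo + 63 - __builtin_clzll (run);
  return 1;
}
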
21142 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
21143 or rldicr instruction, to implement an AND with it in mode MODE. */
21144
21145 bool
21146 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
21147 {
21148 int nb, ne;
21149
21150 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21151 return false;
21152
21153 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
21154 does not wrap. */
21155 if (mode == DImode)
21156 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
21157
21158 /* For SImode, rlwinm can do everything. */
21159 if (mode == SImode)
21160 return (nb < 32 && ne < 32);
21161
21162 return false;
21163 }
21164
21165 /* Return the instruction template for an AND with mask in mode MODE, with
21166 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21167
21168 const char *
21169 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
21170 {
21171 int nb, ne;
21172
21173 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
21174 gcc_unreachable ();
21175
21176 if (mode == DImode && ne == 0)
21177 {
21178 operands[3] = GEN_INT (63 - nb);
21179 if (dot)
21180 return "rldicl. %0,%1,0,%3";
21181 return "rldicl %0,%1,0,%3";
21182 }
21183
21184 if (mode == DImode && nb == 63)
21185 {
21186 operands[3] = GEN_INT (63 - ne);
21187 if (dot)
21188 return "rldicr. %0,%1,0,%3";
21189 return "rldicr %0,%1,0,%3";
21190 }
21191
21192 if (nb < 32 && ne < 32)
21193 {
21194 operands[3] = GEN_INT (31 - nb);
21195 operands[4] = GEN_INT (31 - ne);
21196 if (dot)
21197 return "rlwinm. %0,%1,0,%3,%4";
21198 return "rlwinm %0,%1,0,%3,%4";
21199 }
21200
21201 gcc_unreachable ();
21202 }
21203
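/* For example (illustrative values): an SImode AND with 0x0000ff00 has
   ne == 8 and nb == 15, so the template above becomes
   "rlwinm %0,%1,0,16,23", a rotate by 0 that keeps IBM-numbered bits
   16..23. */
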
21204 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
21205 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
21206 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
21207
21208 bool
21209 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
21210 {
21211 int nb, ne;
21212
21213 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21214 return false;
21215
21216 int n = GET_MODE_PRECISION (mode);
21217 int sh = -1;
21218
21219 if (CONST_INT_P (XEXP (shift, 1)))
21220 {
21221 sh = INTVAL (XEXP (shift, 1));
21222 if (sh < 0 || sh >= n)
21223 return false;
21224 }
21225
21226 rtx_code code = GET_CODE (shift);
21227
21228 /* Convert any shift by 0 to a rotate, to simplify below code. */
21229 if (sh == 0)
21230 code = ROTATE;
21231
21232 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21233 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21234 code = ASHIFT;
21235 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21236 {
21237 code = LSHIFTRT;
21238 sh = n - sh;
21239 }
21240
21241 /* DImode rotates need rld*. */
21242 if (mode == DImode && code == ROTATE)
21243 return (nb == 63 || ne == 0 || ne == sh);
21244
21245 /* SImode rotates need rlw*. */
21246 if (mode == SImode && code == ROTATE)
21247 return (nb < 32 && ne < 32 && sh < 32);
21248
21249 /* Wrap-around masks are only okay for rotates. */
21250 if (ne > nb)
21251 return false;
21252
21253 /* Variable shifts are only okay for rotates. */
21254 if (sh < 0)
21255 return false;
21256
21257 /* Don't allow ASHIFT if the mask is wrong for that. */
21258 if (code == ASHIFT && ne < sh)
21259 return false;
21260
21261 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
21262 if the mask is wrong for that. */
21263 if (nb < 32 && ne < 32 && sh < 32
21264 && !(code == LSHIFTRT && nb >= 32 - sh))
21265 return true;
21266
21267 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
21268 if the mask is wrong for that. */
21269 if (code == LSHIFTRT)
21270 sh = 64 - sh;
21271 if (nb == 63 || ne == 0 || ne == sh)
21272 return !(code == LSHIFTRT && nb >= sh);
21273
21274 return false;
21275 }
21276
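/* For example (illustrative values): the SImode expression
   (x << 3) & 0xfffffff8 gives nb == 31, ne == 3 and sh == 3, an ASHIFT
   whose mask matches (ne >= sh), so it is accepted here and rendered by
   rs6000_insn_for_shift_mask below as "rlwinm %0,%1,3,0,28". */
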
21277 /* Return the instruction template for a shift with mask in mode MODE, with
21278 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21279
21280 const char *
21281 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
21282 {
21283 int nb, ne;
21284
21285 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21286 gcc_unreachable ();
21287
21288 if (mode == DImode && ne == 0)
21289 {
21290 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21291 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
21292 operands[3] = GEN_INT (63 - nb);
21293 if (dot)
21294 return "rld%I2cl. %0,%1,%2,%3";
21295 return "rld%I2cl %0,%1,%2,%3";
21296 }
21297
21298 if (mode == DImode && nb == 63)
21299 {
21300 operands[3] = GEN_INT (63 - ne);
21301 if (dot)
21302 return "rld%I2cr. %0,%1,%2,%3";
21303 return "rld%I2cr %0,%1,%2,%3";
21304 }
21305
21306 if (mode == DImode
21307 && GET_CODE (operands[4]) != LSHIFTRT
21308 && CONST_INT_P (operands[2])
21309 && ne == INTVAL (operands[2]))
21310 {
21311 operands[3] = GEN_INT (63 - nb);
21312 if (dot)
21313 return "rld%I2c. %0,%1,%2,%3";
21314 return "rld%I2c %0,%1,%2,%3";
21315 }
21316
21317 if (nb < 32 && ne < 32)
21318 {
21319 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21320 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21321 operands[3] = GEN_INT (31 - nb);
21322 operands[4] = GEN_INT (31 - ne);
21323 /* This insn can also be a 64-bit rotate with mask that really makes
21324 it just a shift right (with mask); the %h below are to adjust for
21325 that situation (shift count is >= 32 in that case). */
21326 if (dot)
21327 return "rlw%I2nm. %0,%1,%h2,%3,%4";
21328 return "rlw%I2nm %0,%1,%h2,%3,%4";
21329 }
21330
21331 gcc_unreachable ();
21332 }
21333
21334 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
21335 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
21336 ASHIFT, or LSHIFTRT) in mode MODE. */
21337
21338 bool
21339 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
21340 {
21341 int nb, ne;
21342
21343 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
21344 return false;
21345
21346 int n = GET_MODE_PRECISION (mode);
21347
21348 int sh = INTVAL (XEXP (shift, 1));
21349 if (sh < 0 || sh >= n)
21350 return false;
21351
21352 rtx_code code = GET_CODE (shift);
21353
21354 /* Convert any shift by 0 to a rotate, to simplify below code. */
21355 if (sh == 0)
21356 code = ROTATE;
21357
21358 /* Convert rotate to simple shift if we can, to make analysis simpler. */
21359 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
21360 code = ASHIFT;
21361 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
21362 {
21363 code = LSHIFTRT;
21364 sh = n - sh;
21365 }
21366
21367 /* DImode rotates need rldimi. */
21368 if (mode == DImode && code == ROTATE)
21369 return (ne == sh);
21370
21371 /* SImode rotates need rlwimi. */
21372 if (mode == SImode && code == ROTATE)
21373 return (nb < 32 && ne < 32 && sh < 32);
21374
21375 /* Wrap-around masks are only okay for rotates. */
21376 if (ne > nb)
21377 return false;
21378
21379 /* Don't allow ASHIFT if the mask is wrong for that. */
21380 if (code == ASHIFT && ne < sh)
21381 return false;
21382
21383 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
21384 if the mask is wrong for that. */
21385 if (nb < 32 && ne < 32 && sh < 32
21386 && !(code == LSHIFTRT && nb >= 32 - sh))
21387 return true;
21388
21389 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
21390 if the mask is wrong for that. */
21391 if (code == LSHIFTRT)
21392 sh = 64 - sh;
21393 if (ne == sh)
21394 return !(code == LSHIFTRT && nb >= sh);
21395
21396 return false;
21397 }
21398
21399 /* Return the instruction template for an insert with mask in mode MODE, with
21400 operands OPERANDS. If DOT is true, make it a record-form instruction. */
21401
21402 const char *
21403 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
21404 {
21405 int nb, ne;
21406
21407 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
21408 gcc_unreachable ();
21409
21410 /* Prefer rldimi because rlwimi is cracked. */
21411 if (TARGET_POWERPC64
21412 && (!dot || mode == DImode)
21413 && GET_CODE (operands[4]) != LSHIFTRT
21414 && ne == INTVAL (operands[2]))
21415 {
21416 operands[3] = GEN_INT (63 - nb);
21417 if (dot)
21418 return "rldimi. %0,%1,%2,%3";
21419 return "rldimi %0,%1,%2,%3";
21420 }
21421
21422 if (nb < 32 && ne < 32)
21423 {
21424 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
21425 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
21426 operands[3] = GEN_INT (31 - nb);
21427 operands[4] = GEN_INT (31 - ne);
21428 if (dot)
21429 return "rlwimi. %0,%1,%2,%3,%4";
21430 return "rlwimi %0,%1,%2,%3,%4";
21431 }
21432
21433 gcc_unreachable ();
21434 }
21435
21436 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
21437 using two machine instructions. */
21438
21439 bool
21440 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
21441 {
21442 /* There are two kinds of AND we can handle with two insns:
21443 1) those we can do with two rl* insns;
21444 2) ori[s];xori[s].
21445
21446 We do not handle that last case yet. */
21447
21448 /* If there is just one stretch of ones, we can do it. */
21449 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
21450 return true;
21451
21452 /* Otherwise, fill in the lowest "hole"; if we can do the result with
21453 one insn, we can do the whole thing with two. */
21454 unsigned HOST_WIDE_INT val = INTVAL (c);
21455 unsigned HOST_WIDE_INT bit1 = val & -val;
21456 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21457 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21458 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21459 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
21460 }
21461
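/* A standalone demonstration of the hole-filling arithmetic above and
   in rs6000_emit_2insn_and, for illustration only (the function name is
   invented). bit3 - bit2 is exactly the mask of the lowest hole, so for
   val == 0xe7 (0b11100111): bit1 == 0x01, bit2 == 0x08, val1 == 0xe0,
   bit3 == 0x20, and the result is 0xff. */

static unsigned long long
fill_lowest_hole_model (unsigned long long val)
{
  unsigned long long bit1 = val & -val;           /* lowest set bit */
  unsigned long long bit2 = (val + bit1) & ~val;  /* lowest clear bit above */
  unsigned long long val1 = (val + bit1) & val;   /* val minus its low run */
  unsigned long long bit3 = val1 & -val1;         /* first bit above the hole */
  return val + bit3 - bit2;                       /* low hole filled */
}
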
21462 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
21463 If EXPAND is true, split rotate-and-mask instructions we generate to
21464 their constituent parts as well (this is used during expand); if DOT
21465 is 1, make the last insn a record-form instruction clobbering the
21466 destination GPR and setting the CC reg (from operands[3]); if 2, set
21467 that GPR as well as the CC reg. */
21468
21469 void
21470 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
21471 {
21472 gcc_assert (!(expand && dot));
21473
21474 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
21475
21476 /* If it is one stretch of ones, it is DImode; shift left, mask, then
21477 shift right. This generates better code than doing the masks without
21478 shifts, or shifting first right and then left. */
21479 int nb, ne;
21480 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
21481 {
21482 gcc_assert (mode == DImode);
21483
21484 int shift = 63 - nb;
21485 if (expand)
21486 {
21487 rtx tmp1 = gen_reg_rtx (DImode);
21488 rtx tmp2 = gen_reg_rtx (DImode);
21489 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
21490 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
21491 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
21492 }
21493 else
21494 {
21495 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
21496 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
21497 emit_move_insn (operands[0], tmp);
21498 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
21499 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21500 }
21501 return;
21502 }
21503
21504 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
21505 that does the rest. */
21506 unsigned HOST_WIDE_INT bit1 = val & -val;
21507 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
21508 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
21509 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
21510
21511 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
21512 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
21513
21514 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
21515
21516 /* Two "no-rotate"-and-mask instructions, for SImode. */
21517 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
21518 {
21519 gcc_assert (mode == SImode);
21520
21521 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21522 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
21523 emit_move_insn (reg, tmp);
21524 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21525 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21526 return;
21527 }
21528
21529 gcc_assert (mode == DImode);
21530
21531 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
21532 insns; we have to do the first in SImode, because it wraps. */
21533 if (mask2 <= 0xffffffff
21534 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
21535 {
21536 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
21537 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
21538 GEN_INT (mask1));
21539 rtx reg_low = gen_lowpart (SImode, reg);
21540 emit_move_insn (reg_low, tmp);
21541 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
21542 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21543 return;
21544 }
21545
21546 /* Two rld* insns: rotate, clear the hole in the middle (which now is
21547 at the top end), rotate back and clear the other hole. */
21548 int right = exact_log2 (bit3);
21549 int left = 64 - right;
21550
21551 /* Rotate the mask too. */
21552 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
21553
21554 if (expand)
21555 {
21556 rtx tmp1 = gen_reg_rtx (DImode);
21557 rtx tmp2 = gen_reg_rtx (DImode);
21558 rtx tmp3 = gen_reg_rtx (DImode);
21559 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
21560 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
21561 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
21562 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
21563 }
21564 else
21565 {
21566 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
21567 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
21568 emit_move_insn (operands[0], tmp);
21569 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
21570 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
21571 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
21572 }
21573 }
21574 \f
21575 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
21576 for lfq and stfq insns iff the registers are hard registers. */
21577
21578 int
21579 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
21580 {
21581 /* We might have been passed a SUBREG. */
21582 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
21583 return 0;
21584
21585 /* We might have been passed non-floating-point registers. */
21586 if (!FP_REGNO_P (REGNO (reg1))
21587 || !FP_REGNO_P (REGNO (reg2)))
21588 return 0;
21589
21590 return (REGNO (reg1) == REGNO (reg2) - 1);
21591 }
21592
21593 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
21594 addr1 and addr2 must be in consecutive memory locations
21595 (addr2 == addr1 + 8). */
21596
21597 int
21598 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
21599 {
21600 rtx addr1, addr2;
21601 unsigned int reg1, reg2;
21602 int offset1, offset2;
21603
21604 /* The mems cannot be volatile. */
21605 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
21606 return 0;
21607
21608 addr1 = XEXP (mem1, 0);
21609 addr2 = XEXP (mem2, 0);
21610
21611 /* Extract an offset (if used) from the first addr. */
21612 if (GET_CODE (addr1) == PLUS)
21613 {
21614 /* If not a REG, return zero. */
21615 if (GET_CODE (XEXP (addr1, 0)) != REG)
21616 return 0;
21617 else
21618 {
21619 reg1 = REGNO (XEXP (addr1, 0));
21620 /* The offset must be constant! */
21621 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
21622 return 0;
21623 offset1 = INTVAL (XEXP (addr1, 1));
21624 }
21625 }
21626 else if (GET_CODE (addr1) != REG)
21627 return 0;
21628 else
21629 {
21630 reg1 = REGNO (addr1);
21631 /* This was a simple (mem (reg)) expression. Offset is 0. */
21632 offset1 = 0;
21633 }
21634
21635 /* And now for the second addr. */
21636 if (GET_CODE (addr2) == PLUS)
21637 {
21638 /* If not a REG, return zero. */
21639 if (GET_CODE (XEXP (addr2, 0)) != REG)
21640 return 0;
21641 else
21642 {
21643 reg2 = REGNO (XEXP (addr2, 0));
21644 /* The offset must be constant. */
21645 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
21646 return 0;
21647 offset2 = INTVAL (XEXP (addr2, 1));
21648 }
21649 }
21650 else if (GET_CODE (addr2) != REG)
21651 return 0;
21652 else
21653 {
21654 reg2 = REGNO (addr2);
21655 /* This was a simple (mem (reg)) expression. Offset is 0. */
21656 offset2 = 0;
21657 }
21658
21659 /* Both of these must have the same base register. */
21660 if (reg1 != reg2)
21661 return 0;
21662
21663 /* The offset for the second addr must be 8 more than the first addr. */
21664 if (offset2 != offset1 + 8)
21665 return 0;
21666
21667 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
21668 instructions. */
21669 return 1;
21670 }
21671 \f
21672
21673 rtx
21674 rs6000_secondary_memory_needed_rtx (machine_mode mode)
21675 {
21676 static bool eliminated = false;
21677 rtx ret;
21678
21679 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
21680 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21681 else
21682 {
21683 rtx mem = cfun->machine->sdmode_stack_slot;
21684 gcc_assert (mem != NULL_RTX);
21685
21686 if (!eliminated)
21687 {
21688 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
21689 cfun->machine->sdmode_stack_slot = mem;
21690 eliminated = true;
21691 }
21692 ret = mem;
21693 }
21694
21695 if (TARGET_DEBUG_ADDR)
21696 {
21697 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
21698 GET_MODE_NAME (mode));
21699 if (!ret)
21700 fprintf (stderr, "\tNULL_RTX\n");
21701 else
21702 debug_rtx (ret);
21703 }
21704
21705 return ret;
21706 }
21707
21708 /* Return the mode to be used for memory when a secondary memory
21709 location is needed. For SDmode values we need to use DDmode, in
21710 all other cases we can use the same mode. */
21711 machine_mode
21712 rs6000_secondary_memory_needed_mode (machine_mode mode)
21713 {
21714 if (lra_in_progress && mode == SDmode)
21715 return DDmode;
21716 return mode;
21717 }
21718
21719 static tree
21720 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
21721 {
21722 /* Don't walk into types. */
21723 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
21724 {
21725 *walk_subtrees = 0;
21726 return NULL_TREE;
21727 }
21728
21729 switch (TREE_CODE (*tp))
21730 {
21731 case VAR_DECL:
21732 case PARM_DECL:
21733 case FIELD_DECL:
21734 case RESULT_DECL:
21735 case SSA_NAME:
21736 case REAL_CST:
21737 case MEM_REF:
21738 case VIEW_CONVERT_EXPR:
21739 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
21740 return *tp;
21741 break;
21742 default:
21743 break;
21744 }
21745
21746 return NULL_TREE;
21747 }
21748
21749 /* Classify a register type. The FMRGOW/FMRGEW instructions only work
21750 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
21751 only work on the traditional Altivec registers, so note if an Altivec
21752 register was chosen. */
21753
21754 static enum rs6000_reg_type
21755 register_to_reg_type (rtx reg, bool *is_altivec)
21756 {
21757 HOST_WIDE_INT regno;
21758 enum reg_class rclass;
21759
21760 if (GET_CODE (reg) == SUBREG)
21761 reg = SUBREG_REG (reg);
21762
21763 if (!REG_P (reg))
21764 return NO_REG_TYPE;
21765
21766 regno = REGNO (reg);
21767 if (regno >= FIRST_PSEUDO_REGISTER)
21768 {
21769 if (!lra_in_progress && !reload_in_progress && !reload_completed)
21770 return PSEUDO_REG_TYPE;
21771
21772 regno = true_regnum (reg);
21773 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
21774 return PSEUDO_REG_TYPE;
21775 }
21776
21777 gcc_assert (regno >= 0);
21778
21779 if (is_altivec && ALTIVEC_REGNO_P (regno))
21780 *is_altivec = true;
21781
21782 rclass = rs6000_regno_regclass[regno];
21783 return reg_class_to_reg_type[(int)rclass];
21784 }
21785
21786 /* Helper function to return the cost of adding a TOC entry address. */
21787
21788 static inline int
21789 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
21790 {
21791 int ret;
21792
21793 if (TARGET_CMODEL != CMODEL_SMALL)
21794 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
21795
21796 else
21797 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
21798
21799 return ret;
21800 }
21801
21802 /* Helper function for rs6000_secondary_reload to determine whether the memory
21803 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
21804 needs reloading. Return negative if the memory is not handled by the memory
21805 helper functions and to try a different reload method, 0 if no additional
21806 instructions are needed, and positive to give the extra cost for the
21807 memory. */
21808
21809 static int
21810 rs6000_secondary_reload_memory (rtx addr,
21811 enum reg_class rclass,
21812 machine_mode mode)
21813 {
21814 int extra_cost = 0;
21815 rtx reg, and_arg, plus_arg0, plus_arg1;
21816 addr_mask_type addr_mask;
21817 const char *type = NULL;
21818 const char *fail_msg = NULL;
21819
21820 if (GPR_REG_CLASS_P (rclass))
21821 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
21822
21823 else if (rclass == FLOAT_REGS)
21824 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
21825
21826 else if (rclass == ALTIVEC_REGS)
21827 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
21828
21829 /* For the combined VSX_REGS, turn off Altivec AND -16. */
21830 else if (rclass == VSX_REGS)
21831 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
21832 & ~RELOAD_REG_AND_M16);
21833
21834 /* If the register allocator hasn't made up its mind yet on the register
21835 class to use, settle on sensible defaults. */
21836 else if (rclass == NO_REGS)
21837 {
21838 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
21839 & ~RELOAD_REG_AND_M16);
21840
21841 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
21842 addr_mask &= ~(RELOAD_REG_INDEXED
21843 | RELOAD_REG_PRE_INCDEC
21844 | RELOAD_REG_PRE_MODIFY);
21845 }
21846
21847 else
21848 addr_mask = 0;
21849
21850 /* If the mode isn't valid for this register class, just return now. */
21851 if ((addr_mask & RELOAD_REG_VALID) == 0)
21852 {
21853 if (TARGET_DEBUG_ADDR)
21854 {
21855 fprintf (stderr,
21856 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
21857 "not valid in class\n",
21858 GET_MODE_NAME (mode), reg_class_names[rclass]);
21859 debug_rtx (addr);
21860 }
21861
21862 return -1;
21863 }
21864
21865 switch (GET_CODE (addr))
21866 {
21867 /* Does the register class support auto update forms for this mode? We
21868 don't need a scratch register, since the powerpc only supports
21869 PRE_INC, PRE_DEC, and PRE_MODIFY. */
21870 case PRE_INC:
21871 case PRE_DEC:
21872 reg = XEXP (addr, 0);
21873 if (!base_reg_operand (addr, GET_MODE (reg)))
21874 {
21875 fail_msg = "no base register #1";
21876 extra_cost = -1;
21877 }
21878
21879 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
21880 {
21881 extra_cost = 1;
21882 type = "update";
21883 }
21884 break;
21885
21886 case PRE_MODIFY:
21887 reg = XEXP (addr, 0);
21888 plus_arg1 = XEXP (addr, 1);
21889 if (!base_reg_operand (reg, GET_MODE (reg))
21890 || GET_CODE (plus_arg1) != PLUS
21891 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
21892 {
21893 fail_msg = "bad PRE_MODIFY";
21894 extra_cost = -1;
21895 }
21896
21897 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
21898 {
21899 extra_cost = 1;
21900 type = "update";
21901 }
21902 break;
21903
21904 /* Do we need to simulate AND -16 to clear the bottom address bits used
21905 in VMX load/stores? Only allow the AND for vector sizes. */
21906 case AND:
21907 and_arg = XEXP (addr, 0);
21908 if (GET_MODE_SIZE (mode) != 16
21909 || GET_CODE (XEXP (addr, 1)) != CONST_INT
21910 || INTVAL (XEXP (addr, 1)) != -16)
21911 {
21912 fail_msg = "bad Altivec AND #1";
21913 extra_cost = -1;
21914 }
21915
21916 if (rclass != ALTIVEC_REGS)
21917 {
21918 if (legitimate_indirect_address_p (and_arg, false))
21919 extra_cost = 1;
21920
21921 else if (legitimate_indexed_address_p (and_arg, false))
21922 extra_cost = 2;
21923
21924 else
21925 {
21926 fail_msg = "bad Altivec AND #2";
21927 extra_cost = -1;
21928 }
21929
21930 type = "and";
21931 }
21932 break;
21933
21934 /* If this is an indirect address, make sure it is a base register. */
21935 case REG:
21936 case SUBREG:
21937 if (!legitimate_indirect_address_p (addr, false))
21938 {
21939 extra_cost = 1;
21940 type = "move";
21941 }
21942 break;
21943
21944 /* If this is an indexed address, make sure the register class can handle
21945 indexed addresses for this mode. */
21946 case PLUS:
21947 plus_arg0 = XEXP (addr, 0);
21948 plus_arg1 = XEXP (addr, 1);
21949
21950 /* (plus (plus (reg) (constant)) (constant)) is generated during
21951 push_reload processing, so handle it now. */
21952 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
21953 {
21954 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
21955 {
21956 extra_cost = 1;
21957 type = "offset";
21958 }
21959 }
21960
21961 /* (plus (plus (reg) (constant)) (reg)) is also generated during
21962 push_reload processing, so handle it now. */
21963 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
21964 {
21965 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
21966 {
21967 extra_cost = 1;
21968 type = "indexed #2";
21969 }
21970 }
21971
21972 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
21973 {
21974 fail_msg = "no base register #2";
21975 extra_cost = -1;
21976 }
21977
21978 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
21979 {
21980 if ((addr_mask & RELOAD_REG_INDEXED) == 0
21981 || !legitimate_indexed_address_p (addr, false))
21982 {
21983 extra_cost = 1;
21984 type = "indexed";
21985 }
21986 }
21987
21988 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
21989 && CONST_INT_P (plus_arg1))
21990 {
21991 if (!quad_address_offset_p (INTVAL (plus_arg1)))
21992 {
21993 extra_cost = 1;
21994 type = "vector d-form offset";
21995 }
21996 }
21997
21998 /* Make sure the register class can handle offset addresses. */
21999 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22000 {
22001 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22002 {
22003 extra_cost = 1;
22004 type = "offset #2";
22005 }
22006 }
22007
22008 else
22009 {
22010 fail_msg = "bad PLUS";
22011 extra_cost = -1;
22012 }
22013
22014 break;
22015
22016 case LO_SUM:
22017 /* Quad offsets are restricted and can't handle normal addresses. */
22018 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22019 {
22020 extra_cost = -1;
22021 type = "vector d-form lo_sum";
22022 }
22023
22024 else if (!legitimate_lo_sum_address_p (mode, addr, false))
22025 {
22026 fail_msg = "bad LO_SUM";
22027 extra_cost = -1;
22028 }
22029
22030 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22031 {
22032 extra_cost = 1;
22033 type = "lo_sum";
22034 }
22035 break;
22036
22037 /* Static addresses need to create a TOC entry. */
22038 case CONST:
22039 case SYMBOL_REF:
22040 case LABEL_REF:
22041 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22042 {
22043 extra_cost = -1;
22044 type = "vector d-form lo_sum #2";
22045 }
22046
22047 else
22048 {
22049 type = "address";
22050 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
22051 }
22052 break;
22053
22054 /* TOC references look like offsettable memory. */
22055 case UNSPEC:
22056 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
22057 {
22058 fail_msg = "bad UNSPEC";
22059 extra_cost = -1;
22060 }
22061
22062 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
22063 {
22064 extra_cost = -1;
22065 type = "vector d-form lo_sum #3";
22066 }
22067
22068 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22069 {
22070 extra_cost = 1;
22071 type = "toc reference";
22072 }
22073 break;
22074
22075 default:
22076 {
22077 fail_msg = "bad address";
22078 extra_cost = -1;
22079 }
22080 }
22081
22082 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
22083 {
22084 if (extra_cost < 0)
22085 fprintf (stderr,
22086 "rs6000_secondary_reload_memory error: mode = %s, "
22087 "class = %s, addr_mask = '%s', %s\n",
22088 GET_MODE_NAME (mode),
22089 reg_class_names[rclass],
22090 rs6000_debug_addr_mask (addr_mask, false),
22091 (fail_msg != NULL) ? fail_msg : "<bad address>");
22092
22093 else
22094 fprintf (stderr,
22095 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
22096 "addr_mask = '%s', extra cost = %d, %s\n",
22097 GET_MODE_NAME (mode),
22098 reg_class_names[rclass],
22099 rs6000_debug_addr_mask (addr_mask, false),
22100 extra_cost,
22101 (type) ? type : "<none>");
22102
22103 debug_rtx (addr);
22104 }
22105
22106 return extra_cost;
22107 }
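
/* Illustrative example, not from the original source: reloading a V4SImode
   value in ALTIVEC_REGS whose address is (plus (reg 1) (const_int 32)) --
   register and offset hypothetical -- takes the "offset #2" path above,
   since Altivec lacks reg+offset addressing for vector modes; the function
   returns 1 for the single add needed to form the address in a GPR.  */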
22108
22109 /* Helper function for rs6000_secondary_reload to return true if a move to a
22110 different register class is really a simple move. */
22111
22112 static bool
22113 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
22114 enum rs6000_reg_type from_type,
22115 machine_mode mode)
22116 {
22117 int size = GET_MODE_SIZE (mode);
22118
22119 /* Add support for various direct moves available. In this function, we only
22120 look at cases where we don't need any extra registers, and one or more
22121 simple move insns are issued. Originally small integers are not allowed
22122 in FPR/VSX registers. Single precision binary floating is not a simple
22123 move because we need to convert to the single precision memory layout.
22124 The 4-byte SDmode can be moved. TDmode values are disallowed since they
22125 need special direct move handling, which we do not support yet. */
22126 if (TARGET_DIRECT_MOVE
22127 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22128 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
22129 {
22130 if (TARGET_POWERPC64)
22131 {
22132 /* ISA 2.07: MTVSRD or MFVSRD. */
22133 if (size == 8)
22134 return true;
22135
22136 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
22137 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
22138 return true;
22139 }
22140
22141 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22142 if (TARGET_VSX_SMALL_INTEGER)
22143 {
22144 if (mode == SImode)
22145 return true;
22146
22147 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
22148 return true;
22149 }
22150
22151 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
22152 if (mode == SDmode)
22153 return true;
22154 }
22155
22156 /* Power6+: MFTGPR or MFFGPR. */
22157 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
22158 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
22159 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22160 return true;
22161
22162 /* Move to/from SPR. */
22163 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
22164 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
22165 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
22166 return true;
22167
22168 return false;
22169 }
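
/* Illustrative example with hypothetical registers: on a 64-bit ISA 2.07
   target, a DImode move between a GPR and a VSX register is one direct
   move instruction, so this function returns true and reload allocates no
   scratch register:

	mtvsrd 0,3	(vs0 <- r3, GPR to VSX)
	mfvsrd 3,0	(r3 <- vs0, VSX to GPR)  */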
22170
22171 /* Direct move helper function for rs6000_secondary_reload, handle all of the
22172 special direct moves that involve allocating an extra register, return the
22173 insn code of the helper function if there is such a function or
22174 CODE_FOR_nothing if not. */
22175
22176 static bool
22177 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
22178 enum rs6000_reg_type from_type,
22179 machine_mode mode,
22180 secondary_reload_info *sri,
22181 bool altivec_p)
22182 {
22183 bool ret = false;
22184 enum insn_code icode = CODE_FOR_nothing;
22185 int cost = 0;
22186 int size = GET_MODE_SIZE (mode);
22187
22188 if (TARGET_POWERPC64 && size == 16)
22189 {
22190 /* Handle moving 128-bit values from GPRs to VSX registers on
22191 ISA 2.07 (power8, power9) when running in 64-bit mode using
22192 XXPERMDI to glue the two 64-bit values back together. */
22193 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22194 {
22195 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
22196 icode = reg_addr[mode].reload_vsx_gpr;
22197 }
22198
22199 /* Handle moving 128-bit values from VSX registers to GPRs on
22200 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
22201 bottom 64-bit value. */
22202 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22203 {
22204 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
22205 icode = reg_addr[mode].reload_gpr_vsx;
22206 }
22207 }
22208
22209 else if (TARGET_POWERPC64 && mode == SFmode)
22210 {
22211 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
22212 {
22213 cost = 3; /* xscvdpspn, mfvsrd, and. */
22214 icode = reg_addr[mode].reload_gpr_vsx;
22215 }
22216
22217 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
22218 {
22219 cost = 2; /* mtvsrwz, xscvspdpn. */
22220 icode = reg_addr[mode].reload_vsx_gpr;
22221 }
22222 }
22223
22224 else if (!TARGET_POWERPC64 && size == 8)
22225 {
22226 /* Handle moving 64-bit values from GPRs to floating point registers on
22227 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
22228 32-bit values back together. Altivec register classes must be handled
22229 specially since a different instruction is used, and the secondary
22230 reload support requires a single instruction class in the scratch
22231 register constraint. However, right now TFmode is not allowed in
22232 Altivec registers, so the pattern will never match. */
22233 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
22234 {
22235 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
22236 icode = reg_addr[mode].reload_fpr_gpr;
22237 }
22238 }
22239
22240 if (icode != CODE_FOR_nothing)
22241 {
22242 ret = true;
22243 if (sri)
22244 {
22245 sri->icode = icode;
22246 sri->extra_cost = cost;
22247 }
22248 }
22249
22250 return ret;
22251 }
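
/* A sketch of what the cost of 3 above corresponds to, with hypothetical
   registers: a TImode GPR-to-VSX move on a 64-bit ISA 2.07 target goes
   through the reg_addr[mode].reload_vsx_gpr pattern, which expands to
   roughly

	mtvsrd 0,3		(first doubleword)
	mtvsrd 1,4		(second doubleword)
	xxpermdi 34,0,1,0	(glue the two halves together)  */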
22252
22253 /* Return whether a move between two register classes can be done either
22254 directly (simple move) or via a pattern that uses a single extra temporary
22255 (using ISA 2.07's direct move in this case). */
22256
22257 static bool
22258 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
22259 enum rs6000_reg_type from_type,
22260 machine_mode mode,
22261 secondary_reload_info *sri,
22262 bool altivec_p)
22263 {
22264 /* Fall back to load/store reloads if either type is not a register. */
22265 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
22266 return false;
22267
22268 /* If we haven't allocated registers yet, assume the move can be done for the
22269 standard register types. */
22270 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
22271 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
22272 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
22273 return true;
22274
22275 /* A move within the same set of registers is a simple move for
22276 non-specialized registers. */
22277 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
22278 return true;
22279
22280 /* Check whether a simple move can be done directly. */
22281 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
22282 {
22283 if (sri)
22284 {
22285 sri->icode = CODE_FOR_nothing;
22286 sri->extra_cost = 0;
22287 }
22288 return true;
22289 }
22290
22291 /* Now check if we can do it in a few steps. */
22292 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
22293 altivec_p);
22294 }
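
/* Minimal usage sketch (hypothetical caller, mirroring the use in
   rs6000_secondary_reload below):

	secondary_reload_info sri;
	sri.icode = CODE_FOR_nothing;
	sri.extra_cost = 0;
	bool ok = rs6000_secondary_reload_move (to_type, from_type, mode,
						&sri, altivec_p);

   On success, sri.icode stays CODE_FOR_nothing for a simple move and is
   set to the helper pattern for a multi-step direct move.  */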
22295
22296 /* Inform reload about cases where moving X with a mode MODE to a register in
22297 RCLASS requires an extra scratch or immediate register. Return the class
22298 needed for the immediate register.
22299
22300 For VSX and Altivec, we may need a register to convert sp+offset into
22301 reg+sp.
22302
22303 For misaligned 64-bit gpr loads and stores we need a register to
22304 convert an offset address to indirect. */
22305
22306 static reg_class_t
22307 rs6000_secondary_reload (bool in_p,
22308 rtx x,
22309 reg_class_t rclass_i,
22310 machine_mode mode,
22311 secondary_reload_info *sri)
22312 {
22313 enum reg_class rclass = (enum reg_class) rclass_i;
22314 reg_class_t ret = ALL_REGS;
22315 enum insn_code icode;
22316 bool default_p = false;
22317 bool done_p = false;
22318
22319 /* Allow subreg of memory before/during reload. */
22320 bool memory_p = (MEM_P (x)
22321 || (!reload_completed && GET_CODE (x) == SUBREG
22322 && MEM_P (SUBREG_REG (x))));
22323
22324 sri->icode = CODE_FOR_nothing;
22325 sri->t_icode = CODE_FOR_nothing;
22326 sri->extra_cost = 0;
22327 icode = ((in_p)
22328 ? reg_addr[mode].reload_load
22329 : reg_addr[mode].reload_store);
22330
22331 if (REG_P (x) || register_operand (x, mode))
22332 {
22333 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
22334 bool altivec_p = (rclass == ALTIVEC_REGS);
22335 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
22336
22337 if (!in_p)
22338 std::swap (to_type, from_type);
22339
22340 /* Can we do a direct move of some sort? */
22341 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
22342 altivec_p))
22343 {
22344 icode = (enum insn_code)sri->icode;
22345 default_p = false;
22346 done_p = true;
22347 ret = NO_REGS;
22348 }
22349 }
22350
22351 /* Make sure 0.0 is not reloaded or forced into memory. */
22352 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
22353 {
22354 ret = NO_REGS;
22355 default_p = false;
22356 done_p = true;
22357 }
22358
22359 /* If this is a scalar floating point value and we want to load it into the
22360 traditional Altivec registers, move it via a traditional floating
22361 point register, unless we have D-form addressing. Also make sure that
22362 non-zero constants use a FPR. */
22363 if (!done_p && reg_addr[mode].scalar_in_vmx_p
22364 && !mode_supports_vmx_dform (mode)
22365 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
22366 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
22367 {
22368 ret = FLOAT_REGS;
22369 default_p = false;
22370 done_p = true;
22371 }
22372
22373 /* Handle reload of load/stores if we have reload helper functions. */
22374 if (!done_p && icode != CODE_FOR_nothing && memory_p)
22375 {
22376 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
22377 mode);
22378
22379 if (extra_cost >= 0)
22380 {
22381 done_p = true;
22382 ret = NO_REGS;
22383 if (extra_cost > 0)
22384 {
22385 sri->extra_cost = extra_cost;
22386 sri->icode = icode;
22387 }
22388 }
22389 }
22390
22391 /* Handle unaligned loads and stores of integer registers. */
22392 if (!done_p && TARGET_POWERPC64
22393 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22394 && memory_p
22395 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
22396 {
22397 rtx addr = XEXP (x, 0);
22398 rtx off = address_offset (addr);
22399
22400 if (off != NULL_RTX)
22401 {
22402 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22403 unsigned HOST_WIDE_INT offset = INTVAL (off);
22404
22405 /* We need a secondary reload when our legitimate_address_p
22406 says the address is good (as otherwise the entire address
22407 will be reloaded), and the offset is not a multiple of
22408 four or we have an address wrap. Address wrap will only
22409 occur for LO_SUMs since legitimate_offset_address_p
22410 rejects addresses for 16-byte mems that will wrap. */
22411 if (GET_CODE (addr) == LO_SUM
22412 ? (1 /* legitimate_address_p allows any offset for lo_sum */
22413 && ((offset & 3) != 0
22414 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
22415 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
22416 && (offset & 3) != 0))
22417 {
22418 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
22419 if (in_p)
22420 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
22421 : CODE_FOR_reload_di_load);
22422 else
22423 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
22424 : CODE_FOR_reload_di_store);
22425 sri->extra_cost = 2;
22426 ret = NO_REGS;
22427 done_p = true;
22428 }
22429 else
22430 default_p = true;
22431 }
22432 else
22433 default_p = true;
22434 }
22435
22436 if (!done_p && !TARGET_POWERPC64
22437 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
22438 && memory_p
22439 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
22440 {
22441 rtx addr = XEXP (x, 0);
22442 rtx off = address_offset (addr);
22443
22444 if (off != NULL_RTX)
22445 {
22446 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
22447 unsigned HOST_WIDE_INT offset = INTVAL (off);
22448
22449 /* We need a secondary reload when our legitimate_address_p
22450 says the address is good (as otherwise the entire address
22451 will be reloaded), and we have a wrap.
22452
22453 legitimate_lo_sum_address_p allows LO_SUM addresses to
22454 have any offset so test for wrap in the low 16 bits.
22455
22456 legitimate_offset_address_p checks for the range
22457 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
22458 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
22459 [0x7ff4,0x7fff] respectively, so test for the
22460 intersection of these ranges, [0x7ffc,0x7fff] and
22461 [0x7ff4,0x7ff7] respectively.
22462
22463 Note that the address we see here may have been
22464 manipulated by legitimize_reload_address. */
22465 if (GET_CODE (addr) == LO_SUM
22466 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
22467 : offset - (0x8000 - extra) < UNITS_PER_WORD)
22468 {
22469 if (in_p)
22470 sri->icode = CODE_FOR_reload_si_load;
22471 else
22472 sri->icode = CODE_FOR_reload_si_store;
22473 sri->extra_cost = 2;
22474 ret = NO_REGS;
22475 done_p = true;
22476 }
22477 else
22478 default_p = true;
22479 }
22480 else
22481 default_p = true;
22482 }
22483
22484 if (!done_p)
22485 default_p = true;
22486
22487 if (default_p)
22488 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
22489
22490 gcc_assert (ret != ALL_REGS);
22491
22492 if (TARGET_DEBUG_ADDR)
22493 {
22494 fprintf (stderr,
22495 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
22496 "mode = %s",
22497 reg_class_names[ret],
22498 in_p ? "true" : "false",
22499 reg_class_names[rclass],
22500 GET_MODE_NAME (mode));
22501
22502 if (reload_completed)
22503 fputs (", after reload", stderr);
22504
22505 if (!done_p)
22506 fputs (", done_p not set", stderr);
22507
22508 if (default_p)
22509 fputs (", default secondary reload", stderr);
22510
22511 if (sri->icode != CODE_FOR_nothing)
22512 fprintf (stderr, ", reload func = %s, extra cost = %d",
22513 insn_data[sri->icode].name, sri->extra_cost);
22514
22515 else if (sri->extra_cost > 0)
22516 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
22517
22518 fputs ("\n", stderr);
22519 debug_rtx (x);
22520 }
22521
22522 return ret;
22523 }
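
/* With -mdebug=addr this hook traces its decision; using the format
   strings above, a successful load reload might print something like
   (all values hypothetical):

   rs6000_secondary_reload, return NO_REGS, in_p = true, rclass = VSX_REGS,
   mode = V2DF, reload func = reload_v2df_di_load, extra cost = 1  */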
22524
22525 /* Better tracing for rs6000_secondary_reload_inner. */
22526
22527 static void
22528 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
22529 bool store_p)
22530 {
22531 rtx set, clobber;
22532
22533 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
22534
22535 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
22536 store_p ? "store" : "load");
22537
22538 if (store_p)
22539 set = gen_rtx_SET (mem, reg);
22540 else
22541 set = gen_rtx_SET (reg, mem);
22542
22543 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
22544 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
22545 }
22546
22547 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
22548 ATTRIBUTE_NORETURN;
22549
22550 static void
22551 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
22552 bool store_p)
22553 {
22554 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
22555 gcc_unreachable ();
22556 }
22557
22558 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
22559 reload helper functions. These were identified in
22560 rs6000_secondary_reload_memory, and if reload decided to use the secondary
22561 reload, it calls the insns:
22562 reload_<RELOAD:mode>_<P:mptrsize>_store
22563 reload_<RELOAD:mode>_<P:mptrsize>_load
22564
22565 which in turn calls this function, to do whatever is necessary to create
22566 valid addresses. */
22567
22568 void
22569 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
22570 {
22571 int regno = true_regnum (reg);
22572 machine_mode mode = GET_MODE (reg);
22573 addr_mask_type addr_mask;
22574 rtx addr;
22575 rtx new_addr;
22576 rtx op_reg, op0, op1;
22577 rtx and_op;
22578 rtx cc_clobber;
22579 rtvec rv;
22580
22581 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
22582 || !base_reg_operand (scratch, GET_MODE (scratch)))
22583 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22584
22585 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
22586 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
22587
22588 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
22589 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
22590
22591 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
22592 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
22593
22594 else
22595 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22596
22597 /* Make sure the mode is valid in this register class. */
22598 if ((addr_mask & RELOAD_REG_VALID) == 0)
22599 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22600
22601 if (TARGET_DEBUG_ADDR)
22602 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
22603
22604 new_addr = addr = XEXP (mem, 0);
22605 switch (GET_CODE (addr))
22606 {
22607 /* Does the register class support auto update forms for this mode? If
22608 not, do the update now. We don't need a scratch register, since the
22609 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
22610 case PRE_INC:
22611 case PRE_DEC:
22612 op_reg = XEXP (addr, 0);
22613 if (!base_reg_operand (op_reg, Pmode))
22614 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22615
22616 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
22617 {
22618 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
22619 new_addr = op_reg;
22620 }
22621 break;
22622
22623 case PRE_MODIFY:
22624 op0 = XEXP (addr, 0);
22625 op1 = XEXP (addr, 1);
22626 if (!base_reg_operand (op0, Pmode)
22627 || GET_CODE (op1) != PLUS
22628 || !rtx_equal_p (op0, XEXP (op1, 0)))
22629 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22630
22631 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
22632 {
22633 emit_insn (gen_rtx_SET (op0, op1));
22634 new_addr = op0;
22635 }
22636 break;
22637
22638 /* Do we need to simulate AND -16 to clear the bottom address bits used
22639 in VMX load/stores? */
22640 case AND:
22641 op0 = XEXP (addr, 0);
22642 op1 = XEXP (addr, 1);
22643 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
22644 {
22645 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
22646 op_reg = op0;
22647
22648 else if (GET_CODE (op0) == PLUS)
22649 {
22650 emit_insn (gen_rtx_SET (scratch, op0));
22651 op_reg = scratch;
22652 }
22653
22654 else
22655 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22656
22657 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
22658 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
22659 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
22660 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
22661 new_addr = scratch;
22662 }
22663 break;
22664
22665 /* If this is an indirect address, make sure it is a base register. */
22666 case REG:
22667 case SUBREG:
22668 if (!base_reg_operand (addr, GET_MODE (addr)))
22669 {
22670 emit_insn (gen_rtx_SET (scratch, addr));
22671 new_addr = scratch;
22672 }
22673 break;
22674
22675 /* If this is an indexed address, make sure the register class can handle
22676 indexed addresses for this mode. */
22677 case PLUS:
22678 op0 = XEXP (addr, 0);
22679 op1 = XEXP (addr, 1);
22680 if (!base_reg_operand (op0, Pmode))
22681 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22682
22683 else if (int_reg_operand (op1, Pmode))
22684 {
22685 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22686 {
22687 emit_insn (gen_rtx_SET (scratch, addr));
22688 new_addr = scratch;
22689 }
22690 }
22691
22692 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
22693 {
22694 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
22695 || !quad_address_p (addr, mode, false))
22696 {
22697 emit_insn (gen_rtx_SET (scratch, addr));
22698 new_addr = scratch;
22699 }
22700 }
22701
22702 /* Make sure the register class can handle offset addresses. */
22703 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
22704 {
22705 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22706 {
22707 emit_insn (gen_rtx_SET (scratch, addr));
22708 new_addr = scratch;
22709 }
22710 }
22711
22712 else
22713 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22714
22715 break;
22716
22717 case LO_SUM:
22718 op0 = XEXP (addr, 0);
22719 op1 = XEXP (addr, 1);
22720 if (!base_reg_operand (op0, Pmode))
22721 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22722
22723 else if (int_reg_operand (op1, Pmode))
22724 {
22725 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
22726 {
22727 emit_insn (gen_rtx_SET (scratch, addr));
22728 new_addr = scratch;
22729 }
22730 }
22731
22732 /* Quad offsets are restricted and can't handle normal addresses. */
22733 else if (mode_supports_vsx_dform_quad (mode))
22734 {
22735 emit_insn (gen_rtx_SET (scratch, addr));
22736 new_addr = scratch;
22737 }
22738
22739 /* Make sure the register class can handle offset addresses. */
22740 else if (legitimate_lo_sum_address_p (mode, addr, false))
22741 {
22742 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
22743 {
22744 emit_insn (gen_rtx_SET (scratch, addr));
22745 new_addr = scratch;
22746 }
22747 }
22748
22749 else
22750 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22751
22752 break;
22753
22754 case SYMBOL_REF:
22755 case CONST:
22756 case LABEL_REF:
22757 rs6000_emit_move (scratch, addr, Pmode);
22758 new_addr = scratch;
22759 break;
22760
22761 default:
22762 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
22763 }
22764
22765 /* Adjust the address if it changed. */
22766 if (addr != new_addr)
22767 {
22768 mem = replace_equiv_address_nv (mem, new_addr);
22769 if (TARGET_DEBUG_ADDR)
22770 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
22771 }
22772
22773 /* Now create the move. */
22774 if (store_p)
22775 emit_insn (gen_rtx_SET (mem, reg));
22776 else
22777 emit_insn (gen_rtx_SET (reg, mem));
22778
22779 return;
22780 }
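
/* Illustrative sketch of the AND case above, with hypothetical registers:
   for mem = (mem:V2DF (and:DI (reg:DI 9) (const_int -16))) in a register
   class without the AND -16 capability, the code emits

	(parallel [(set (reg:DI scratch)
			(and:DI (reg:DI 9) (const_int -16)))
		   (clobber (scratch:CC))])

   and then performs the actual move through (mem:V2DF (reg:DI scratch)).  */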
22781
22782 /* Convert reloads involving 64-bit gprs and misaligned offset
22783 addressing, or multiple 32-bit gprs and offsets that are too large,
22784 to use indirect addressing. */
22785
22786 void
22787 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
22788 {
22789 int regno = true_regnum (reg);
22790 enum reg_class rclass;
22791 rtx addr;
22792 rtx scratch_or_premodify = scratch;
22793
22794 if (TARGET_DEBUG_ADDR)
22795 {
22796 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
22797 store_p ? "store" : "load");
22798 fprintf (stderr, "reg:\n");
22799 debug_rtx (reg);
22800 fprintf (stderr, "mem:\n");
22801 debug_rtx (mem);
22802 fprintf (stderr, "scratch:\n");
22803 debug_rtx (scratch);
22804 }
22805
22806 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
22807 gcc_assert (GET_CODE (mem) == MEM);
22808 rclass = REGNO_REG_CLASS (regno);
22809 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
22810 addr = XEXP (mem, 0);
22811
22812 if (GET_CODE (addr) == PRE_MODIFY)
22813 {
22814 gcc_assert (REG_P (XEXP (addr, 0))
22815 && GET_CODE (XEXP (addr, 1)) == PLUS
22816 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
22817 scratch_or_premodify = XEXP (addr, 0);
22818 if (!HARD_REGISTER_P (scratch_or_premodify))
22819 /* If we have a pseudo here then reload will have arranged
22820 to have it replaced, but only in the original insn.
22821 Use the replacement here too. */
22822 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
22823
22824 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
22825 expressions from the original insn, without unsharing them.
22826 Any RTL that points into the original insn will of course
22827 have register replacements applied. That is why we don't
22828 need to look for replacements under the PLUS. */
22829 addr = XEXP (addr, 1);
22830 }
22831 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
22832
22833 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
22834
22835 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
22836
22837 /* Now create the move. */
22838 if (store_p)
22839 emit_insn (gen_rtx_SET (mem, reg));
22840 else
22841 emit_insn (gen_rtx_SET (reg, mem));
22842
22843 return;
22844 }
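
/* Illustrative example with hypothetical values: a misaligned DImode
   access such as (mem:DI (plus:DI (reg:DI 1) (const_int 32770))) is
   rewritten by first moving the whole address into the scratch register:

	(set (reg:DI scratch) (plus:DI (reg:DI 1) (const_int 32770)))
	(set (reg:DI 10) (mem:DI (reg:DI scratch)))  */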
22845
22846 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
22847 this function has any SDmode references. If we are on a power7 or later, we
22848 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
22849 can load/store the value. */
22850
22851 static void
22852 rs6000_alloc_sdmode_stack_slot (void)
22853 {
22854 tree t;
22855 basic_block bb;
22856 gimple_stmt_iterator gsi;
22857
22858 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
22859 /* We use a different approach for dealing with the secondary
22860 memory in LRA. */
22861 if (ira_use_lra_p)
22862 return;
22863
22864 if (TARGET_NO_SDMODE_STACK)
22865 return;
22866
22867 FOR_EACH_BB_FN (bb, cfun)
22868 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
22869 {
22870 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
22871 if (ret)
22872 {
22873 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22874 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22875 SDmode, 0);
22876 return;
22877 }
22878 }
22879
22880 /* Check for any SDmode parameters of the function. */
22881 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
22882 {
22883 if (TREE_TYPE (t) == error_mark_node)
22884 continue;
22885
22886 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
22887 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
22888 {
22889 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
22890 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
22891 SDmode, 0);
22892 return;
22893 }
22894 }
22895 }
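
/* For illustration only, a function with SDmode references that makes the
   walk above allocate the stack slot on pre-power7 targets (which lack
   LFIWZX/STFIWX):

	_Decimal32
	sd_copy (_Decimal32 x)
	{
	  return x;
	}
*/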
22896
22897 static void
22898 rs6000_instantiate_decls (void)
22899 {
22900 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
22901 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
22902 }
22903
22904 /* Given an rtx X being reloaded into a reg required to be
22905 in class CLASS, return the class of reg to actually use.
22906 In general this is just CLASS; but on some machines
22907 in some cases it is preferable to use a more restrictive class.
22908
22909 On the RS/6000, we have to return NO_REGS when we want to reload a
22910 floating-point CONST_DOUBLE to force it to be copied to memory.
22911
22912 We also don't want to reload integer values into floating-point
22913 registers if we can at all help it. In fact, this can
22914 cause reload to die, if it tries to generate a reload of CTR
22915 into a FP register and discovers it doesn't have the memory location
22916 required.
22917
22918 ??? Would it be a good idea to have reload do the converse, that is
22919 try to reload floating modes into FP registers if possible?
22920 */
22921
22922 static enum reg_class
22923 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
22924 {
22925 machine_mode mode = GET_MODE (x);
22926 bool is_constant = CONSTANT_P (x);
22927
22928 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
22929 reload class for it. */
22930 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22931 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
22932 return NO_REGS;
22933
22934 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
22935 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
22936 return NO_REGS;
22937
22938 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
22939 the reloading of address expressions using PLUS into floating point
22940 registers. */
22941 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
22942 {
22943 if (is_constant)
22944 {
22945 /* Zero is always allowed in all VSX registers. */
22946 if (x == CONST0_RTX (mode))
22947 return rclass;
22948
22949 /* If this is a vector constant that can be formed with a few Altivec
22950 instructions, we want altivec registers. */
22951 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
22952 return ALTIVEC_REGS;
22953
22954 /* If this is an integer constant that can easily be loaded into
22955 vector registers, allow it. */
22956 if (CONST_INT_P (x))
22957 {
22958 HOST_WIDE_INT value = INTVAL (x);
22959
22960 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
22961 2.06 can generate it in the Altivec registers with
22962 VSPLTI<x>. */
22963 if (value == -1)
22964 {
22965 if (TARGET_P8_VECTOR)
22966 return rclass;
22967 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22968 return ALTIVEC_REGS;
22969 else
22970 return NO_REGS;
22971 }
22972
22973 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
22974 a sign extend in the Altivec registers. */
22975 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
22976 && TARGET_VSX_SMALL_INTEGER
22977 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
22978 return ALTIVEC_REGS;
22979 }
22980
22981 /* Force constant to memory. */
22982 return NO_REGS;
22983 }
22984
22985 /* D-form addressing can easily reload the value. */
22986 if (mode_supports_vmx_dform (mode)
22987 || mode_supports_vsx_dform_quad (mode))
22988 return rclass;
22989
22990 /* If this is a scalar floating point value and we don't have D-form
22991 addressing, prefer the traditional floating point registers so that we
22992 can use D-form (register+offset) addressing. */
22993 if (rclass == VSX_REGS
22994 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
22995 return FLOAT_REGS;
22996
22997 /* Prefer the Altivec registers if Altivec is handling the vector
22998 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
22999 loads. */
23000 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
23001 || mode == V1TImode)
23002 return ALTIVEC_REGS;
23003
23004 return rclass;
23005 }
23006
23007 if (is_constant || GET_CODE (x) == PLUS)
23008 {
23009 if (reg_class_subset_p (GENERAL_REGS, rclass))
23010 return GENERAL_REGS;
23011 if (reg_class_subset_p (BASE_REGS, rclass))
23012 return BASE_REGS;
23013 return NO_REGS;
23014 }
23015
23016 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
23017 return GENERAL_REGS;
23018
23019 return rclass;
23020 }
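
/* Illustrative examples of the constant handling above: (const_int -1) in
   a vector mode keeps the full VSX_REGS class on ISA 2.07, where XXLORC
   can generate all ones in any VSX register, but narrows to ALTIVEC_REGS
   on ISA 2.06, where only VSPLTIS<x> is available; a CONST_DOUBLE that is
   not zero returns NO_REGS, forcing the constant to memory.  */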
23021
23022 /* Debug version of rs6000_preferred_reload_class. */
23023 static enum reg_class
23024 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
23025 {
23026 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
23027
23028 fprintf (stderr,
23029 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
23030 "mode = %s, x:\n",
23031 reg_class_names[ret], reg_class_names[rclass],
23032 GET_MODE_NAME (GET_MODE (x)));
23033 debug_rtx (x);
23034
23035 return ret;
23036 }
23037
23038 /* If we are copying between FP or AltiVec registers and anything else, we need
23039 a memory location. The exception is when we are targeting ppc64 and the
23040 fpr-to/from-gpr move instructions are available. Also, under VSX, you
23041 can copy vector registers from the FP register set to the Altivec register
23042 set and vice versa. */
23043
23044 static bool
23045 rs6000_secondary_memory_needed (enum reg_class from_class,
23046 enum reg_class to_class,
23047 machine_mode mode)
23048 {
23049 enum rs6000_reg_type from_type, to_type;
23050 bool altivec_p = ((from_class == ALTIVEC_REGS)
23051 || (to_class == ALTIVEC_REGS));
23052
23053 /* If a simple/direct move is available, we don't need secondary memory. */
23054 from_type = reg_class_to_reg_type[(int)from_class];
23055 to_type = reg_class_to_reg_type[(int)to_class];
23056
23057 if (rs6000_secondary_reload_move (to_type, from_type, mode,
23058 (secondary_reload_info *)0, altivec_p))
23059 return false;
23060
23061 /* If we have a floating point or vector register class, we need to use
23062 memory to transfer the data. */
23063 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
23064 return true;
23065
23066 return false;
23067 }
23068
23069 /* Debug version of rs6000_secondary_memory_needed. */
23070 static bool
23071 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
23072 enum reg_class to_class,
23073 machine_mode mode)
23074 {
23075 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
23076
23077 fprintf (stderr,
23078 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
23079 "to_class = %s, mode = %s\n",
23080 ret ? "true" : "false",
23081 reg_class_names[from_class],
23082 reg_class_names[to_class],
23083 GET_MODE_NAME (mode));
23084
23085 return ret;
23086 }
23087
23088 /* Return the register class of a scratch register needed to copy IN into
23089 or out of a register in RCLASS in MODE. If it can be done directly,
23090 NO_REGS is returned. */
23091
23092 static enum reg_class
23093 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
23094 rtx in)
23095 {
23096 int regno;
23097
23098 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
23099 #if TARGET_MACHO
23100 && MACHOPIC_INDIRECT
23101 #endif
23102 ))
23103 {
23104 /* We cannot copy a symbolic operand directly into anything
23105 other than BASE_REGS for TARGET_ELF. So indicate that a
23106 register from BASE_REGS is needed as an intermediate
23107 register.
23108
23109 On Darwin, pic addresses require a load from memory, which
23110 needs a base register. */
23111 if (rclass != BASE_REGS
23112 && (GET_CODE (in) == SYMBOL_REF
23113 || GET_CODE (in) == HIGH
23114 || GET_CODE (in) == LABEL_REF
23115 || GET_CODE (in) == CONST))
23116 return BASE_REGS;
23117 }
23118
23119 if (GET_CODE (in) == REG)
23120 {
23121 regno = REGNO (in);
23122 if (regno >= FIRST_PSEUDO_REGISTER)
23123 {
23124 regno = true_regnum (in);
23125 if (regno >= FIRST_PSEUDO_REGISTER)
23126 regno = -1;
23127 }
23128 }
23129 else if (GET_CODE (in) == SUBREG)
23130 {
23131 regno = true_regnum (in);
23132 if (regno >= FIRST_PSEUDO_REGISTER)
23133 regno = -1;
23134 }
23135 else
23136 regno = -1;
23137
23138 /* If we have VSX register moves, prefer moving scalar values between
23139 Altivec registers and GPR by going via an FPR (and then via memory)
23140 instead of reloading the secondary memory address for Altivec moves. */
23141 if (TARGET_VSX
23142 && GET_MODE_SIZE (mode) < 16
23143 && !mode_supports_vmx_dform (mode)
23144 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
23145 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
23146 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
23147 && (regno >= 0 && INT_REGNO_P (regno)))))
23148 return FLOAT_REGS;
23149
23150 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
23151 into anything. */
23152 if (rclass == GENERAL_REGS || rclass == BASE_REGS
23153 || (regno >= 0 && INT_REGNO_P (regno)))
23154 return NO_REGS;
23155
23156 /* Constants, memory, and VSX registers can go into VSX registers (both the
23157 traditional floating point and the altivec registers). */
23158 if (rclass == VSX_REGS
23159 && (regno == -1 || VSX_REGNO_P (regno)))
23160 return NO_REGS;
23161
23162 /* Constants, memory, and FP registers can go into FP registers. */
23163 if ((regno == -1 || FP_REGNO_P (regno))
23164 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
23165 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
23166
23167 /* Memory and AltiVec registers can go into AltiVec registers. */
23168 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
23169 && rclass == ALTIVEC_REGS)
23170 return NO_REGS;
23171
23172 /* We can copy among the CR registers. */
23173 if ((rclass == CR_REGS || rclass == CR0_REGS)
23174 && regno >= 0 && CR_REGNO_P (regno))
23175 return NO_REGS;
23176
23177 /* Otherwise, we need GENERAL_REGS. */
23178 return GENERAL_REGS;
23179 }
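
/* Illustrative example (hypothetical): an SFmode value sitting in an
   Altivec register that must reach GENERAL_REGS without direct moves
   returns FLOAT_REGS above, so the value is staged through a traditional
   FPR, which has D-form loads/stores, instead of reloading a secondary
   memory address for the Altivec register.  */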
23180
23181 /* Debug version of rs6000_secondary_reload_class. */
23182 static enum reg_class
23183 rs6000_debug_secondary_reload_class (enum reg_class rclass,
23184 machine_mode mode, rtx in)
23185 {
23186 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
23187 fprintf (stderr,
23188 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
23189 "mode = %s, input rtx:\n",
23190 reg_class_names[ret], reg_class_names[rclass],
23191 GET_MODE_NAME (mode));
23192 debug_rtx (in);
23193
23194 return ret;
23195 }
23196
23197 /* Return true if, for RCLASS, a mode change from FROM to TO is invalid. */
23198
23199 static bool
23200 rs6000_cannot_change_mode_class (machine_mode from,
23201 machine_mode to,
23202 enum reg_class rclass)
23203 {
23204 unsigned from_size = GET_MODE_SIZE (from);
23205 unsigned to_size = GET_MODE_SIZE (to);
23206
23207 if (from_size != to_size)
23208 {
23209 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
23210
23211 if (reg_classes_intersect_p (xclass, rclass))
23212 {
23213 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
23214 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
23215 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
23216 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
23217
23218 /* Don't allow 64-bit types to overlap with 128-bit types that take a
23219 single register under VSX because the scalar part of the register
23220 is in the upper 64-bits, and not the lower 64-bits. Types like
23221 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
23222 IEEE floating point can't overlap, and neither can small
23223 values. */
23224
23225 if (to_float128_vector_p && from_float128_vector_p)
23226 return false;
23227
23228 else if (to_float128_vector_p || from_float128_vector_p)
23229 return true;
23230
23231 /* TDmode in floating-mode registers must always go into a register
23232 pair with the most significant word in the even-numbered register
23233 to match ISA requirements. In little-endian mode, this does not
23234 match subreg numbering, so we cannot allow subregs. */
23235 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
23236 return true;
23237
23238 if (from_size < 8 || to_size < 8)
23239 return true;
23240
23241 if (from_size == 8 && (8 * to_nregs) != to_size)
23242 return true;
23243
23244 if (to_size == 8 && (8 * from_nregs) != from_size)
23245 return true;
23246
23247 return false;
23248 }
23249 else
23250 return false;
23251 }
23252
23253 if (TARGET_E500_DOUBLE
23254 && ((((to) == DFmode) + ((from) == DFmode)) == 1
23255 || (((to) == TFmode) + ((from) == TFmode)) == 1
23256 || (((to) == IFmode) + ((from) == IFmode)) == 1
23257 || (((to) == KFmode) + ((from) == KFmode)) == 1
23258 || (((to) == DDmode) + ((from) == DDmode)) == 1
23259 || (((to) == TDmode) + ((from) == TDmode)) == 1
23260 || (((to) == DImode) + ((from) == DImode)) == 1))
23261 return true;
23262
23263 /* Since the VSX register set includes traditional floating point registers
23264 and altivec registers, just check for the size being different instead of
23265 trying to check whether the modes are vector modes. Otherwise it won't
23266 allow say DF and DI to change classes. For types like TFmode and TDmode
23267 that take 2 64-bit registers, rather than a single 128-bit register, don't
23268 allow subregs of those types to other 128-bit types. */
23269 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
23270 {
23271 unsigned num_regs = (from_size + 15) / 16;
23272 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
23273 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
23274 return true;
23275
23276 return (from_size != 8 && from_size != 16);
23277 }
23278
23279 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
23280 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
23281 return true;
23282
23283 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
23284 && reg_classes_intersect_p (GENERAL_REGS, rclass))
23285 return true;
23286
23287 return false;
23288 }
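
/* Illustrative examples under VSX: DFmode and DImode (both 8 bytes) may
   change classes freely, but a DImode subreg of a V2DImode register is
   rejected above because the 64-bit scalar lives in the upper half of the
   128-bit register, which subreg numbering cannot express.  */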
23289
23290 /* Debug version of rs6000_cannot_change_mode_class. */
23291 static bool
23292 rs6000_debug_cannot_change_mode_class (machine_mode from,
23293 machine_mode to,
23294 enum reg_class rclass)
23295 {
23296 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
23297
23298 fprintf (stderr,
23299 "rs6000_cannot_change_mode_class, return %s, from = %s, "
23300 "to = %s, rclass = %s\n",
23301 ret ? "true" : "false",
23302 GET_MODE_NAME (from), GET_MODE_NAME (to),
23303 reg_class_names[rclass]);
23304
23305 return ret;
23306 }
23307 \f
23308 /* Return a string to do a move operation of 128 bits of data. */
23309
23310 const char *
23311 rs6000_output_move_128bit (rtx operands[])
23312 {
23313 rtx dest = operands[0];
23314 rtx src = operands[1];
23315 machine_mode mode = GET_MODE (dest);
23316 int dest_regno;
23317 int src_regno;
23318 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
23319 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
23320
23321 if (REG_P (dest))
23322 {
23323 dest_regno = REGNO (dest);
23324 dest_gpr_p = INT_REGNO_P (dest_regno);
23325 dest_fp_p = FP_REGNO_P (dest_regno);
23326 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
23327 dest_vsx_p = dest_fp_p | dest_vmx_p;
23328 }
23329 else
23330 {
23331 dest_regno = -1;
23332 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
23333 }
23334
23335 if (REG_P (src))
23336 {
23337 src_regno = REGNO (src);
23338 src_gpr_p = INT_REGNO_P (src_regno);
23339 src_fp_p = FP_REGNO_P (src_regno);
23340 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
23341 src_vsx_p = src_fp_p | src_vmx_p;
23342 }
23343 else
23344 {
23345 src_regno = -1;
23346 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
23347 }
23348
23349 /* Register moves. */
23350 if (dest_regno >= 0 && src_regno >= 0)
23351 {
23352 if (dest_gpr_p)
23353 {
23354 if (src_gpr_p)
23355 return "#";
23356
23357 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
23358 return (WORDS_BIG_ENDIAN
23359 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
23360 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
23361
23362 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
23363 return "#";
23364 }
23365
23366 else if (TARGET_VSX && dest_vsx_p)
23367 {
23368 if (src_vsx_p)
23369 return "xxlor %x0,%x1,%x1";
23370
23371 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
23372 return (WORDS_BIG_ENDIAN
23373 ? "mtvsrdd %x0,%1,%L1"
23374 : "mtvsrdd %x0,%L1,%1");
23375
23376 else if (TARGET_DIRECT_MOVE && src_gpr_p)
23377 return "#";
23378 }
23379
23380 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
23381 return "vor %0,%1,%1";
23382
23383 else if (dest_fp_p && src_fp_p)
23384 return "#";
23385 }
23386
23387 /* Loads. */
23388 else if (dest_regno >= 0 && MEM_P (src))
23389 {
23390 if (dest_gpr_p)
23391 {
23392 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23393 return "lq %0,%1";
23394 else
23395 return "#";
23396 }
23397
23398 else if (TARGET_ALTIVEC && dest_vmx_p
23399 && altivec_indexed_or_indirect_operand (src, mode))
23400 return "lvx %0,%y1";
23401
23402 else if (TARGET_VSX && dest_vsx_p)
23403 {
23404 if (mode_supports_vsx_dform_quad (mode)
23405 && quad_address_p (XEXP (src, 0), mode, true))
23406 return "lxv %x0,%1";
23407
23408 else if (TARGET_P9_VECTOR)
23409 return "lxvx %x0,%y1";
23410
23411 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23412 return "lxvw4x %x0,%y1";
23413
23414 else
23415 return "lxvd2x %x0,%y1";
23416 }
23417
23418 else if (TARGET_ALTIVEC && dest_vmx_p)
23419 return "lvx %0,%y1";
23420
23421 else if (dest_fp_p)
23422 return "#";
23423 }
23424
23425 /* Stores. */
23426 else if (src_regno >= 0 && MEM_P (dest))
23427 {
23428 if (src_gpr_p)
23429 {
23430 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
23431 return "stq %1,%0";
23432 else
23433 return "#";
23434 }
23435
23436 else if (TARGET_ALTIVEC && src_vmx_p
23437 && altivec_indexed_or_indirect_operand (dest, mode))
23438 return "stvx %1,%y0";
23439
23440 else if (TARGET_VSX && src_vsx_p)
23441 {
23442 if (mode_supports_vsx_dform_quad (mode)
23443 && quad_address_p (XEXP (dest, 0), mode, true))
23444 return "stxv %x1,%0";
23445
23446 else if (TARGET_P9_VECTOR)
23447 return "stxvx %x1,%y0";
23448
23449 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
23450 return "stxvw4x %x1,%y0";
23451
23452 else
23453 return "stxvd2x %x1,%y0";
23454 }
23455
23456 else if (TARGET_ALTIVEC && src_vmx_p)
23457 return "stvx %1,%y0";
23458
23459 else if (src_fp_p)
23460 return "#";
23461 }
23462
23463 /* Constants. */
23464 else if (dest_regno >= 0
23465 && (GET_CODE (src) == CONST_INT
23466 || GET_CODE (src) == CONST_WIDE_INT
23467 || GET_CODE (src) == CONST_DOUBLE
23468 || GET_CODE (src) == CONST_VECTOR))
23469 {
23470 if (dest_gpr_p)
23471 return "#";
23472
23473 else if ((dest_vmx_p && TARGET_ALTIVEC)
23474 || (dest_vsx_p && TARGET_VSX))
23475 return output_vec_const_move (operands);
23476 }
23477
23478 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
23479 }
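
/* Illustrative outputs, taken from the strings above with hypothetical
   operands: a VSX-to-VSX register move prints "xxlor %x0,%x1,%x1", a
   quad-memory GPR load prints "lq %0,%1", and a GPR-to-GPR move returns
   "#" so the insn is split into doubleword moves after reload.  */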
23480
23481 /* Validate a 128-bit move. */
23482 bool
23483 rs6000_move_128bit_ok_p (rtx operands[])
23484 {
23485 machine_mode mode = GET_MODE (operands[0]);
23486 return (gpc_reg_operand (operands[0], mode)
23487 || gpc_reg_operand (operands[1], mode));
23488 }
23489
23490 /* Return true if a 128-bit move needs to be split. */
23491 bool
23492 rs6000_split_128bit_ok_p (rtx operands[])
23493 {
23494 if (!reload_completed)
23495 return false;
23496
23497 if (!gpr_or_gpr_p (operands[0], operands[1]))
23498 return false;
23499
23500 if (quad_load_store_p (operands[0], operands[1]))
23501 return false;
23502
23503 return true;
23504 }
23505
23506 \f
23507 /* Given a comparison operation, return the bit number in CCR to test. We
23508 know this is a valid comparison.
23509
23510 SCC_P is 1 if this is for an scc. That means that %D will have been
23511 used instead of %C, so the bits will be in different places.
23512
23513 Return -1 if OP isn't a valid comparison for some reason. */
23514
23515 int
23516 ccr_bit (rtx op, int scc_p)
23517 {
23518 enum rtx_code code = GET_CODE (op);
23519 machine_mode cc_mode;
23520 int cc_regnum;
23521 int base_bit;
23522 rtx reg;
23523
23524 if (!COMPARISON_P (op))
23525 return -1;
23526
23527 reg = XEXP (op, 0);
23528
23529 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
23530
23531 cc_mode = GET_MODE (reg);
23532 cc_regnum = REGNO (reg);
23533 base_bit = 4 * (cc_regnum - CR0_REGNO);
23534
23535 validate_condition_mode (code, cc_mode);
23536
23537 /* When generating a sCOND operation, only positive conditions are
23538 allowed. */
23539 gcc_assert (!scc_p
23540 || code == EQ || code == GT || code == LT || code == UNORDERED
23541 || code == GTU || code == LTU);
23542
23543 switch (code)
23544 {
23545 case NE:
23546 return scc_p ? base_bit + 3 : base_bit + 2;
23547 case EQ:
23548 return base_bit + 2;
23549 case GT: case GTU: case UNLE:
23550 return base_bit + 1;
23551 case LT: case LTU: case UNGE:
23552 return base_bit;
23553 case ORDERED: case UNORDERED:
23554 return base_bit + 3;
23555
23556 case GE: case GEU:
23557 /* If scc, we will have done a cror to put the bit in the
23558 unordered position. So test that bit. For integer, this is ! LT
23559 unless this is an scc insn. */
23560 return scc_p ? base_bit + 3 : base_bit;
23561
23562 case LE: case LEU:
23563 return scc_p ? base_bit + 3 : base_bit + 1;
23564
23565 default:
23566 gcc_unreachable ();
23567 }
23568 }
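
/* Worked example: for a comparison against cr7, base_bit is
   4 * (cr7 - cr0) = 28; a GT test therefore returns bit 29 when scc_p is
   zero, while GE/GEU in the scc case return base_bit + 3 = 31, the
   unordered position that the earlier cror filled in.  */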
23569 \f
23570 /* Return the GOT register. */
23571
23572 rtx
23573 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
23574 {
23575 /* The second flow pass currently (June 1999) can't update
23576 regs_ever_live without disturbing other parts of the compiler, so
23577 update it here to make the prolog/epilogue code happy. */
23578 if (!can_create_pseudo_p ()
23579 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
23580 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
23581
23582 crtl->uses_pic_offset_table = 1;
23583
23584 return pic_offset_table_rtx;
23585 }
23586 \f
23587 static rs6000_stack_t stack_info;
23588
23589 /* Function to init struct machine_function.
23590 This will be called, via a pointer variable,
23591 from push_function_context. */
23592
23593 static struct machine_function *
23594 rs6000_init_machine_status (void)
23595 {
23596 stack_info.reload_completed = 0;
23597 return ggc_cleared_alloc<machine_function> ();
23598 }
23599 \f
23600 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
23601
23602 /* Write out a function code label. */
23603
23604 void
23605 rs6000_output_function_entry (FILE *file, const char *fname)
23606 {
23607 if (fname[0] != '.')
23608 {
23609 switch (DEFAULT_ABI)
23610 {
23611 default:
23612 gcc_unreachable ();
23613
23614 case ABI_AIX:
23615 if (DOT_SYMBOLS)
23616 putc ('.', file);
23617 else
23618 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
23619 break;
23620
23621 case ABI_ELFv2:
23622 case ABI_V4:
23623 case ABI_DARWIN:
23624 break;
23625 }
23626 }
23627
23628 RS6000_OUTPUT_BASENAME (file, fname);
23629 }
23630
23631 /* Print an operand. Recognize special options, documented below. */
23632
23633 #if TARGET_ELF
23634 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
23635 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
23636 #else
23637 #define SMALL_DATA_RELOC "sda21"
23638 #define SMALL_DATA_REG 0
23639 #endif
23640
23641 void
23642 print_operand (FILE *file, rtx x, int code)
23643 {
23644 int i;
23645 unsigned HOST_WIDE_INT uval;
23646
23647 switch (code)
23648 {
23649 /* %a is output_address. */
23650
23651 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
23652 output_operand. */
23653
23654 case 'D':
23655 /* Like 'J' but get to the GT bit only. */
23656 gcc_assert (REG_P (x));
23657
23658 /* Bit 1 is GT bit. */
23659 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
23660
23661 /* Add one for shift count in rlinm for scc. */
23662 fprintf (file, "%d", i + 1);
23663 return;
23664
23665 case 'e':
23666 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
23667 if (! INT_P (x))
23668 {
23669 output_operand_lossage ("invalid %%e value");
23670 return;
23671 }
23672
23673 uval = INTVAL (x);
23674 if ((uval & 0xffff) == 0 && uval != 0)
23675 putc ('s', file);
23676 return;
23677
23678 case 'E':
23679 /* X is a CR register. Print the number of the EQ bit of the CR. */
23680 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23681 output_operand_lossage ("invalid %%E value");
23682 else
23683 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
23684 return;
23685
23686 case 'f':
23687 /* X is a CR register. Print the shift count needed to move it
23688 to the high-order four bits. */
23689 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23690 output_operand_lossage ("invalid %%f value");
23691 else
23692 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
23693 return;
23694
23695 case 'F':
23696 /* Similar, but print the count for the rotate in the opposite
23697 direction. */
23698 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23699 output_operand_lossage ("invalid %%F value");
23700 else
23701 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
23702 return;
23703
23704 case 'G':
23705 /* X is a constant integer. If it is negative, print "m",
23706 otherwise print "z". This is to make an aze or ame insn. */
23707 if (GET_CODE (x) != CONST_INT)
23708 output_operand_lossage ("invalid %%G value");
23709 else if (INTVAL (x) >= 0)
23710 putc ('z', file);
23711 else
23712 putc ('m', file);
23713 return;
23714
23715 case 'h':
23716 /* If constant, output low-order five bits. Otherwise, write
23717 normally. */
23718 if (INT_P (x))
23719 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
23720 else
23721 print_operand (file, x, 0);
23722 return;
23723
23724 case 'H':
23725 /* If constant, output low-order six bits. Otherwise, write
23726 normally. */
23727 if (INT_P (x))
23728 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
23729 else
23730 print_operand (file, x, 0);
23731 return;
23732
23733 case 'I':
23734 /* Print `i' if this is a constant, else nothing. */
23735 if (INT_P (x))
23736 putc ('i', file);
23737 return;
23738
23739 case 'j':
23740 /* Write the bit number in CCR for jump. */
23741 i = ccr_bit (x, 0);
23742 if (i == -1)
23743 output_operand_lossage ("invalid %%j code");
23744 else
23745 fprintf (file, "%d", i);
23746 return;
23747
23748 case 'J':
23749 /* Similar, but add one for shift count in rlinm for scc and pass
23750 scc flag to `ccr_bit'. */
23751 i = ccr_bit (x, 1);
23752 if (i == -1)
23753 output_operand_lossage ("invalid %%J code");
23754 else
23755 /* If we want bit 31, write a shift count of zero, not 32. */
23756 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23757 return;
23758
23759 case 'k':
23760 /* X must be a constant. Write the 1's complement of the
23761 constant. */
23762 if (! INT_P (x))
23763 output_operand_lossage ("invalid %%k value");
23764 else
23765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
23766 return;
23767
23768 case 'K':
23769 /* X must be a symbolic constant on ELF. Write an
23770 expression suitable for an 'addi' that adds in the low 16
23771 bits of the address. */
23772 if (GET_CODE (x) == CONST)
23773 {
23774 if (GET_CODE (XEXP (x, 0)) != PLUS
23775 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
23776 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
23777 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
23778 output_operand_lossage ("invalid %%K value");
23779 }
23780 print_operand_address (file, x);
23781 fputs ("@l", file);
23782 return;
23783
23784 /* %l is output_asm_label. */
23785
23786 case 'L':
23787 /* Write second word of DImode or DFmode reference. Works on register
23788 or non-indexed memory only. */
23789 if (REG_P (x))
23790 fputs (reg_names[REGNO (x) + 1], file);
23791 else if (MEM_P (x))
23792 {
23793 machine_mode mode = GET_MODE (x);
23794 /* Handle possible auto-increment. Since it is pre-increment and
23795 we have already done it, we can just use an offset of word. */
23796 if (GET_CODE (XEXP (x, 0)) == PRE_INC
23797 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
23798 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23799 UNITS_PER_WORD));
23800 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
23801 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
23802 UNITS_PER_WORD));
23803 else
23804 output_address (mode, XEXP (adjust_address_nv (x, SImode,
23805 UNITS_PER_WORD),
23806 0));
23807
23808 if (small_data_operand (x, GET_MODE (x)))
23809 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
23810 reg_names[SMALL_DATA_REG]);
23811 }
23812 return;
23813
23814 case 'N':
23815 /* Write the number of elements in the vector times 4. */
23816 if (GET_CODE (x) != PARALLEL)
23817 output_operand_lossage ("invalid %%N value");
23818 else
23819 fprintf (file, "%d", XVECLEN (x, 0) * 4);
23820 return;
23821
23822 case 'O':
23823 /* Similar, but subtract 1 first. */
23824 if (GET_CODE (x) != PARALLEL)
23825 output_operand_lossage ("invalid %%O value");
23826 else
23827 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
23828 return;
23829
23830 case 'p':
23831 /* X is a CONST_INT that is a power of two. Output the logarithm. */
23832 if (! INT_P (x)
23833 || INTVAL (x) < 0
23834 || (i = exact_log2 (INTVAL (x))) < 0)
23835 output_operand_lossage ("invalid %%p value");
23836 else
23837 fprintf (file, "%d", i);
23838 return;
23839
23840 case 'P':
23841 /* The operand must be an indirect memory reference. The result
23842 is the register name. */
23843 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
23844 || REGNO (XEXP (x, 0)) >= 32)
23845 output_operand_lossage ("invalid %%P value");
23846 else
23847 fputs (reg_names[REGNO (XEXP (x, 0))], file);
23848 return;
23849
23850 case 'q':
23851 /* This outputs the logical code corresponding to a boolean
23852 expression. The expression may have one or both operands
23853 negated (if one, only the first one). For condition register
23854 logical operations, it will also treat the negated
23855 CR codes as NOTs, but not handle NOTs of them. */
23856 {
23857 const char *const *t = 0;
23858 const char *s;
23859 enum rtx_code code = GET_CODE (x);
23860 static const char * const tbl[3][3] = {
23861 { "and", "andc", "nor" },
23862 { "or", "orc", "nand" },
23863 { "xor", "eqv", "xor" } };
23864
23865 if (code == AND)
23866 t = tbl[0];
23867 else if (code == IOR)
23868 t = tbl[1];
23869 else if (code == XOR)
23870 t = tbl[2];
23871 else
23872 output_operand_lossage ("invalid %%q value");
23873
23874 if (GET_CODE (XEXP (x, 0)) != NOT)
23875 s = t[0];
23876 else
23877 {
23878 if (GET_CODE (XEXP (x, 1)) == NOT)
23879 s = t[2];
23880 else
23881 s = t[1];
23882 }
23883
23884 fputs (s, file);
23885 }
23886 return;
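/* Worked example: (ior (not (reg)) (reg)) selects tbl[1][1], giving
"orc", while (and (not (reg)) (not (reg))) selects tbl[0][2], giving
"nor". */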
23887
23888 case 'Q':
23889 if (! TARGET_MFCRF)
23890 return;
23891 fputc (',', file);
23892 /* FALLTHRU */
23893
23894 case 'R':
23895 /* X is a CR register. Print the mask for `mtcrf'. */
23896 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
23897 output_operand_lossage ("invalid %%R value");
23898 else
23899 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
23900 return;
23901
23902 case 's':
23903 /* Low 5 bits of 32 - value. */
23904 if (! INT_P (x))
23905 output_operand_lossage ("invalid %%s value");
23906 else
23907 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
23908 return;
23909
23910 case 't':
23911 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
23912 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
23913
23914 /* Bit 3 is OV bit. */
23915 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
23916
23917 /* If we want bit 31, write a shift count of zero, not 32. */
23918 fprintf (file, "%d", i == 31 ? 0 : i + 1);
23919 return;
23920
23921 case 'T':
23922 /* Print the symbolic name of a branch target register. */
23923 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
23924 && REGNO (x) != CTR_REGNO))
23925 output_operand_lossage ("invalid %%T value");
23926 else if (REGNO (x) == LR_REGNO)
23927 fputs ("lr", file);
23928 else
23929 fputs ("ctr", file);
23930 return;
23931
23932 case 'u':
23933 /* High-order or low-order 16 bits of constant, whichever is non-zero,
23934 for use in unsigned operand. */
23935 if (! INT_P (x))
23936 {
23937 output_operand_lossage ("invalid %%u value");
23938 return;
23939 }
23940
23941 uval = INTVAL (x);
23942 if ((uval & 0xffff) == 0)
23943 uval >>= 16;
23944
23945 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
23946 return;
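/* Worked example: for the constant 0x12340000 the low 16 bits are
zero, so the value is shifted down and "%u" prints 0x1234; for the
constant 0x1234 it prints 0x1234 directly. */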
23947
23948 case 'v':
23949 /* High-order 16 bits of constant for use in signed operand. */
23950 if (! INT_P (x))
23951 output_operand_lossage ("invalid %%v value");
23952 else
23953 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
23954 (INTVAL (x) >> 16) & 0xffff);
23955 return;
23956
23957 case 'U':
23958 /* Print `u' if this has an auto-increment or auto-decrement. */
23959 if (MEM_P (x)
23960 && (GET_CODE (XEXP (x, 0)) == PRE_INC
23961 || GET_CODE (XEXP (x, 0)) == PRE_DEC
23962 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
23963 putc ('u', file);
23964 return;
23965
23966 case 'V':
23967 /* Print the trap code for this operand. */
23968 switch (GET_CODE (x))
23969 {
23970 case EQ:
23971 fputs ("eq", file); /* 4 */
23972 break;
23973 case NE:
23974 fputs ("ne", file); /* 24 */
23975 break;
23976 case LT:
23977 fputs ("lt", file); /* 16 */
23978 break;
23979 case LE:
23980 fputs ("le", file); /* 20 */
23981 break;
23982 case GT:
23983 fputs ("gt", file); /* 8 */
23984 break;
23985 case GE:
23986 fputs ("ge", file); /* 12 */
23987 break;
23988 case LTU:
23989 fputs ("llt", file); /* 2 */
23990 break;
23991 case LEU:
23992 fputs ("lle", file); /* 6 */
23993 break;
23994 case GTU:
23995 fputs ("lgt", file); /* 1 */
23996 break;
23997 case GEU:
23998 fputs ("lge", file); /* 5 */
23999 break;
24000 default:
24001 gcc_unreachable ();
24002 }
24003 break;
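/* The numbers in the comments above are the TO-field encodings these
extended trap mnemonics stand for: lt = 16, gt = 8, eq = 4, llt = 2,
lgt = 1, with compound conditions OR-ed together (e.g. le = 20). */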
24004
24005 case 'w':
24006 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
24007 normally. */
24008 if (INT_P (x))
24009 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
24010 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
24011 else
24012 print_operand (file, x, 0);
24013 return;
24014
24015 case 'x':
24016 /* X is a FPR or Altivec register used in a VSX context. */
24017 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
24018 output_operand_lossage ("invalid %%x value");
24019 else
24020 {
24021 int reg = REGNO (x);
24022 int vsx_reg = (FP_REGNO_P (reg)
24023 ? reg - 32
24024 : reg - FIRST_ALTIVEC_REGNO + 32);
24025
24026 #ifdef TARGET_REGNAMES
24027 if (TARGET_REGNAMES)
24028 fprintf (file, "%%vs%d", vsx_reg);
24029 else
24030 #endif
24031 fprintf (file, "%d", vsx_reg);
24032 }
24033 return;
24034
24035 case 'X':
24036 if (MEM_P (x)
24037 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
24038 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
24039 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
24040 putc ('x', file);
24041 return;
24042
24043 case 'Y':
24044 /* Like 'L', for third word of TImode/PTImode. */
24045 if (REG_P (x))
24046 fputs (reg_names[REGNO (x) + 2], file);
24047 else if (MEM_P (x))
24048 {
24049 machine_mode mode = GET_MODE (x);
24050 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24051 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24052 output_address (mode, plus_constant (Pmode,
24053 XEXP (XEXP (x, 0), 0), 8));
24054 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24055 output_address (mode, plus_constant (Pmode,
24056 XEXP (XEXP (x, 0), 0), 8));
24057 else
24058 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
24059 if (small_data_operand (x, GET_MODE (x)))
24060 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24061 reg_names[SMALL_DATA_REG]);
24062 }
24063 return;
24064
24065 case 'z':
24066 /* X is a SYMBOL_REF. Write out the name preceded by a
24067 period and without any trailing data in brackets. Used for function
24068 names. If we are configured for System V (or the embedded ABI) on
24069 the PowerPC, do not emit the period, since those systems do not use
24070 TOCs and the like. */
24071 gcc_assert (GET_CODE (x) == SYMBOL_REF);
24072
24073 /* For macho, check to see if we need a stub. */
24074 if (TARGET_MACHO)
24075 {
24076 const char *name = XSTR (x, 0);
24077 #if TARGET_MACHO
24078 if (darwin_emit_branch_islands
24079 && MACHOPIC_INDIRECT
24080 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
24081 name = machopic_indirection_name (x, /*stub_p=*/true);
24082 #endif
24083 assemble_name (file, name);
24084 }
24085 else if (!DOT_SYMBOLS)
24086 assemble_name (file, XSTR (x, 0));
24087 else
24088 rs6000_output_function_entry (file, XSTR (x, 0));
24089 return;
24090
24091 case 'Z':
24092 /* Like 'L', for last word of TImode/PTImode. */
24093 if (REG_P (x))
24094 fputs (reg_names[REGNO (x) + 3], file);
24095 else if (MEM_P (x))
24096 {
24097 machine_mode mode = GET_MODE (x);
24098 if (GET_CODE (XEXP (x, 0)) == PRE_INC
24099 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
24100 output_address (mode, plus_constant (Pmode,
24101 XEXP (XEXP (x, 0), 0), 12));
24102 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24103 output_address (mode, plus_constant (Pmode,
24104 XEXP (XEXP (x, 0), 0), 12));
24105 else
24106 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
24107 if (small_data_operand (x, GET_MODE (x)))
24108 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24109 reg_names[SMALL_DATA_REG]);
24110 }
24111 return;
24112
24113 /* Print AltiVec or SPE memory operand. */
24114 case 'y':
24115 {
24116 rtx tmp;
24117
24118 gcc_assert (MEM_P (x));
24119
24120 tmp = XEXP (x, 0);
24121
24122 /* Ugly hack because %y is overloaded. */
24123 if ((TARGET_SPE || TARGET_E500_DOUBLE)
24124 && (GET_MODE_SIZE (GET_MODE (x)) == 8
24125 || FLOAT128_2REG_P (GET_MODE (x))
24126 || GET_MODE (x) == TImode
24127 || GET_MODE (x) == PTImode))
24128 {
24129 /* Handle [reg]. */
24130 if (REG_P (tmp))
24131 {
24132 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
24133 break;
24134 }
24135 /* Handle [reg+UIMM]. */
24136 else if (GET_CODE (tmp) == PLUS
24137 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
24138 {
24139 int offset;
24140
24141 gcc_assert (REG_P (XEXP (tmp, 0)));
24142
24143 offset = INTVAL (XEXP (tmp, 1));
24144 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
24145 break;
24146 }
24147
24148 /* Fall through. Must be [reg+reg]. */
24149 }
24150 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
24151 && GET_CODE (tmp) == AND
24152 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
24153 && INTVAL (XEXP (tmp, 1)) == -16)
24154 tmp = XEXP (tmp, 0);
24155 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
24156 && GET_CODE (tmp) == PRE_MODIFY)
24157 tmp = XEXP (tmp, 1);
24158 if (REG_P (tmp))
24159 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
24160 else
24161 {
24162 if (GET_CODE (tmp) != PLUS
24163 || !REG_P (XEXP (tmp, 0))
24164 || !REG_P (XEXP (tmp, 1)))
24165 {
24166 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
24167 break;
24168 }
24169
24170 if (REGNO (XEXP (tmp, 0)) == 0)
24171 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
24172 reg_names[ REGNO (XEXP (tmp, 0)) ]);
24173 else
24174 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
24175 reg_names[ REGNO (XEXP (tmp, 1)) ]);
24176 }
24177 break;
24178 }
24179
24180 case 0:
24181 if (REG_P (x))
24182 fprintf (file, "%s", reg_names[REGNO (x)]);
24183 else if (MEM_P (x))
24184 {
24185 /* We need to handle PRE_INC and PRE_DEC here, since we need to
24186 know the width from the mode. */
24187 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
24188 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
24189 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24190 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
24191 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
24192 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
24193 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
24194 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
24195 else
24196 output_address (GET_MODE (x), XEXP (x, 0));
24197 }
24198 else
24199 {
24200 if (toc_relative_expr_p (x, false))
24201 /* This hack along with a corresponding hack in
24202 rs6000_output_addr_const_extra arranges to output addends
24203 where the assembler expects to find them. E.g.
24204 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
24205 without this hack would be output as "x@toc+4". We
24206 want "x+4@toc". */
24207 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24208 else
24209 output_addr_const (file, x);
24210 }
24211 return;
24212
24213 case '&':
24214 if (const char *name = get_some_local_dynamic_name ())
24215 assemble_name (file, name);
24216 else
24217 output_operand_lossage ("'%%&' used without any "
24218 "local dynamic TLS references");
24219 return;
24220
24221 default:
24222 output_operand_lossage ("invalid %%xn code");
24223 }
24224 }
24225 \f
24226 /* Print the address of an operand. */
24227
24228 void
24229 print_operand_address (FILE *file, rtx x)
24230 {
24231 if (REG_P (x))
24232 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
24233 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
24234 || GET_CODE (x) == LABEL_REF)
24235 {
24236 output_addr_const (file, x);
24237 if (small_data_operand (x, GET_MODE (x)))
24238 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
24239 reg_names[SMALL_DATA_REG]);
24240 else
24241 gcc_assert (!TARGET_TOC);
24242 }
24243 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24244 && REG_P (XEXP (x, 1)))
24245 {
24246 if (REGNO (XEXP (x, 0)) == 0)
24247 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
24248 reg_names[ REGNO (XEXP (x, 0)) ]);
24249 else
24250 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
24251 reg_names[ REGNO (XEXP (x, 1)) ]);
24252 }
24253 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
24254 && GET_CODE (XEXP (x, 1)) == CONST_INT)
24255 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
24256 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
24257 #if TARGET_MACHO
24258 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24259 && CONSTANT_P (XEXP (x, 1)))
24260 {
24261 fprintf (file, "lo16(");
24262 output_addr_const (file, XEXP (x, 1));
24263 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24264 }
24265 #endif
24266 #if TARGET_ELF
24267 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
24268 && CONSTANT_P (XEXP (x, 1)))
24269 {
24270 output_addr_const (file, XEXP (x, 1));
24271 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
24272 }
24273 #endif
24274 else if (toc_relative_expr_p (x, false))
24275 {
24276 /* This hack along with a corresponding hack in
24277 rs6000_output_addr_const_extra arranges to output addends
24278 where the assembler expects to find them. E.g.
24279 (lo_sum (reg 9)
24280 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
24281 without this hack would be output as "x@toc+8@l(9)". We
24282 want "x+8@toc@l(9)". */
24283 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
24284 if (GET_CODE (x) == LO_SUM)
24285 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
24286 else
24287 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
24288 }
24289 else
24290 gcc_unreachable ();
24291 }
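/* Illustrative examples of the above (register names without
-mregnames): (reg 5) prints as "0(5)", (plus (reg 5) (const_int 8))
prints as "8(5)", and (plus (reg 0) (reg 4)) prints "4,0" with the
operands swapped, since r0 in the RA position reads as zero. */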
24292 \f
24293 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
24294
24295 static bool
24296 rs6000_output_addr_const_extra (FILE *file, rtx x)
24297 {
24298 if (GET_CODE (x) == UNSPEC)
24299 switch (XINT (x, 1))
24300 {
24301 case UNSPEC_TOCREL:
24302 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
24303 && REG_P (XVECEXP (x, 0, 1))
24304 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
24305 output_addr_const (file, XVECEXP (x, 0, 0));
24306 if (x == tocrel_base && tocrel_offset != const0_rtx)
24307 {
24308 if (INTVAL (tocrel_offset) >= 0)
24309 fprintf (file, "+");
24310 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
24311 }
24312 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
24313 {
24314 putc ('-', file);
24315 assemble_name (file, toc_label_name);
24316 need_toc_init = 1;
24317 }
24318 else if (TARGET_ELF)
24319 fputs ("@toc", file);
24320 return true;
24321
24322 #if TARGET_MACHO
24323 case UNSPEC_MACHOPIC_OFFSET:
24324 output_addr_const (file, XVECEXP (x, 0, 0));
24325 putc ('-', file);
24326 machopic_output_function_base_name (file);
24327 return true;
24328 #endif
24329 }
24330 return false;
24331 }
24332 \f
24333 /* Target hook for assembling integer objects. The PowerPC version has
24334 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
24335 is defined. It also needs to handle DI-mode objects on 64-bit
24336 targets. */
24337
24338 static bool
24339 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
24340 {
24341 #ifdef RELOCATABLE_NEEDS_FIXUP
24342 /* Special handling for SI values. */
24343 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
24344 {
24345 static int recurse = 0;
24346
24347 /* For -mrelocatable, we mark all addresses that need to be fixed up in
24348 the .fixup section. Since the TOC section is already relocated, we
24349 don't need to mark it here. We used to skip the text section, but it
24350 should never be valid for relocated addresses to be placed in the text
24351 section. */
24352 if (DEFAULT_ABI == ABI_V4
24353 && (TARGET_RELOCATABLE || flag_pic > 1)
24354 && in_section != toc_section
24355 && !recurse
24356 && !CONST_SCALAR_INT_P (x)
24357 && CONSTANT_P (x))
24358 {
24359 char buf[256];
24360
24361 recurse = 1;
24362 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
24363 fixuplabelno++;
24364 ASM_OUTPUT_LABEL (asm_out_file, buf);
24365 fprintf (asm_out_file, "\t.long\t(");
24366 output_addr_const (asm_out_file, x);
24367 fprintf (asm_out_file, ")@fixup\n");
24368 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
24369 ASM_OUTPUT_ALIGN (asm_out_file, 2);
24370 fprintf (asm_out_file, "\t.long\t");
24371 assemble_name (asm_out_file, buf);
24372 fprintf (asm_out_file, "\n\t.previous\n");
24373 recurse = 0;
24374 return true;
24375 }
24376 /* Remove initial .'s to turn a -mcall-aixdesc function
24377 address into the address of the descriptor, not the function
24378 itself. */
24379 else if (GET_CODE (x) == SYMBOL_REF
24380 && XSTR (x, 0)[0] == '.'
24381 && DEFAULT_ABI == ABI_AIX)
24382 {
24383 const char *name = XSTR (x, 0);
24384 while (*name == '.')
24385 name++;
24386
24387 fprintf (asm_out_file, "\t.long\t%s\n", name);
24388 return true;
24389 }
24390 }
24391 #endif /* RELOCATABLE_NEEDS_FIXUP */
24392 return default_assemble_integer (x, size, aligned_p);
24393 }
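/* A sketch of the fixup record emitted above for -mrelocatable
(label and expression vary):

.LCP0:
.long (sym)@fixup
.section ".fixup","aw"
.align 2
.long .LCP0
.previous
*/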
24394
24395 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
24396 /* Emit an assembler directive to set symbol visibility for DECL to
24397 VISIBILITY_TYPE. */
24398
24399 static void
24400 rs6000_assemble_visibility (tree decl, int vis)
24401 {
24402 if (TARGET_XCOFF)
24403 return;
24404
24405 /* Functions need to have their entry point symbol visibility set as
24406 well as their descriptor symbol visibility. */
24407 if (DEFAULT_ABI == ABI_AIX
24408 && DOT_SYMBOLS
24409 && TREE_CODE (decl) == FUNCTION_DECL)
24410 {
24411 static const char * const visibility_types[] = {
24412 NULL, "protected", "hidden", "internal"
24413 };
24414
24415 const char *name, *type;
24416
24417 name = ((* targetm.strip_name_encoding)
24418 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
24419 type = visibility_types[vis];
24420
24421 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
24422 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
24423 }
24424 else
24425 default_assemble_visibility (decl, vis);
24426 }
24427 #endif
24428 \f
24429 enum rtx_code
24430 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
24431 {
24432 /* Reversal of FP compares requires care -- an ordered compare
24433 becomes an unordered compare and vice versa. */
24434 if (mode == CCFPmode
24435 && (!flag_finite_math_only
24436 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
24437 || code == UNEQ || code == LTGT))
24438 return reverse_condition_maybe_unordered (code);
24439 else
24440 return reverse_condition (code);
24441 }
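/* For example, when NaNs must be honored, LT in CCFPmode reverses to
UNGE rather than GE, since !(a < b) must also hold when a and b
compare unordered. */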
24442
24443 /* Generate a compare for CODE. Return a brand-new rtx that
24444 represents the result of the compare. */
24445
24446 static rtx
24447 rs6000_generate_compare (rtx cmp, machine_mode mode)
24448 {
24449 machine_mode comp_mode;
24450 rtx compare_result;
24451 enum rtx_code code = GET_CODE (cmp);
24452 rtx op0 = XEXP (cmp, 0);
24453 rtx op1 = XEXP (cmp, 1);
24454
24455 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24456 comp_mode = CCmode;
24457 else if (FLOAT_MODE_P (mode))
24458 comp_mode = CCFPmode;
24459 else if (code == GTU || code == LTU
24460 || code == GEU || code == LEU)
24461 comp_mode = CCUNSmode;
24462 else if ((code == EQ || code == NE)
24463 && unsigned_reg_p (op0)
24464 && (unsigned_reg_p (op1)
24465 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
24466 /* These are unsigned values; perhaps there will be a later
24467 ordering compare that can be shared with this one. */
24468 comp_mode = CCUNSmode;
24469 else
24470 comp_mode = CCmode;
24471
24472 /* If we have an unsigned compare, make sure we don't have a signed value as
24473 an immediate. */
24474 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
24475 && INTVAL (op1) < 0)
24476 {
24477 op0 = copy_rtx_if_shared (op0);
24478 op1 = force_reg (GET_MODE (op0), op1);
24479 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
24480 }
24481
24482 /* First, the compare. */
24483 compare_result = gen_reg_rtx (comp_mode);
24484
24485 /* E500 FP compare instructions on the GPRs. Yuck! */
24486 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
24487 && FLOAT_MODE_P (mode))
24488 {
24489 rtx cmp, or_result, compare_result2;
24490 machine_mode op_mode = GET_MODE (op0);
24491 bool reverse_p;
24492
24493 if (op_mode == VOIDmode)
24494 op_mode = GET_MODE (op1);
24495
24496 /* First reverse the condition codes that aren't directly supported. */
24497 switch (code)
24498 {
24499 case NE:
24500 case UNLT:
24501 case UNLE:
24502 case UNGT:
24503 case UNGE:
24504 code = reverse_condition_maybe_unordered (code);
24505 reverse_p = true;
24506 break;
24507
24508 case EQ:
24509 case LT:
24510 case LE:
24511 case GT:
24512 case GE:
24513 reverse_p = false;
24514 break;
24515
24516 default:
24517 gcc_unreachable ();
24518 }
24519
24520 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
24521 This explains the following mess. */
24522
24523 switch (code)
24524 {
24525 case EQ:
24526 switch (op_mode)
24527 {
24528 case E_SFmode:
24529 cmp = (flag_finite_math_only && !flag_trapping_math)
24530 ? gen_tstsfeq_gpr (compare_result, op0, op1)
24531 : gen_cmpsfeq_gpr (compare_result, op0, op1);
24532 break;
24533
24534 case E_DFmode:
24535 cmp = (flag_finite_math_only && !flag_trapping_math)
24536 ? gen_tstdfeq_gpr (compare_result, op0, op1)
24537 : gen_cmpdfeq_gpr (compare_result, op0, op1);
24538 break;
24539
24540 case E_TFmode:
24541 case E_IFmode:
24542 case E_KFmode:
24543 cmp = (flag_finite_math_only && !flag_trapping_math)
24544 ? gen_tsttfeq_gpr (compare_result, op0, op1)
24545 : gen_cmptfeq_gpr (compare_result, op0, op1);
24546 break;
24547
24548 default:
24549 gcc_unreachable ();
24550 }
24551 break;
24552
24553 case GT:
24554 case GE:
24555 switch (op_mode)
24556 {
24557 case E_SFmode:
24558 cmp = (flag_finite_math_only && !flag_trapping_math)
24559 ? gen_tstsfgt_gpr (compare_result, op0, op1)
24560 : gen_cmpsfgt_gpr (compare_result, op0, op1);
24561 break;
24562
24563 case E_DFmode:
24564 cmp = (flag_finite_math_only && !flag_trapping_math)
24565 ? gen_tstdfgt_gpr (compare_result, op0, op1)
24566 : gen_cmpdfgt_gpr (compare_result, op0, op1);
24567 break;
24568
24569 case E_TFmode:
24570 case E_IFmode:
24571 case E_KFmode:
24572 cmp = (flag_finite_math_only && !flag_trapping_math)
24573 ? gen_tsttfgt_gpr (compare_result, op0, op1)
24574 : gen_cmptfgt_gpr (compare_result, op0, op1);
24575 break;
24576
24577 default:
24578 gcc_unreachable ();
24579 }
24580 break;
24581
24582 case LT:
24583 case LE:
24584 switch (op_mode)
24585 {
24586 case E_SFmode:
24587 cmp = (flag_finite_math_only && !flag_trapping_math)
24588 ? gen_tstsflt_gpr (compare_result, op0, op1)
24589 : gen_cmpsflt_gpr (compare_result, op0, op1);
24590 break;
24591
24592 case E_DFmode:
24593 cmp = (flag_finite_math_only && !flag_trapping_math)
24594 ? gen_tstdflt_gpr (compare_result, op0, op1)
24595 : gen_cmpdflt_gpr (compare_result, op0, op1);
24596 break;
24597
24598 case E_TFmode:
24599 case E_IFmode:
24600 case E_KFmode:
24601 cmp = (flag_finite_math_only && !flag_trapping_math)
24602 ? gen_tsttflt_gpr (compare_result, op0, op1)
24603 : gen_cmptflt_gpr (compare_result, op0, op1);
24604 break;
24605
24606 default:
24607 gcc_unreachable ();
24608 }
24609 break;
24610
24611 default:
24612 gcc_unreachable ();
24613 }
24614
24615 /* Synthesize LE and GE from LT/GT || EQ. */
24616 if (code == LE || code == GE)
24617 {
24618 emit_insn (cmp);
24619
24620 compare_result2 = gen_reg_rtx (CCFPmode);
24621
24622 /* Do the EQ. */
24623 switch (op_mode)
24624 {
24625 case E_SFmode:
24626 cmp = (flag_finite_math_only && !flag_trapping_math)
24627 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
24628 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
24629 break;
24630
24631 case E_DFmode:
24632 cmp = (flag_finite_math_only && !flag_trapping_math)
24633 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
24634 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
24635 break;
24636
24637 case E_TFmode:
24638 case E_IFmode:
24639 case E_KFmode:
24640 cmp = (flag_finite_math_only && !flag_trapping_math)
24641 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
24642 : gen_cmptfeq_gpr (compare_result2, op0, op1);
24643 break;
24644
24645 default:
24646 gcc_unreachable ();
24647 }
24648
24649 emit_insn (cmp);
24650
24651 /* OR them together. */
24652 or_result = gen_reg_rtx (CCFPmode);
24653 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
24654 compare_result2);
24655 compare_result = or_result;
24656 }
24657
24658 code = reverse_p ? NE : EQ;
24659
24660 emit_insn (cmp);
24661 }
24662
24663 /* IEEE 128-bit support in VSX registers when we do not have hardware
24664 support. */
24665 else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
24666 {
24667 rtx libfunc = NULL_RTX;
24668 bool check_nan = false;
24669 rtx dest;
24670
24671 switch (code)
24672 {
24673 case EQ:
24674 case NE:
24675 libfunc = optab_libfunc (eq_optab, mode);
24676 break;
24677
24678 case GT:
24679 case GE:
24680 libfunc = optab_libfunc (ge_optab, mode);
24681 break;
24682
24683 case LT:
24684 case LE:
24685 libfunc = optab_libfunc (le_optab, mode);
24686 break;
24687
24688 case UNORDERED:
24689 case ORDERED:
24690 libfunc = optab_libfunc (unord_optab, mode);
24691 code = (code == UNORDERED) ? NE : EQ;
24692 break;
24693
24694 case UNGE:
24695 case UNGT:
24696 check_nan = true;
24697 libfunc = optab_libfunc (ge_optab, mode);
24698 code = (code == UNGE) ? GE : GT;
24699 break;
24700
24701 case UNLE:
24702 case UNLT:
24703 check_nan = true;
24704 libfunc = optab_libfunc (le_optab, mode);
24705 code = (code == UNLE) ? LE : LT;
24706 break;
24707
24708 case UNEQ:
24709 case LTGT:
24710 check_nan = true;
24711 libfunc = optab_libfunc (eq_optab, mode);
24712 code = (code == UNEQ) ? EQ : NE;
24713 break;
24714
24715 default:
24716 gcc_unreachable ();
24717 }
24718
24719 gcc_assert (libfunc);
24720
24721 if (!check_nan)
24722 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24723 SImode, op0, mode, op1, mode);
24724
24725 /* The library signals an exception for signalling NaNs, so we need to
24726 handle isgreater, etc. by first checking isordered. */
24727 else
24728 {
24729 rtx ne_rtx, normal_dest, unord_dest;
24730 rtx unord_func = optab_libfunc (unord_optab, mode);
24731 rtx join_label = gen_label_rtx ();
24732 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
24733 rtx unord_cmp = gen_reg_rtx (comp_mode);
24734
24735
24736 /* Test for either value being a NaN. */
24737 gcc_assert (unord_func);
24738 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
24739 SImode, op0, mode, op1, mode);
24740
24741 /* If either value is a NaN, set the result to 1 (so the final
24742 NE test yields true) and jump to the join label. */
24743 dest = gen_reg_rtx (SImode);
24744 emit_move_insn (dest, const1_rtx);
24745 emit_insn (gen_rtx_SET (unord_cmp,
24746 gen_rtx_COMPARE (comp_mode, unord_dest,
24747 const0_rtx)));
24748
24749 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
24750 emit_jump_insn (gen_rtx_SET (pc_rtx,
24751 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
24752 join_ref,
24753 pc_rtx)));
24754
24755 /* Do the normal comparison, knowing that the values are not
24756 NaNs. */
24757 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
24758 SImode, op0, mode, op1, mode);
24759
24760 emit_insn (gen_cstoresi4 (dest,
24761 gen_rtx_fmt_ee (code, SImode, normal_dest,
24762 const0_rtx),
24763 normal_dest, const0_rtx));
24764
24765 /* Join NaN and non-NaN paths. Compare dest against 0. */
24766 emit_label (join_label);
24767 code = NE;
24768 }
24769
24770 emit_insn (gen_rtx_SET (compare_result,
24771 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
24772 }
24773
24774 else
24775 {
24776 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
24777 CLOBBERs to match cmptf_internal2 pattern. */
24778 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
24779 && FLOAT128_IBM_P (GET_MODE (op0))
24780 && TARGET_HARD_FLOAT && TARGET_FPRS)
24781 emit_insn (gen_rtx_PARALLEL (VOIDmode,
24782 gen_rtvec (10,
24783 gen_rtx_SET (compare_result,
24784 gen_rtx_COMPARE (comp_mode, op0, op1)),
24785 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24786 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24787 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24788 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24789 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24790 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24791 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24792 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
24793 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
24794 else if (GET_CODE (op1) == UNSPEC
24795 && XINT (op1, 1) == UNSPEC_SP_TEST)
24796 {
24797 rtx op1b = XVECEXP (op1, 0, 0);
24798 comp_mode = CCEQmode;
24799 compare_result = gen_reg_rtx (CCEQmode);
24800 if (TARGET_64BIT)
24801 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
24802 else
24803 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
24804 }
24805 else
24806 emit_insn (gen_rtx_SET (compare_result,
24807 gen_rtx_COMPARE (comp_mode, op0, op1)));
24808 }
24809
24810 /* Some kinds of FP comparisons need an OR operation;
24811 under flag_finite_math_only we don't bother. */
24812 if (FLOAT_MODE_P (mode)
24813 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
24814 && !flag_finite_math_only
24815 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
24816 && (code == LE || code == GE
24817 || code == UNEQ || code == LTGT
24818 || code == UNGT || code == UNLT))
24819 {
24820 enum rtx_code or1, or2;
24821 rtx or1_rtx, or2_rtx, compare2_rtx;
24822 rtx or_result = gen_reg_rtx (CCEQmode);
24823
24824 switch (code)
24825 {
24826 case LE: or1 = LT; or2 = EQ; break;
24827 case GE: or1 = GT; or2 = EQ; break;
24828 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
24829 case LTGT: or1 = LT; or2 = GT; break;
24830 case UNGT: or1 = UNORDERED; or2 = GT; break;
24831 case UNLT: or1 = UNORDERED; or2 = LT; break;
24832 default: gcc_unreachable ();
24833 }
24834 validate_condition_mode (or1, comp_mode);
24835 validate_condition_mode (or2, comp_mode);
24836 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
24837 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
24838 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
24839 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
24840 const_true_rtx);
24841 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
24842
24843 compare_result = or_result;
24844 code = EQ;
24845 }
24846
24847 validate_condition_mode (code, GET_MODE (compare_result));
24848
24849 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
24850 }
24851
24852 \f
24853 /* Return the diagnostic message string if the binary operation OP is
24854 not permitted on TYPE1 and TYPE2, NULL otherwise. */
24855
24856 static const char*
24857 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
24858 const_tree type1,
24859 const_tree type2)
24860 {
24861 machine_mode mode1 = TYPE_MODE (type1);
24862 machine_mode mode2 = TYPE_MODE (type2);
24863
24864 /* For complex modes, use the inner type. */
24865 if (COMPLEX_MODE_P (mode1))
24866 mode1 = GET_MODE_INNER (mode1);
24867
24868 if (COMPLEX_MODE_P (mode2))
24869 mode2 = GET_MODE_INNER (mode2);
24870
24871 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
24872 double to intermix unless -mfloat128-convert. */
24873 if (mode1 == mode2)
24874 return NULL;
24875
24876 if (!TARGET_FLOAT128_CVT)
24877 {
24878 if ((mode1 == KFmode && mode2 == IFmode)
24879 || (mode1 == IFmode && mode2 == KFmode))
24880 return N_("__float128 and __ibm128 cannot be used in the same "
24881 "expression");
24882
24883 if (TARGET_IEEEQUAD
24884 && ((mode1 == IFmode && mode2 == TFmode)
24885 || (mode1 == TFmode && mode2 == IFmode)))
24886 return N_("__ibm128 and long double cannot be used in the same "
24887 "expression");
24888
24889 if (!TARGET_IEEEQUAD
24890 && ((mode1 == KFmode && mode2 == TFmode)
24891 || (mode1 == TFmode && mode2 == KFmode)))
24892 return N_("__float128 and long double cannot be used in the same "
24893 "expression");
24894 }
24895
24896 return NULL;
24897 }
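/* For example, without -mfloat128-convert an expression mixing a
__float128 (KFmode) operand with an __ibm128 (IFmode) operand is
rejected with the first message above. */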
24898
24899 \f
24900 /* Expand floating point conversion to/from __float128 and __ibm128. */
24901
24902 void
24903 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
24904 {
24905 machine_mode dest_mode = GET_MODE (dest);
24906 machine_mode src_mode = GET_MODE (src);
24907 convert_optab cvt = unknown_optab;
24908 bool do_move = false;
24909 rtx libfunc = NULL_RTX;
24910 rtx dest2;
24911 typedef rtx (*rtx_2func_t) (rtx, rtx);
24912 rtx_2func_t hw_convert = (rtx_2func_t)0;
24913 size_t kf_or_tf;
24914
24915 struct hw_conv_t {
24916 rtx_2func_t from_df;
24917 rtx_2func_t from_sf;
24918 rtx_2func_t from_si_sign;
24919 rtx_2func_t from_si_uns;
24920 rtx_2func_t from_di_sign;
24921 rtx_2func_t from_di_uns;
24922 rtx_2func_t to_df;
24923 rtx_2func_t to_sf;
24924 rtx_2func_t to_si_sign;
24925 rtx_2func_t to_si_uns;
24926 rtx_2func_t to_di_sign;
24927 rtx_2func_t to_di_uns;
24928 } hw_conversions[2] = {
24929 /* conversions to/from KFmode. */
24930 {
24931 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
24932 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
24933 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
24934 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
24935 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
24936 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
24937 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
24938 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
24939 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
24940 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
24941 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
24942 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
24943 },
24944
24945 /* conversions to/from TFmode. */
24946 {
24947 gen_extenddftf2_hw, /* TFmode <- DFmode. */
24948 gen_extendsftf2_hw, /* TFmode <- SFmode. */
24949 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
24950 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
24951 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
24952 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
24953 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
24954 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
24955 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
24956 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
24957 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
24958 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
24959 },
24960 };
24961
24962 if (dest_mode == src_mode)
24963 gcc_unreachable ();
24964
24965 /* Eliminate memory operations. */
24966 if (MEM_P (src))
24967 src = force_reg (src_mode, src);
24968
24969 if (MEM_P (dest))
24970 {
24971 rtx tmp = gen_reg_rtx (dest_mode);
24972 rs6000_expand_float128_convert (tmp, src, unsigned_p);
24973 rs6000_emit_move (dest, tmp, dest_mode);
24974 return;
24975 }
24976
24977 /* Convert to IEEE 128-bit floating point. */
24978 if (FLOAT128_IEEE_P (dest_mode))
24979 {
24980 if (dest_mode == KFmode)
24981 kf_or_tf = 0;
24982 else if (dest_mode == TFmode)
24983 kf_or_tf = 1;
24984 else
24985 gcc_unreachable ();
24986
24987 switch (src_mode)
24988 {
24989 case E_DFmode:
24990 cvt = sext_optab;
24991 hw_convert = hw_conversions[kf_or_tf].from_df;
24992 break;
24993
24994 case E_SFmode:
24995 cvt = sext_optab;
24996 hw_convert = hw_conversions[kf_or_tf].from_sf;
24997 break;
24998
24999 case E_KFmode:
25000 case E_IFmode:
25001 case E_TFmode:
25002 if (FLOAT128_IBM_P (src_mode))
25003 cvt = sext_optab;
25004 else
25005 do_move = true;
25006 break;
25007
25008 case E_SImode:
25009 if (unsigned_p)
25010 {
25011 cvt = ufloat_optab;
25012 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
25013 }
25014 else
25015 {
25016 cvt = sfloat_optab;
25017 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
25018 }
25019 break;
25020
25021 case E_DImode:
25022 if (unsigned_p)
25023 {
25024 cvt = ufloat_optab;
25025 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
25026 }
25027 else
25028 {
25029 cvt = sfloat_optab;
25030 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
25031 }
25032 break;
25033
25034 default:
25035 gcc_unreachable ();
25036 }
25037 }
25038
25039 /* Convert from IEEE 128-bit floating point. */
25040 else if (FLOAT128_IEEE_P (src_mode))
25041 {
25042 if (src_mode == KFmode)
25043 kf_or_tf = 0;
25044 else if (src_mode == TFmode)
25045 kf_or_tf = 1;
25046 else
25047 gcc_unreachable ();
25048
25049 switch (dest_mode)
25050 {
25051 case E_DFmode:
25052 cvt = trunc_optab;
25053 hw_convert = hw_conversions[kf_or_tf].to_df;
25054 break;
25055
25056 case E_SFmode:
25057 cvt = trunc_optab;
25058 hw_convert = hw_conversions[kf_or_tf].to_sf;
25059 break;
25060
25061 case E_KFmode:
25062 case E_IFmode:
25063 case E_TFmode:
25064 if (FLOAT128_IBM_P (dest_mode))
25065 cvt = trunc_optab;
25066 else
25067 do_move = true;
25068 break;
25069
25070 case E_SImode:
25071 if (unsigned_p)
25072 {
25073 cvt = ufix_optab;
25074 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
25075 }
25076 else
25077 {
25078 cvt = sfix_optab;
25079 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
25080 }
25081 break;
25082
25083 case E_DImode:
25084 if (unsigned_p)
25085 {
25086 cvt = ufix_optab;
25087 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
25088 }
25089 else
25090 {
25091 cvt = sfix_optab;
25092 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
25093 }
25094 break;
25095
25096 default:
25097 gcc_unreachable ();
25098 }
25099 }
25100
25101 /* Both IBM format. */
25102 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
25103 do_move = true;
25104
25105 else
25106 gcc_unreachable ();
25107
25108 /* Handle conversions between modes with the same underlying format, which are just register moves (e.g. TFmode/KFmode when both are IEEE). */
25109 if (do_move)
25110 emit_move_insn (dest, gen_lowpart (dest_mode, src));
25111
25112 /* Handle conversion if we have hardware support. */
25113 else if (TARGET_FLOAT128_HW && hw_convert)
25114 emit_insn ((hw_convert) (dest, src));
25115
25116 /* Call an external function to do the conversion. */
25117 else if (cvt != unknown_optab)
25118 {
25119 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
25120 gcc_assert (libfunc != NULL_RTX);
25121
25122 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
25123 src, src_mode);
25124
25125 gcc_assert (dest2 != NULL_RTX);
25126 if (!rtx_equal_p (dest, dest2))
25127 emit_move_insn (dest, dest2);
25128 }
25129
25130 else
25131 gcc_unreachable ();
25132
25133 return;
25134 }
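/* For example, converting a signed DImode value to a KFmode
destination uses gen_float_kfdi2_hw when TARGET_FLOAT128_HW, and
otherwise falls back to the sfloat_optab library function. */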
25135
25136 \f
25137 /* Emit the RTL for an sISEL pattern. */
25138
25139 void
25140 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
25141 {
25142 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
25143 }
25144
25145 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
25146 can be used as that dest register. Return the dest register. */
25147
25148 rtx
25149 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
25150 {
25151 if (op2 == const0_rtx)
25152 return op1;
25153
25154 if (GET_CODE (scratch) == SCRATCH)
25155 scratch = gen_reg_rtx (mode);
25156
25157 if (logical_operand (op2, mode))
25158 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
25159 else
25160 emit_insn (gen_rtx_SET (scratch,
25161 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
25162
25163 return scratch;
25164 }
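/* For example, with OP2 the constant 5 (a logical_operand) this
emits scratch = op1 ^ 5, which is zero exactly when op1 == 5;
constants that do not fit a logical instruction are handled by the
subtraction form instead. */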
25165
25166 void
25167 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
25168 {
25169 rtx condition_rtx;
25170 machine_mode op_mode;
25171 enum rtx_code cond_code;
25172 rtx result = operands[0];
25173
25174 condition_rtx = rs6000_generate_compare (operands[1], mode);
25175 cond_code = GET_CODE (condition_rtx);
25176
25177 if (FLOAT_MODE_P (mode)
25178 && !TARGET_FPRS && TARGET_HARD_FLOAT)
25179 {
25180 rtx t;
25181
25182 PUT_MODE (condition_rtx, SImode);
25183 t = XEXP (condition_rtx, 0);
25184
25185 gcc_assert (cond_code == NE || cond_code == EQ);
25186
25187 if (cond_code == NE)
25188 emit_insn (gen_e500_flip_gt_bit (t, t));
25189
25190 emit_insn (gen_move_from_CR_gt_bit (result, t));
25191 return;
25192 }
25193
25194 if (cond_code == NE
25195 || cond_code == GE || cond_code == LE
25196 || cond_code == GEU || cond_code == LEU
25197 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
25198 {
25199 rtx not_result = gen_reg_rtx (CCEQmode);
25200 rtx not_op, rev_cond_rtx;
25201 machine_mode cc_mode;
25202
25203 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
25204
25205 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
25206 SImode, XEXP (condition_rtx, 0), const0_rtx);
25207 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
25208 emit_insn (gen_rtx_SET (not_result, not_op));
25209 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
25210 }
25211
25212 op_mode = GET_MODE (XEXP (operands[1], 0));
25213 if (op_mode == VOIDmode)
25214 op_mode = GET_MODE (XEXP (operands[1], 1));
25215
25216 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
25217 {
25218 PUT_MODE (condition_rtx, DImode);
25219 convert_move (result, condition_rtx, 0);
25220 }
25221 else
25222 {
25223 PUT_MODE (condition_rtx, SImode);
25224 emit_insn (gen_rtx_SET (result, condition_rtx));
25225 }
25226 }
25227
25228 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0]. */
25229
25230 void
25231 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
25232 {
25233 rtx condition_rtx, loc_ref;
25234
25235 condition_rtx = rs6000_generate_compare (operands[0], mode);
25236 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
25237 emit_jump_insn (gen_rtx_SET (pc_rtx,
25238 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
25239 loc_ref, pc_rtx)));
25240 }
25241
25242 /* Return the string to output a conditional branch to LABEL, which is
25243 the operand template of the label, or NULL if the branch is really a
25244 conditional return.
25245
25246 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
25247 condition code register and its mode specifies what kind of
25248 comparison we made.
25249
25250 REVERSED is nonzero if we should reverse the sense of the comparison.
25251
25252 INSN is the insn. */
25253
25254 char *
25255 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
25256 {
25257 static char string[64];
25258 enum rtx_code code = GET_CODE (op);
25259 rtx cc_reg = XEXP (op, 0);
25260 machine_mode mode = GET_MODE (cc_reg);
25261 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
25262 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
25263 int really_reversed = reversed ^ need_longbranch;
25264 char *s = string;
25265 const char *ccode;
25266 const char *pred;
25267 rtx note;
25268
25269 validate_condition_mode (code, mode);
25270
25271 /* Work out which way this really branches. We could use
25272 reverse_condition_maybe_unordered here always but this
25273 makes the resulting assembler clearer. */
25274 if (really_reversed)
25275 {
25276 /* Reversal of FP compares requires care -- an ordered compare
25277 becomes an unordered compare and vice versa. */
25278 if (mode == CCFPmode)
25279 code = reverse_condition_maybe_unordered (code);
25280 else
25281 code = reverse_condition (code);
25282 }
25283
25284 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
25285 {
25286 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
25287 to the GT bit. */
25288 switch (code)
25289 {
25290 case EQ:
25291 /* Opposite of GT. */
25292 code = GT;
25293 break;
25294
25295 case NE:
25296 code = UNLE;
25297 break;
25298
25299 default:
25300 gcc_unreachable ();
25301 }
25302 }
25303
25304 switch (code)
25305 {
25306 /* Not all of these are actually distinct opcodes, but
25307 we distinguish them for clarity of the resulting assembler. */
25308 case NE: case LTGT:
25309 ccode = "ne"; break;
25310 case EQ: case UNEQ:
25311 ccode = "eq"; break;
25312 case GE: case GEU:
25313 ccode = "ge"; break;
25314 case GT: case GTU: case UNGT:
25315 ccode = "gt"; break;
25316 case LE: case LEU:
25317 ccode = "le"; break;
25318 case LT: case LTU: case UNLT:
25319 ccode = "lt"; break;
25320 case UNORDERED: ccode = "un"; break;
25321 case ORDERED: ccode = "nu"; break;
25322 case UNGE: ccode = "nl"; break;
25323 case UNLE: ccode = "ng"; break;
25324 default:
25325 gcc_unreachable ();
25326 }
25327
25328 /* Maybe we have a guess as to how likely the branch is. */
25329 pred = "";
25330 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
25331 if (note != NULL_RTX)
25332 {
25333 /* PROB is the difference from 50%. */
25334 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
25335 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
25336
25337 /* Only hint for highly probable/improbable branches on newer cpus when
25338 we have real profile data, as static prediction overrides processor
25339 dynamic prediction. For older cpus we may as well always hint, but
25340 assume not taken for branches that are very close to 50% as a
25341 mispredicted taken branch is more expensive than a
25342 mispredicted not-taken branch. */
25343 if (rs6000_always_hint
25344 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
25345 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
25346 && br_prob_note_reliable_p (note)))
25347 {
25348 if (abs (prob) > REG_BR_PROB_BASE / 20
25349 && ((prob > 0) ^ need_longbranch))
25350 pred = "+";
25351 else
25352 pred = "-";
25353 }
25354 }
25355
25356 if (label == NULL)
25357 s += sprintf (s, "b%slr%s ", ccode, pred);
25358 else
25359 s += sprintf (s, "b%s%s ", ccode, pred);
25360
25361 /* We need to escape any '%' characters in the reg_names string.
25362 Assume they'd only be the first character.... */
25363 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
25364 *s++ = '%';
25365 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
25366
25367 if (label != NULL)
25368 {
25369 /* If the branch distance was too far, we may have to use an
25370 unconditional branch to go the distance. */
25371 if (need_longbranch)
25372 s += sprintf (s, ",$+8\n\tb %s", label);
25373 else
25374 s += sprintf (s, ",%s", label);
25375 }
25376
25377 return string;
25378 }
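/* Illustrative output: a predicted-taken EQ branch on cr0 within
range yields "beq+ 0,.L2"; when the target is out of range the
condition is reversed and a long form such as "bne 0,$+8" followed
by "b .L2" is emitted instead. */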
25379
25380 /* Return the string to flip the GT bit on a CR. */
25381 char *
25382 output_e500_flip_gt_bit (rtx dst, rtx src)
25383 {
25384 static char string[64];
25385 int a, b;
25386
25387 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
25388 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
25389
25390 /* GT bit. */
25391 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
25392 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
25393
25394 sprintf (string, "crnot %d,%d", a, b);
25395 return string;
25396 }
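/* Worked example: for DST = cr2 and SRC = cr3 the GT bits are
4*2 + 1 = 9 and 4*3 + 1 = 13, so this returns "crnot 9,13". */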
25397
25398 /* Return insn for VSX or Altivec comparisons. */
25399
25400 static rtx
25401 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
25402 {
25403 rtx mask;
25404 machine_mode mode = GET_MODE (op0);
25405
25406 switch (code)
25407 {
25408 default:
25409 break;
25410
25411 case GE:
25412 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
25413 return NULL_RTX;
25414 /* FALLTHRU */
25415
25416 case EQ:
25417 case GT:
25418 case GTU:
25419 case ORDERED:
25420 case UNORDERED:
25421 case UNEQ:
25422 case LTGT:
25423 mask = gen_reg_rtx (mode);
25424 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
25425 return mask;
25426 }
25427
25428 return NULL_RTX;
25429 }
25430
25431 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
25432 DMODE is expected destination mode. This is a recursive function. */
25433
25434 static rtx
25435 rs6000_emit_vector_compare (enum rtx_code rcode,
25436 rtx op0, rtx op1,
25437 machine_mode dmode)
25438 {
25439 rtx mask;
25440 bool swap_operands = false;
25441 bool try_again = false;
25442
25443 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
25444 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
25445
25446 /* See if the comparison works as is. */
25447 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25448 if (mask)
25449 return mask;
25450
25451 switch (rcode)
25452 {
25453 case LT:
25454 rcode = GT;
25455 swap_operands = true;
25456 try_again = true;
25457 break;
25458 case LTU:
25459 rcode = GTU;
25460 swap_operands = true;
25461 try_again = true;
25462 break;
25463 case NE:
25464 case UNLE:
25465 case UNLT:
25466 case UNGE:
25467 case UNGT:
25468 /* Invert condition and try again.
25469 e.g., A != B becomes ~(A==B). */
25470 {
25471 enum rtx_code rev_code;
25472 enum insn_code nor_code;
25473 rtx mask2;
25474
25475 rev_code = reverse_condition_maybe_unordered (rcode);
25476 if (rev_code == UNKNOWN)
25477 return NULL_RTX;
25478
25479 nor_code = optab_handler (one_cmpl_optab, dmode);
25480 if (nor_code == CODE_FOR_nothing)
25481 return NULL_RTX;
25482
25483 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
25484 if (!mask2)
25485 return NULL_RTX;
25486
25487 mask = gen_reg_rtx (dmode);
25488 emit_insn (GEN_FCN (nor_code) (mask, mask2));
25489 return mask;
25490 }
25491 break;
25492 case GE:
25493 case GEU:
25494 case LE:
25495 case LEU:
25496 /* Try GT/GTU/LT/LTU OR EQ */
25497 {
25498 rtx c_rtx, eq_rtx;
25499 enum insn_code ior_code;
25500 enum rtx_code new_code;
25501
25502 switch (rcode)
25503 {
25504 case GE:
25505 new_code = GT;
25506 break;
25507
25508 case GEU:
25509 new_code = GTU;
25510 break;
25511
25512 case LE:
25513 new_code = LT;
25514 break;
25515
25516 case LEU:
25517 new_code = LTU;
25518 break;
25519
25520 default:
25521 gcc_unreachable ();
25522 }
25523
25524 ior_code = optab_handler (ior_optab, dmode);
25525 if (ior_code == CODE_FOR_nothing)
25526 return NULL_RTX;
25527
25528 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
25529 if (!c_rtx)
25530 return NULL_RTX;
25531
25532 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
25533 if (!eq_rtx)
25534 return NULL_RTX;
25535
25536 mask = gen_reg_rtx (dmode);
25537 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
25538 return mask;
25539 }
25540 break;
25541 default:
25542 return NULL_RTX;
25543 }
25544
25545 if (try_again)
25546 {
25547 if (swap_operands)
25548 std::swap (op0, op1);
25549
25550 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
25551 if (mask)
25552 return mask;
25553 }
25554
25555 /* You only get two chances. */
25556 return NULL_RTX;
25557 }
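/* For example, NE is handled by emitting the compare for the
reversed condition (EQ) and complementing the resulting mask, while
LE is synthesized recursively as the IOR of the LT and EQ masks. */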
25558
25559 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
25560 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
25561 operands for the relation operation COND. */
25562
25563 int
25564 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
25565 rtx cond, rtx cc_op0, rtx cc_op1)
25566 {
25567 machine_mode dest_mode = GET_MODE (dest);
25568 machine_mode mask_mode = GET_MODE (cc_op0);
25569 enum rtx_code rcode = GET_CODE (cond);
25570 machine_mode cc_mode = CCmode;
25571 rtx mask;
25572 rtx cond2;
25573 bool invert_move = false;
25574
25575 if (VECTOR_UNIT_NONE_P (dest_mode))
25576 return 0;
25577
25578 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
25579 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
25580
25581 switch (rcode)
25582 {
25583 /* Reverse the condition and swap the move arms; the reversed
25584 compare may still be synthesized with a NOR below. */
25585 case NE:
25586 case UNLE:
25587 case UNLT:
25588 case UNGE:
25589 case UNGT:
25590 /* Invert condition and try again.
25591 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
25592 invert_move = true;
25593 rcode = reverse_condition_maybe_unordered (rcode);
25594 if (rcode == UNKNOWN)
25595 return 0;
25596 break;
25597
25598 case GE:
25599 case LE:
25600 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
25601 {
25602 /* Invert condition to avoid compound test. */
25603 invert_move = true;
25604 rcode = reverse_condition (rcode);
25605 }
25606 break;
25607
25608 case GTU:
25609 case GEU:
25610 case LTU:
25611 case LEU:
25612 /* Mark unsigned tests with CCUNSmode. */
25613 cc_mode = CCUNSmode;
25614
25615 /* Invert condition to avoid compound test if necessary. */
25616 if (rcode == GEU || rcode == LEU)
25617 {
25618 invert_move = true;
25619 rcode = reverse_condition (rcode);
25620 }
25621 break;
25622
25623 default:
25624 break;
25625 }
25626
25627 /* Get the vector mask for the given relational operations. */
25628 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
25629
25630 if (!mask)
25631 return 0;
25632
25633 if (invert_move)
25634 std::swap (op_true, op_false);
25635
25636 /* The comparison mask is already -1/0 per element; use it directly when the arms are the constant vectors -1 and 0. */
25637 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
25638 && (GET_CODE (op_true) == CONST_VECTOR
25639 || GET_CODE (op_false) == CONST_VECTOR))
25640 {
25641 rtx constant_0 = CONST0_RTX (dest_mode);
25642 rtx constant_m1 = CONSTM1_RTX (dest_mode);
25643
25644 if (op_true == constant_m1 && op_false == constant_0)
25645 {
25646 emit_move_insn (dest, mask);
25647 return 1;
25648 }
25649
25650 else if (op_true == constant_0 && op_false == constant_m1)
25651 {
25652 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
25653 return 1;
25654 }
25655
25656 /* If we can't use the vector comparison directly, perhaps we can use
25657 the mask for the true or false fields, instead of loading up a
25658 constant. */
25659 if (op_true == constant_m1)
25660 op_true = mask;
25661
25662 if (op_false == constant_0)
25663 op_false = mask;
25664 }
25665
25666 if (!REG_P (op_true) && !SUBREG_P (op_true))
25667 op_true = force_reg (dest_mode, op_true);
25668
25669 if (!REG_P (op_false) && !SUBREG_P (op_false))
25670 op_false = force_reg (dest_mode, op_false);
25671
25672 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
25673 CONST0_RTX (dest_mode));
25674 emit_insn (gen_rtx_SET (dest,
25675 gen_rtx_IF_THEN_ELSE (dest_mode,
25676 cond2,
25677 op_true,
25678 op_false)));
25679 return 1;
25680 }
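/* For example, A = (B > C) ? D : E computes a GT mask over B and C
and emits dest = (mask != 0) ? D : E, which the vector select
(vsel/xxsel) patterns match; A = (B != C) ? D : E is first rewritten
as A = (B == C) ? E : D. */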
25681
25682 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP
25683 instruction for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the
25684 operands of the last comparison is nonzero/true, FALSE_COND if it is
25685 zero/false. Return 0 if the hardware has no such operation. */
25686
25687 static int
25688 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25689 {
25690 enum rtx_code code = GET_CODE (op);
25691 rtx op0 = XEXP (op, 0);
25692 rtx op1 = XEXP (op, 1);
25693 machine_mode compare_mode = GET_MODE (op0);
25694 machine_mode result_mode = GET_MODE (dest);
25695 bool max_p = false;
25696
25697 if (result_mode != compare_mode)
25698 return 0;
25699
25700 if (code == GE || code == GT)
25701 max_p = true;
25702 else if (code == LE || code == LT)
25703 max_p = false;
25704 else
25705 return 0;
25706
25707 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
25708 ;
25709
25710 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
25711 max_p = !max_p;
25712
25713 else
25714 return 0;
25715
25716 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
25717 return 1;
25718 }
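/* As a rough example (exact codegen depends on the surrounding options),
   with -mcpu=power9 the helper above allows

     double dmax (double a, double b) { return a >= b ? a : b; }

   to become a single xsmaxcdp; writing the arms the other way around
   (a >= b ? b : a) flips max_p and yields xsmincdp instead.  */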
25719
25720 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
25721 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
25722 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
25723 zero/false. Return 0 if the hardware has no such operation. */
25724
25725 static int
25726 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25727 {
25728 enum rtx_code code = GET_CODE (op);
25729 rtx op0 = XEXP (op, 0);
25730 rtx op1 = XEXP (op, 1);
25731 machine_mode result_mode = GET_MODE (dest);
25732 rtx compare_rtx;
25733 rtx cmove_rtx;
25734 rtx clobber_rtx;
25735
25736 if (!can_create_pseudo_p ())
25737 return 0;
25738
25739 switch (code)
25740 {
25741 case EQ:
25742 case GE:
25743 case GT:
25744 break;
25745
25746 case NE:
25747 case LT:
25748 case LE:
25749 code = swap_condition (code);
25750 std::swap (op0, op1);
25751 break;
25752
25753 default:
25754 return 0;
25755 }
25756
25757 /* Generate: [(parallel [(set (dest)
25758 (if_then_else (op (cmp1) (cmp2))
25759 (true)
25760 (false)))
25761 (clobber (scratch))])]. */
25762
25763 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
25764 cmove_rtx = gen_rtx_SET (dest,
25765 gen_rtx_IF_THEN_ELSE (result_mode,
25766 compare_rtx,
25767 true_cond,
25768 false_cond));
25769
25770 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
25771 emit_insn (gen_rtx_PARALLEL (VOIDmode,
25772 gen_rtvec (2, cmove_rtx, clobber_rtx)));
25773
25774 return 1;
25775 }
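/* Illustrative only (register numbers hypothetical): the parallel above
   is intended to split into a power9 compare/select pair such as

     xscmpgtdp vsM,vsA,vsB        ; all-ones / all-zeros mask
     xxsel     vsD,vsF,vsT,vsM

   with the V2DImode scratch providing a place to build the mask.  */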
25776
25777 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
25778 operands of the last comparison is nonzero/true, FALSE_COND if it
25779 is zero/false. Return 0 if the hardware has no such operation. */
25780
25781 int
25782 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25783 {
25784 enum rtx_code code = GET_CODE (op);
25785 rtx op0 = XEXP (op, 0);
25786 rtx op1 = XEXP (op, 1);
25787 machine_mode compare_mode = GET_MODE (op0);
25788 machine_mode result_mode = GET_MODE (dest);
25789 rtx temp;
25790 bool is_against_zero;
25791
25792 /* These modes should always match. */
25793 if (GET_MODE (op1) != compare_mode
25794 /* In the isel case however, we can use a compare immediate, so
25795 op1 may be a small constant. */
25796 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
25797 return 0;
25798 if (GET_MODE (true_cond) != result_mode)
25799 return 0;
25800 if (GET_MODE (false_cond) != result_mode)
25801 return 0;
25802
25803 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
25804 if (TARGET_P9_MINMAX
25805 && (compare_mode == SFmode || compare_mode == DFmode)
25806 && (result_mode == SFmode || result_mode == DFmode))
25807 {
25808 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
25809 return 1;
25810
25811 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
25812 return 1;
25813 }
25814
25815 /* Don't allow using floating point comparisons for integer results for
25816 now. */
25817 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
25818 return 0;
25819
25820 /* First, work out if the hardware can do this at all, or
25821 if it's too slow.... */
25822 if (!FLOAT_MODE_P (compare_mode))
25823 {
25824 if (TARGET_ISEL)
25825 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
25826 return 0;
25827 }
25828 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
25829 && SCALAR_FLOAT_MODE_P (compare_mode))
25830 return 0;
25831
25832 is_against_zero = op1 == CONST0_RTX (compare_mode);
25833
25834 /* A floating-point subtract might overflow, underflow, or produce
25835 an inexact result, thus changing the floating-point flags, so it
25836 can't be generated if we care about that. It's safe if one side
25837 of the construct is zero, since then no subtract will be
25838 generated. */
25839 if (SCALAR_FLOAT_MODE_P (compare_mode)
25840 && flag_trapping_math && ! is_against_zero)
25841 return 0;
25842
25843 /* Eliminate half of the comparisons by switching operands; this
25844 makes the remaining code simpler. */
25845 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
25846 || code == LTGT || code == LT || code == UNLE)
25847 {
25848 code = reverse_condition_maybe_unordered (code);
25849 temp = true_cond;
25850 true_cond = false_cond;
25851 false_cond = temp;
25852 }
25853
25854 /* UNEQ and LTGT take four instructions for a comparison with zero,
25855 so it'll probably be faster to use a branch here too. */
25856 if (code == UNEQ && HONOR_NANS (compare_mode))
25857 return 0;
25858
25859 /* We're going to try to implement comparisons by performing
25860 a subtract, then comparing against zero. Unfortunately,
25861 Inf - Inf is NaN, which is not zero, so if we don't
25862 know that the operand is finite and the comparison
25863 would treat EQ differently from UNORDERED, we can't do it. */
25864 if (HONOR_INFINITIES (compare_mode)
25865 && code != GT && code != UNGE
25866 && (GET_CODE (op1) != CONST_DOUBLE
25867 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
25868 /* Constructs of the form (a OP b ? a : b) are safe. */
25869 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
25870 || (! rtx_equal_p (op0, true_cond)
25871 && ! rtx_equal_p (op1, true_cond))))
25872 return 0;
25873
25874 /* At this point we know we can use fsel. */
25875
25876 /* Reduce the comparison to a comparison against zero. */
25877 if (! is_against_zero)
25878 {
25879 temp = gen_reg_rtx (compare_mode);
25880 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
25881 op0 = temp;
25882 op1 = CONST0_RTX (compare_mode);
25883 }
25884
25885 /* If we don't care about NaNs we can reduce some of the comparisons
25886 down to faster ones. */
25887 if (! HONOR_NANS (compare_mode))
25888 switch (code)
25889 {
25890 case GT:
25891 code = LE;
25892 temp = true_cond;
25893 true_cond = false_cond;
25894 false_cond = temp;
25895 break;
25896 case UNGE:
25897 code = GE;
25898 break;
25899 case UNEQ:
25900 code = EQ;
25901 break;
25902 default:
25903 break;
25904 }
25905
25906 /* Now, reduce everything down to a GE. */
25907 switch (code)
25908 {
25909 case GE:
25910 break;
25911
25912 case LE:
25913 temp = gen_reg_rtx (compare_mode);
25914 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25915 op0 = temp;
25916 break;
25917
25918 case ORDERED:
25919 temp = gen_reg_rtx (compare_mode);
25920 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
25921 op0 = temp;
25922 break;
25923
25924 case EQ:
25925 temp = gen_reg_rtx (compare_mode);
25926 emit_insn (gen_rtx_SET (temp,
25927 gen_rtx_NEG (compare_mode,
25928 gen_rtx_ABS (compare_mode, op0))));
25929 op0 = temp;
25930 break;
25931
25932 case UNGE:
25933 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
25934 temp = gen_reg_rtx (result_mode);
25935 emit_insn (gen_rtx_SET (temp,
25936 gen_rtx_IF_THEN_ELSE (result_mode,
25937 gen_rtx_GE (VOIDmode,
25938 op0, op1),
25939 true_cond, false_cond)));
25940 false_cond = true_cond;
25941 true_cond = temp;
25942
25943 temp = gen_reg_rtx (compare_mode);
25944 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25945 op0 = temp;
25946 break;
25947
25948 case GT:
25949 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
25950 temp = gen_reg_rtx (result_mode);
25951 emit_insn (gen_rtx_SET (temp,
25952 gen_rtx_IF_THEN_ELSE (result_mode,
25953 gen_rtx_GE (VOIDmode,
25954 op0, op1),
25955 true_cond, false_cond)));
25956 true_cond = false_cond;
25957 false_cond = temp;
25958
25959 temp = gen_reg_rtx (compare_mode);
25960 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
25961 op0 = temp;
25962 break;
25963
25964 default:
25965 gcc_unreachable ();
25966 }
25967
25968 emit_insn (gen_rtx_SET (dest,
25969 gen_rtx_IF_THEN_ELSE (result_mode,
25970 gen_rtx_GE (VOIDmode,
25971 op0, op1),
25972 true_cond, false_cond)));
25973 return 1;
25974 }
25975
25976 /* Same as above, but for ints (isel). */
25977
25978 static int
25979 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
25980 {
25981 rtx condition_rtx, cr;
25982 machine_mode mode = GET_MODE (dest);
25983 enum rtx_code cond_code;
25984 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
25985 bool signedp;
25986
25987 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
25988 return 0;
25989
25990 /* We still have to do the compare, because isel doesn't do a
25991 compare; it just looks at the CRx bits set by a previous compare
25992 instruction. */
25993 condition_rtx = rs6000_generate_compare (op, mode);
25994 cond_code = GET_CODE (condition_rtx);
25995 cr = XEXP (condition_rtx, 0);
25996 signedp = GET_MODE (cr) == CCmode;
25997
25998 isel_func = (mode == SImode
25999 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
26000 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
26001
26002 switch (cond_code)
26003 {
26004 case LT: case GT: case LTU: case GTU: case EQ:
26005 /* isel handles these directly. */
26006 break;
26007
26008 default:
26009 /* We need to swap the sense of the comparison. */
26010 {
26011 std::swap (false_cond, true_cond);
26012 PUT_CODE (condition_rtx, reverse_condition (cond_code));
26013 }
26014 break;
26015 }
26016
26017 false_cond = force_reg (mode, false_cond);
26018 if (true_cond != const0_rtx)
26019 true_cond = force_reg (mode, true_cond);
26020
26021 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
26022
26023 return 1;
26024 }
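/* A rough picture of the result (registers hypothetical): for

     int pick (int a, int b, int t, int f) { return a > b ? t : f; }

   the expansion is essentially

     cmpw  cr0,r3,r4
     isel  r3,r5,r6,gt            ; r3 = cr0.gt ? r5 : r6

   i.e. the explicit compare sets the CR field and isel just selects
   on one of its bits.  */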
26025
26026 const char *
26027 output_isel (rtx *operands)
26028 {
26029 enum rtx_code code;
26030
26031 code = GET_CODE (operands[1]);
26032
26033 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
26034 {
26035 gcc_assert (GET_CODE (operands[2]) == REG
26036 && GET_CODE (operands[3]) == REG);
26037 PUT_CODE (operands[1], reverse_condition (code));
26038 return "isel %0,%3,%2,%j1";
26039 }
26040
26041 return "isel %0,%2,%3,%j1";
26042 }
26043
26044 void
26045 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
26046 {
26047 machine_mode mode = GET_MODE (op0);
26048 enum rtx_code c;
26049 rtx target;
26050
26051 /* VSX/altivec have direct min/max insns. */
26052 if ((code == SMAX || code == SMIN)
26053 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
26054 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
26055 {
26056 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
26057 return;
26058 }
26059
26060 if (code == SMAX || code == SMIN)
26061 c = GE;
26062 else
26063 c = GEU;
26064
26065 if (code == SMAX || code == UMAX)
26066 target = emit_conditional_move (dest, c, op0, op1, mode,
26067 op0, op1, mode, 0);
26068 else
26069 target = emit_conditional_move (dest, c, op0, op1, mode,
26070 op1, op0, mode, 0);
26071 gcc_assert (target);
26072 if (target != dest)
26073 emit_move_insn (dest, target);
26074 }
26075
26076 /* Split a signbit operation on 64-bit machines with direct move. Also
26077 handle the value coming from memory or already loaded into a GPR. */
26078
26079 void
26080 rs6000_split_signbit (rtx dest, rtx src)
26081 {
26082 machine_mode d_mode = GET_MODE (dest);
26083 machine_mode s_mode = GET_MODE (src);
26084 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
26085 rtx shift_reg = dest_di;
26086
26087 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
26088
26089 if (MEM_P (src))
26090 {
26091 rtx mem = (WORDS_BIG_ENDIAN
26092 ? adjust_address (src, DImode, 0)
26093 : adjust_address (src, DImode, 8));
26094 emit_insn (gen_rtx_SET (dest_di, mem));
26095 }
26096
26097 else
26098 {
26099 unsigned int r = reg_or_subregno (src);
26100
26101 if (INT_REGNO_P (r))
26102 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
26103
26104 else
26105 {
26106 /* Generate the special mfvsrd instruction to get it in a GPR. */
26107 gcc_assert (VSX_REGNO_P (r));
26108 if (s_mode == KFmode)
26109 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
26110 else
26111 emit_insn (gen_signbittf2_dm2 (dest_di, src));
26112 }
26113 }
26114
26115 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
26116 return;
26117 }
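/* Roughly, for an IEEE __float128 already sitting in a VSX register
   this split amounts to (register numbers hypothetical)

     mfvsrd  r3,vs34              ; high doubleword into a GPR
     srdi    r3,r3,63             ; sign is the most significant bit

   where the mfvsrd comes from the signbit*2_dm2 patterns and the
   shift from gen_lshrdi3 above.  */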
26118
26119 /* A subroutine of the atomic operation splitters. Jump to LABEL if
26120 COND is true. Mark the jump as unlikely to be taken. */
26121
26122 static void
26123 emit_unlikely_jump (rtx cond, rtx label)
26124 {
26125 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
26126 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
26127 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
26128 }
26129
26130 /* A subroutine of the atomic operation splitters. Emit a load-locked
26131 instruction in MODE. For QI/HImode, possibly use a pattern that includes
26132 the zero_extend operation. */
26133
26134 static void
26135 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
26136 {
26137 rtx (*fn) (rtx, rtx) = NULL;
26138
26139 switch (mode)
26140 {
26141 case E_QImode:
26142 fn = gen_load_lockedqi;
26143 break;
26144 case E_HImode:
26145 fn = gen_load_lockedhi;
26146 break;
26147 case E_SImode:
26148 if (GET_MODE (mem) == QImode)
26149 fn = gen_load_lockedqi_si;
26150 else if (GET_MODE (mem) == HImode)
26151 fn = gen_load_lockedhi_si;
26152 else
26153 fn = gen_load_lockedsi;
26154 break;
26155 case E_DImode:
26156 fn = gen_load_lockeddi;
26157 break;
26158 case E_TImode:
26159 fn = gen_load_lockedti;
26160 break;
26161 default:
26162 gcc_unreachable ();
26163 }
26164 emit_insn (fn (reg, mem));
26165 }
26166
26167 /* A subroutine of the atomic operation splitters. Emit a store-conditional
26168 instruction in MODE. */
26169
26170 static void
26171 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
26172 {
26173 rtx (*fn) (rtx, rtx, rtx) = NULL;
26174
26175 switch (mode)
26176 {
26177 case E_QImode:
26178 fn = gen_store_conditionalqi;
26179 break;
26180 case E_HImode:
26181 fn = gen_store_conditionalhi;
26182 break;
26183 case E_SImode:
26184 fn = gen_store_conditionalsi;
26185 break;
26186 case E_DImode:
26187 fn = gen_store_conditionaldi;
26188 break;
26189 case E_TImode:
26190 fn = gen_store_conditionalti;
26191 break;
26192 default:
26193 gcc_unreachable ();
26194 }
26195
26196 /* Emit a sync before stwcx. to work around PPC405 erratum 77. */
26197 if (PPC405_ERRATUM77)
26198 emit_insn (gen_hwsync ());
26199
26200 emit_insn (fn (res, mem, val));
26201 }
26202
26203 /* Expand barriers before and after a load_locked/store_cond sequence. */
26204
26205 static rtx
26206 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
26207 {
26208 rtx addr = XEXP (mem, 0);
26209 int strict_p = (reload_in_progress || reload_completed);
26210
26211 if (!legitimate_indirect_address_p (addr, strict_p)
26212 && !legitimate_indexed_address_p (addr, strict_p))
26213 {
26214 addr = force_reg (Pmode, addr);
26215 mem = replace_equiv_address_nv (mem, addr);
26216 }
26217
26218 switch (model)
26219 {
26220 case MEMMODEL_RELAXED:
26221 case MEMMODEL_CONSUME:
26222 case MEMMODEL_ACQUIRE:
26223 break;
26224 case MEMMODEL_RELEASE:
26225 case MEMMODEL_ACQ_REL:
26226 emit_insn (gen_lwsync ());
26227 break;
26228 case MEMMODEL_SEQ_CST:
26229 emit_insn (gen_hwsync ());
26230 break;
26231 default:
26232 gcc_unreachable ();
26233 }
26234 return mem;
26235 }
26236
26237 static void
26238 rs6000_post_atomic_barrier (enum memmodel model)
26239 {
26240 switch (model)
26241 {
26242 case MEMMODEL_RELAXED:
26243 case MEMMODEL_CONSUME:
26244 case MEMMODEL_RELEASE:
26245 break;
26246 case MEMMODEL_ACQUIRE:
26247 case MEMMODEL_ACQ_REL:
26248 case MEMMODEL_SEQ_CST:
26249 emit_insn (gen_isync ());
26250 break;
26251 default:
26252 gcc_unreachable ();
26253 }
26254 }
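/* Taken together, the two helpers above place barriers around a
   load-locked/store-conditional sequence as follows:

     memory model   before loop   after loop
     relaxed        -             -
     consume        -             -
     acquire        -             isync
     release        lwsync        -
     acq_rel        lwsync        isync
     seq_cst        hwsync        isync  */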
26255
26256 /* A subroutine of the various atomic expanders. For sub-word operations,
26257 we must adjust things to operate on SImode. Given the original MEM,
26258 return a new aligned memory. Also build and return the quantities by
26259 which to shift and mask. */
26260
26261 static rtx
26262 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
26263 {
26264 rtx addr, align, shift, mask, mem;
26265 HOST_WIDE_INT shift_mask;
26266 machine_mode mode = GET_MODE (orig_mem);
26267
26268 /* For smaller modes, we have to implement this via SImode. */
26269 shift_mask = (mode == QImode ? 0x18 : 0x10);
26270
26271 addr = XEXP (orig_mem, 0);
26272 addr = force_reg (GET_MODE (addr), addr);
26273
26274 /* Aligned memory containing the subword. Generate a new MEM. We
26275 do not want any of the existing MEM_ATTR data, as we're now
26276 accessing memory outside the original object. */
26277 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
26278 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26279 mem = gen_rtx_MEM (SImode, align);
26280 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
26281 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
26282 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
26283
26284 /* Shift amount for subword relative to aligned word. */
26285 shift = gen_reg_rtx (SImode);
26286 addr = gen_lowpart (SImode, addr);
26287 rtx tmp = gen_reg_rtx (SImode);
26288 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
26289 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
26290 if (BYTES_BIG_ENDIAN)
26291 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
26292 shift, 1, OPTAB_LIB_WIDEN);
26293 *pshift = shift;
26294
26295 /* Mask for insertion. */
26296 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
26297 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
26298 *pmask = mask;
26299
26300 return mem;
26301 }
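/* Worked example (little-endian, QImode): for a byte at address
   A == 0x1002 the code above computes

     aligned address = A & -4        = 0x1000
     shift           = (A << 3) & 24 = 16
     mask            = 0xff << 16    = 0x00ff0000

   so the byte occupies bits 16..23 of the aligned SImode word; on
   big-endian the shift is XORed with 24, giving 8 instead.  */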
26302
26303 /* A subroutine of the various atomic expanders. For sub-word operands,
26304 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
26305
26306 static rtx
26307 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
26308 {
26309 rtx x;
26310
26311 x = gen_reg_rtx (SImode);
26312 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
26313 gen_rtx_NOT (SImode, mask),
26314 oldval)));
26315
26316 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
26317
26318 return x;
26319 }
26320
26321 /* A subroutine of the various atomic expanders. For sub-word operands,
26322 extract WIDE to NARROW via SHIFT. */
26323
26324 static void
26325 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
26326 {
26327 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
26328 wide, 1, OPTAB_LIB_WIDEN);
26329 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
26330 }
26331
26332 /* Expand an atomic compare and swap operation. */
26333
26334 void
26335 rs6000_expand_atomic_compare_and_swap (rtx operands[])
26336 {
26337 rtx boolval, retval, mem, oldval, newval, cond;
26338 rtx label1, label2, x, mask, shift;
26339 machine_mode mode, orig_mode;
26340 enum memmodel mod_s, mod_f;
26341 bool is_weak;
26342
26343 boolval = operands[0];
26344 retval = operands[1];
26345 mem = operands[2];
26346 oldval = operands[3];
26347 newval = operands[4];
26348 is_weak = (INTVAL (operands[5]) != 0);
26349 mod_s = memmodel_base (INTVAL (operands[6]));
26350 mod_f = memmodel_base (INTVAL (operands[7]));
26351 orig_mode = mode = GET_MODE (mem);
26352
26353 mask = shift = NULL_RTX;
26354 if (mode == QImode || mode == HImode)
26355 {
26356 /* Before power8, we didn't have access to lbarx/lharx, so generate a
26357 lwarx and shift/mask operations. With power8, we need to do the
26358 comparison in SImode, but the store is still done in QI/HImode. */
26359 oldval = convert_modes (SImode, mode, oldval, 1);
26360
26361 if (!TARGET_SYNC_HI_QI)
26362 {
26363 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26364
26365 /* Shift and mask OLDVAL into position within the word. */
26366 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
26367 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26368
26369 /* Shift and mask NEWVAL into position within the word. */
26370 newval = convert_modes (SImode, mode, newval, 1);
26371 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
26372 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26373 }
26374
26375 /* Prepare to adjust the return value. */
26376 retval = gen_reg_rtx (SImode);
26377 mode = SImode;
26378 }
26379 else if (reg_overlap_mentioned_p (retval, oldval))
26380 oldval = copy_to_reg (oldval);
26381
26382 if (mode != TImode && !reg_or_short_operand (oldval, mode))
26383 oldval = copy_to_mode_reg (mode, oldval);
26384
26385 if (reg_overlap_mentioned_p (retval, newval))
26386 newval = copy_to_reg (newval);
26387
26388 mem = rs6000_pre_atomic_barrier (mem, mod_s);
26389
26390 label1 = NULL_RTX;
26391 if (!is_weak)
26392 {
26393 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26394 emit_label (XEXP (label1, 0));
26395 }
26396 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26397
26398 emit_load_locked (mode, retval, mem);
26399
26400 x = retval;
26401 if (mask)
26402 x = expand_simple_binop (SImode, AND, retval, mask,
26403 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26404
26405 cond = gen_reg_rtx (CCmode);
26406 /* If we have TImode, synthesize a comparison. */
26407 if (mode != TImode)
26408 x = gen_rtx_COMPARE (CCmode, x, oldval);
26409 else
26410 {
26411 rtx xor1_result = gen_reg_rtx (DImode);
26412 rtx xor2_result = gen_reg_rtx (DImode);
26413 rtx or_result = gen_reg_rtx (DImode);
26414 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
26415 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
26416 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
26417 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
26418
26419 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
26420 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
26421 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
26422 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
26423 }
26424
26425 emit_insn (gen_rtx_SET (cond, x));
26426
26427 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26428 emit_unlikely_jump (x, label2);
26429
26430 x = newval;
26431 if (mask)
26432 x = rs6000_mask_atomic_subword (retval, newval, mask);
26433
26434 emit_store_conditional (orig_mode, cond, mem, x);
26435
26436 if (!is_weak)
26437 {
26438 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26439 emit_unlikely_jump (x, label1);
26440 }
26441
26442 if (!is_mm_relaxed (mod_f))
26443 emit_label (XEXP (label2, 0));
26444
26445 rs6000_post_atomic_barrier (mod_s);
26446
26447 if (is_mm_relaxed (mod_f))
26448 emit_label (XEXP (label2, 0));
26449
26450 if (shift)
26451 rs6000_finish_atomic_subword (operands[1], retval, shift);
26452 else if (mode != GET_MODE (operands[1]))
26453 convert_move (operands[1], retval, 1);
26454
26455 /* In all cases, CR0 contains EQ on success, and NE on failure. */
26456 x = gen_rtx_EQ (SImode, cond, const0_rtx);
26457 emit_insn (gen_rtx_SET (boolval, x));
26458 }
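/* For reference, a strong SImode compare-and-swap with SEQ_CST
   ordering expands to approximately (labels/registers hypothetical):

       hwsync
     1: lwarx   rRET,0,rMEM       ; load-locked
        cmpw    cr0,rRET,rOLD
        bne-    cr0,2f            ; unlikely: values differ, fail
        stwcx.  rNEW,0,rMEM       ; store-conditional
        bne-    cr0,1b            ; unlikely: reservation lost, retry
     2: isync

   after which CR0.EQ is copied into BOOLVAL as the success flag.  */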
26459
26460 /* Expand an atomic exchange operation. */
26461
26462 void
26463 rs6000_expand_atomic_exchange (rtx operands[])
26464 {
26465 rtx retval, mem, val, cond;
26466 machine_mode mode;
26467 enum memmodel model;
26468 rtx label, x, mask, shift;
26469
26470 retval = operands[0];
26471 mem = operands[1];
26472 val = operands[2];
26473 model = memmodel_base (INTVAL (operands[3]));
26474 mode = GET_MODE (mem);
26475
26476 mask = shift = NULL_RTX;
26477 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
26478 {
26479 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26480
26481 /* Shift and mask VAL into position within the word. */
26482 val = convert_modes (SImode, mode, val, 1);
26483 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26484 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26485
26486 /* Prepare to adjust the return value. */
26487 retval = gen_reg_rtx (SImode);
26488 mode = SImode;
26489 }
26490
26491 mem = rs6000_pre_atomic_barrier (mem, model);
26492
26493 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
26494 emit_label (XEXP (label, 0));
26495
26496 emit_load_locked (mode, retval, mem);
26497
26498 x = val;
26499 if (mask)
26500 x = rs6000_mask_atomic_subword (retval, val, mask);
26501
26502 cond = gen_reg_rtx (CCmode);
26503 emit_store_conditional (mode, cond, mem, x);
26504
26505 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26506 emit_unlikely_jump (x, label);
26507
26508 rs6000_post_atomic_barrier (model);
26509
26510 if (shift)
26511 rs6000_finish_atomic_subword (operands[0], retval, shift);
26512 }
26513
26514 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
26515 to perform. MEM is the memory on which to operate. VAL is the second
26516 operand of the binary operator. BEFORE and AFTER are optional locations to
26517 return the value of MEM either before or after the operation. MODEL_RTX
26518 is a CONST_INT containing the memory model to use. */
26519
26520 void
26521 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
26522 rtx orig_before, rtx orig_after, rtx model_rtx)
26523 {
26524 enum memmodel model = memmodel_base (INTVAL (model_rtx));
26525 machine_mode mode = GET_MODE (mem);
26526 machine_mode store_mode = mode;
26527 rtx label, x, cond, mask, shift;
26528 rtx before = orig_before, after = orig_after;
26529
26530 mask = shift = NULL_RTX;
26531 /* On power8, we want to use SImode for the operation itself. On previous
26532 systems, operate on the SImode word containing the subword and shift/mask
26533 to get the proper byte or halfword. */
26534 if (mode == QImode || mode == HImode)
26535 {
26536 if (TARGET_SYNC_HI_QI)
26537 {
26538 val = convert_modes (SImode, mode, val, 1);
26539
26540 /* Prepare to adjust the return value. */
26541 before = gen_reg_rtx (SImode);
26542 if (after)
26543 after = gen_reg_rtx (SImode);
26544 mode = SImode;
26545 }
26546 else
26547 {
26548 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
26549
26550 /* Shift and mask VAL into position within the word. */
26551 val = convert_modes (SImode, mode, val, 1);
26552 val = expand_simple_binop (SImode, ASHIFT, val, shift,
26553 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26554
26555 switch (code)
26556 {
26557 case IOR:
26558 case XOR:
26559 /* We've already zero-extended VAL. That is sufficient to
26560 make certain that it does not affect other bits. */
26561 mask = NULL;
26562 break;
26563
26564 case AND:
26565 /* If we make certain that all of the other bits in VAL are
26566 set, that will be sufficient to not affect other bits. */
26567 x = gen_rtx_NOT (SImode, mask);
26568 x = gen_rtx_IOR (SImode, x, val);
26569 emit_insn (gen_rtx_SET (val, x));
26570 mask = NULL;
26571 break;
26572
26573 case NOT:
26574 case PLUS:
26575 case MINUS:
26576 /* These will all affect bits outside the field and need
26577 adjustment via MASK within the loop. */
26578 break;
26579
26580 default:
26581 gcc_unreachable ();
26582 }
26583
26584 /* Prepare to adjust the return value. */
26585 before = gen_reg_rtx (SImode);
26586 if (after)
26587 after = gen_reg_rtx (SImode);
26588 store_mode = mode = SImode;
26589 }
26590 }
26591
26592 mem = rs6000_pre_atomic_barrier (mem, model);
26593
26594 label = gen_label_rtx ();
26595 emit_label (label);
26596 label = gen_rtx_LABEL_REF (VOIDmode, label);
26597
26598 if (before == NULL_RTX)
26599 before = gen_reg_rtx (mode);
26600
26601 emit_load_locked (mode, before, mem);
26602
26603 if (code == NOT)
26604 {
26605 x = expand_simple_binop (mode, AND, before, val,
26606 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26607 after = expand_simple_unop (mode, NOT, x, after, 1);
26608 }
26609 else
26610 {
26611 after = expand_simple_binop (mode, code, before, val,
26612 after, 1, OPTAB_LIB_WIDEN);
26613 }
26614
26615 x = after;
26616 if (mask)
26617 {
26618 x = expand_simple_binop (SImode, AND, after, mask,
26619 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26620 x = rs6000_mask_atomic_subword (before, x, mask);
26621 }
26622 else if (store_mode != mode)
26623 x = convert_modes (store_mode, mode, x, 1);
26624
26625 cond = gen_reg_rtx (CCmode);
26626 emit_store_conditional (store_mode, cond, mem, x);
26627
26628 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
26629 emit_unlikely_jump (x, label);
26630
26631 rs6000_post_atomic_barrier (model);
26632
26633 if (shift)
26634 {
26635 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
26636 then do the calculations in an SImode register. */
26637 if (orig_before)
26638 rs6000_finish_atomic_subword (orig_before, before, shift);
26639 if (orig_after)
26640 rs6000_finish_atomic_subword (orig_after, after, shift);
26641 }
26642 else if (store_mode != mode)
26643 {
26644 /* QImode/HImode on machines with lbarx/lharx where we do the native
26645 operation and then do the calculations in an SImode register. */
26646 if (orig_before)
26647 convert_move (orig_before, before, 1);
26648 if (orig_after)
26649 convert_move (orig_after, after, 1);
26650 }
26651 else if (orig_after && after != orig_after)
26652 emit_move_insn (orig_after, after);
26653 }
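/* As an example, an SImode __atomic_fetch_add with ACQ_REL ordering
   becomes roughly (registers hypothetical):

       lwsync
     1: lwarx   rBEF,0,rMEM
        add     rAFT,rBEF,rVAL
        stwcx.  rAFT,0,rMEM
        bne-    1b                ; reservation lost, retry
        isync

   with the subword shift/mask adjustments above layered on top for
   QImode/HImode on targets without lbarx/lharx.  */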
26654
26655 /* Emit instructions to move SRC to DST. Called by splitters for
26656 multi-register moves. It will emit at most one instruction for
26657 each register that is accessed; that is, it won't emit li/lis pairs
26658 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26659 register. */
26660
26661 void
26662 rs6000_split_multireg_move (rtx dst, rtx src)
26663 {
26664 /* The register number of the first register being moved. */
26665 int reg;
26666 /* The mode that is to be moved. */
26667 machine_mode mode;
26668 /* The mode that the move is being done in, and its size. */
26669 machine_mode reg_mode;
26670 int reg_mode_size;
26671 /* The number of registers that will be moved. */
26672 int nregs;
26673
26674 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26675 mode = GET_MODE (dst);
26676 nregs = hard_regno_nregs[reg][mode];
26677 if (FP_REGNO_P (reg))
26678 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26679 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
26680 else if (ALTIVEC_REGNO_P (reg))
26681 reg_mode = V16QImode;
26682 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
26683 reg_mode = DFmode;
26684 else
26685 reg_mode = word_mode;
26686 reg_mode_size = GET_MODE_SIZE (reg_mode);
26687
26688 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26689
26690 /* TDmode residing in FP registers is special, since the ISA requires that
26691 the lower-numbered word of a register pair is always the most significant
26692 word, even in little-endian mode. This does not match the usual subreg
26693 semantics, so we cannot use simplify_gen_subreg in those cases. Access
26694 the appropriate constituent registers "by hand" in little-endian mode.
26695
26696 Note we do not need to check for destructive overlap here since TDmode
26697 can only reside in even/odd register pairs. */
26698 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26699 {
26700 rtx p_src, p_dst;
26701 int i;
26702
26703 for (i = 0; i < nregs; i++)
26704 {
26705 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26706 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26707 else
26708 p_src = simplify_gen_subreg (reg_mode, src, mode,
26709 i * reg_mode_size);
26710
26711 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26712 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26713 else
26714 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26715 i * reg_mode_size);
26716
26717 emit_insn (gen_rtx_SET (p_dst, p_src));
26718 }
26719
26720 return;
26721 }
26722
26723 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26724 {
26725 /* Move register range backwards, if we might have destructive
26726 overlap. */
26727 int i;
26728 for (i = nregs - 1; i >= 0; i--)
26729 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26730 i * reg_mode_size),
26731 simplify_gen_subreg (reg_mode, src, mode,
26732 i * reg_mode_size)));
26733 }
26734 else
26735 {
26736 int i;
26737 int j = -1;
26738 bool used_update = false;
26739 rtx restore_basereg = NULL_RTX;
26740
26741 if (MEM_P (src) && INT_REGNO_P (reg))
26742 {
26743 rtx breg;
26744
26745 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26746 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26747 {
26748 rtx delta_rtx;
26749 breg = XEXP (XEXP (src, 0), 0);
26750 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26751 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26752 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26753 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26754 src = replace_equiv_address (src, breg);
26755 }
26756 else if (! rs6000_offsettable_memref_p (src, reg_mode))
26757 {
26758 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26759 {
26760 rtx basereg = XEXP (XEXP (src, 0), 0);
26761 if (TARGET_UPDATE)
26762 {
26763 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26764 emit_insn (gen_rtx_SET (ndst,
26765 gen_rtx_MEM (reg_mode,
26766 XEXP (src, 0))));
26767 used_update = true;
26768 }
26769 else
26770 emit_insn (gen_rtx_SET (basereg,
26771 XEXP (XEXP (src, 0), 1)));
26772 src = replace_equiv_address (src, basereg);
26773 }
26774 else
26775 {
26776 rtx basereg = gen_rtx_REG (Pmode, reg);
26777 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26778 src = replace_equiv_address (src, basereg);
26779 }
26780 }
26781
26782 breg = XEXP (src, 0);
26783 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26784 breg = XEXP (breg, 0);
26785
26786 /* If the base register we are using to address memory is
26787 also a destination reg, then change that register last. */
26788 if (REG_P (breg)
26789 && REGNO (breg) >= REGNO (dst)
26790 && REGNO (breg) < REGNO (dst) + nregs)
26791 j = REGNO (breg) - REGNO (dst);
26792 }
26793 else if (MEM_P (dst) && INT_REGNO_P (reg))
26794 {
26795 rtx breg;
26796
26797 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26798 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26799 {
26800 rtx delta_rtx;
26801 breg = XEXP (XEXP (dst, 0), 0);
26802 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26803 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26804 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26805
26806 /* We have to update the breg before doing the store.
26807 Use store with update, if available. */
26808
26809 if (TARGET_UPDATE)
26810 {
26811 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26812 emit_insn (TARGET_32BIT
26813 ? (TARGET_POWERPC64
26814 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26815 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
26816 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26817 used_update = true;
26818 }
26819 else
26820 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26821 dst = replace_equiv_address (dst, breg);
26822 }
26823 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
26824 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26825 {
26826 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
26827 {
26828 rtx basereg = XEXP (XEXP (dst, 0), 0);
26829 if (TARGET_UPDATE)
26830 {
26831 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26832 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
26833 XEXP (dst, 0)),
26834 nsrc));
26835 used_update = true;
26836 }
26837 else
26838 emit_insn (gen_rtx_SET (basereg,
26839 XEXP (XEXP (dst, 0), 1)));
26840 dst = replace_equiv_address (dst, basereg);
26841 }
26842 else
26843 {
26844 rtx basereg = XEXP (XEXP (dst, 0), 0);
26845 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
26846 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
26847 && REG_P (basereg)
26848 && REG_P (offsetreg)
26849 && REGNO (basereg) != REGNO (offsetreg));
26850 if (REGNO (basereg) == 0)
26851 {
26852 rtx tmp = offsetreg;
26853 offsetreg = basereg;
26854 basereg = tmp;
26855 }
26856 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
26857 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
26858 dst = replace_equiv_address (dst, basereg);
26859 }
26860 }
26861 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
26862 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
26863 }
26864
26865 for (i = 0; i < nregs; i++)
26866 {
26867 /* Calculate index to next subword. */
26868 ++j;
26869 if (j == nregs)
26870 j = 0;
26871
26872 /* If the compiler already emitted the move of the first word
26873 via store with update, there is no need to do anything. */
26874 if (j == 0 && used_update)
26875 continue;
26876
26877 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26878 j * reg_mode_size),
26879 simplify_gen_subreg (reg_mode, src, mode,
26880 j * reg_mode_size)));
26881 }
26882 if (restore_basereg != NULL_RTX)
26883 emit_insn (restore_basereg);
26884 }
26885 }
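/* Example: on 32-bit, splitting a TImode move from r5..r8 into r6..r9
   has destructive overlap, and since REGNO (src) < REGNO (dst) the
   backwards loop above emits the SImode moves in the order
   r9=r8, r8=r7, r7=r6, r6=r5, so no input is clobbered early.  */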
26886
26887 \f
26888 /* This page contains routines that are used to determine what the
26889 function prologue and epilogue code will do and write them out. */
26890
26891 static inline bool
26892 save_reg_p (int r)
26893 {
26894 return !call_used_regs[r] && df_regs_ever_live_p (r);
26895 }
26896
26897 /* Determine whether GP register REG is really used. */
26898
26899 static bool
26900 rs6000_reg_live_or_pic_offset_p (int reg)
26901 {
26902 /* We need to mark the PIC offset register live for the same conditions
26903 as it is set up, or otherwise it won't be saved before we clobber it. */
26904
26905 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
26906 {
26907 if (TARGET_TOC && TARGET_MINIMAL_TOC
26908 && (crtl->calls_eh_return
26909 || df_regs_ever_live_p (reg)
26910 || !constant_pool_empty_p ()))
26911 return true;
26912
26913 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
26914 && flag_pic)
26915 return true;
26916 }
26917
26918 /* If the function calls eh_return, treat as used all the registers
26919 that would otherwise be checked for liveness. */
26920
26921 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
26922 && !call_used_regs[reg]);
26923 }
26924
26925 /* Return the first fixed-point register that is required to be
26926 saved. 32 if none. */
26927
26928 int
26929 first_reg_to_save (void)
26930 {
26931 int first_reg;
26932
26933 /* Find lowest numbered live register. */
26934 for (first_reg = 13; first_reg <= 31; first_reg++)
26935 if (save_reg_p (first_reg))
26936 break;
26937
26938 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
26939 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
26940 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
26941 || (TARGET_TOC && TARGET_MINIMAL_TOC))
26942 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
26943 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
26944
26945 #if TARGET_MACHO
26946 if (flag_pic
26947 && crtl->uses_pic_offset_table
26948 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
26949 return RS6000_PIC_OFFSET_TABLE_REGNUM;
26950 #endif
26951
26952 return first_reg;
26953 }
26954
26955 /* Similar, for FP regs. */
26956
26957 int
26958 first_fp_reg_to_save (void)
26959 {
26960 int first_reg;
26961
26962 /* Find lowest numbered live register. */
26963 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
26964 if (save_reg_p (first_reg))
26965 break;
26966
26967 return first_reg;
26968 }
26969
26970 /* Similar, for AltiVec regs. */
26971
26972 static int
26973 first_altivec_reg_to_save (void)
26974 {
26975 int i;
26976
26977 /* Stack frame remains as is unless we are in AltiVec ABI. */
26978 if (! TARGET_ALTIVEC_ABI)
26979 return LAST_ALTIVEC_REGNO + 1;
26980
26981 /* On Darwin, the unwind routines are compiled without
26982 TARGET_ALTIVEC, and use save_world to save/restore the
26983 altivec registers when necessary. */
26984 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
26985 && ! TARGET_ALTIVEC)
26986 return FIRST_ALTIVEC_REGNO + 20;
26987
26988 /* Find lowest numbered live register. */
26989 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
26990 if (save_reg_p (i))
26991 break;
26992
26993 return i;
26994 }
26995
26996 /* Return a 32-bit mask of the AltiVec registers we need to set in
26997 VRSAVE. Bit n of the return value is 1 if Vn is live; the MSB of
26998 the 32-bit word is numbered bit 0. */
26999
27000 static unsigned int
27001 compute_vrsave_mask (void)
27002 {
27003 unsigned int i, mask = 0;
27004
27005 /* On Darwin, the unwind routines are compiled without
27006 TARGET_ALTIVEC, and use save_world to save/restore the
27007 call-saved altivec registers when necessary. */
27008 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
27009 && ! TARGET_ALTIVEC)
27010 mask |= 0xFFF;
27011
27012 /* First, find out if we use _any_ altivec registers. */
27013 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
27014 if (df_regs_ever_live_p (i))
27015 mask |= ALTIVEC_REG_BIT (i);
27016
27017 if (mask == 0)
27018 return mask;
27019
27020 /* Next, remove the argument registers from the set. These must
27021 be in the VRSAVE mask set by the caller, so we don't need to add
27022 them in again. More importantly, the mask we compute here is
27023 used to generate CLOBBERs in the set_vrsave insn, and we do not
27024 wish the argument registers to die. */
27025 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
27026 mask &= ~ALTIVEC_REG_BIT (i);
27027
27028 /* Similarly, remove the return value from the set. */
27029 {
27030 bool yes = false;
27031 diddle_return_value (is_altivec_return_reg, &yes);
27032 if (yes)
27033 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
27034 }
27035
27036 return mask;
27037 }
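/* For instance, if only v20 and v26 are live and neither is an
   argument or return-value register, then (assuming the usual VRSAVE
   encoding where bit 0, the MSB, corresponds to V0)

     mask = ALTIVEC_REG_BIT (v20) | ALTIVEC_REG_BIT (v26)
          = (0x80000000 >> 20) | (0x80000000 >> 26)
          = 0x00000820.  */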
27038
27039 /* In a very restricted set of circumstances, we can cut down the
27040 size of prologues/epilogues by calling our own save/restore-the-world
27041 routines. */
27042
27043 static void
27044 compute_save_world_info (rs6000_stack_t *info)
27045 {
27046 info->world_save_p = 1;
27047 info->world_save_p
27048 = (WORLD_SAVE_P (info)
27049 && DEFAULT_ABI == ABI_DARWIN
27050 && !cfun->has_nonlocal_label
27051 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
27052 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
27053 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
27054 && info->cr_save_p);
27055
27056 /* This will not work in conjunction with sibcalls. Make sure there
27057 are none. (This check is expensive, but seldom executed.) */
27058 if (WORLD_SAVE_P (info))
27059 {
27060 rtx_insn *insn;
27061 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
27062 if (CALL_P (insn) && SIBLING_CALL_P (insn))
27063 {
27064 info->world_save_p = 0;
27065 break;
27066 }
27067 }
27068
27069 if (WORLD_SAVE_P (info))
27070 {
27071 /* Even if we're not touching VRsave, make sure there's room on the
27072 stack for it, if it looks like we're calling SAVE_WORLD, which
27073 will attempt to save it. */
27074 info->vrsave_size = 4;
27075
27076 /* If we are going to save the world, we need to save the link register too. */
27077 info->lr_save_p = 1;
27078
27079 /* "Save" the VRsave register too if we're saving the world. */
27080 if (info->vrsave_mask == 0)
27081 info->vrsave_mask = compute_vrsave_mask ();
27082
27083 /* Because the Darwin register save/restore routines only handle
27084 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
27085 check. */
27086 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
27087 && (info->first_altivec_reg_save
27088 >= FIRST_SAVED_ALTIVEC_REGNO));
27089 }
27090
27091 return;
27092 }
27093
27094
27095 static void
27096 is_altivec_return_reg (rtx reg, void *xyes)
27097 {
27098 bool *yes = (bool *) xyes;
27099 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
27100 *yes = true;
27101 }
27102
27103 \f
27104 /* Return whether REG is a global user reg or has been specified by
27105 -ffixed-REG. We should not restore these, and so cannot use
27106 lmw or out-of-line restore functions if there are any. We also
27107 can't save them (well, emit frame notes for them), because frame
27108 unwinding during exception handling will restore saved registers. */
27109
27110 static bool
27111 fixed_reg_p (int reg)
27112 {
27113 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
27114 backend sets it, overriding anything the user might have given. */
27115 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
27116 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
27117 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
27118 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
27119 return false;
27120
27121 return fixed_regs[reg];
27122 }
27123
27124 /* Determine the strategy for saving/restoring registers. */
27125
27126 enum {
27127 SAVE_MULTIPLE = 0x1,
27128 SAVE_INLINE_GPRS = 0x2,
27129 SAVE_INLINE_FPRS = 0x4,
27130 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
27131 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
27132 SAVE_INLINE_VRS = 0x20,
27133 REST_MULTIPLE = 0x100,
27134 REST_INLINE_GPRS = 0x200,
27135 REST_INLINE_FPRS = 0x400,
27136 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
27137 REST_INLINE_VRS = 0x1000
27138 };
27139
27140 static int
27141 rs6000_savres_strategy (rs6000_stack_t *info,
27142 bool using_static_chain_p)
27143 {
27144 int strategy = 0;
27145
27146 /* Select between in-line and out-of-line save and restore of regs.
27147 First, all the obvious cases where we don't use out-of-line. */
27148 if (crtl->calls_eh_return
27149 || cfun->machine->ra_need_lr)
27150 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
27151 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
27152 | SAVE_INLINE_VRS | REST_INLINE_VRS);
27153
27154 if (info->first_gp_reg_save == 32)
27155 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27156
27157 if (info->first_fp_reg_save == 64
27158 /* The out-of-line FP routines use double-precision stores;
27159 we can't use those routines if we don't have such stores. */
27160 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
27161 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27162
27163 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
27164 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27165
27166 /* Define cutoff for using out-of-line functions to save registers. */
27167 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
27168 {
27169 if (!optimize_size)
27170 {
27171 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27172 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27173 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27174 }
27175 else
27176 {
27177 /* Prefer out-of-line restore if it will exit. */
27178 if (info->first_fp_reg_save > 61)
27179 strategy |= SAVE_INLINE_FPRS;
27180 if (info->first_gp_reg_save > 29)
27181 {
27182 if (info->first_fp_reg_save == 64)
27183 strategy |= SAVE_INLINE_GPRS;
27184 else
27185 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27186 }
27187 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
27188 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27189 }
27190 }
27191 else if (DEFAULT_ABI == ABI_DARWIN)
27192 {
27193 if (info->first_fp_reg_save > 60)
27194 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27195 if (info->first_gp_reg_save > 29)
27196 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27197 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27198 }
27199 else
27200 {
27201 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27202 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
27203 || info->first_fp_reg_save > 61)
27204 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
27205 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27206 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
27207 }
27208
27209 /* Don't bother to try to save things out-of-line if r11 is occupied
27210 by the static chain. It would require too much fiddling and the
27211 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
27212 pointer on Darwin, and AIX uses r1 or r12. */
27213 if (using_static_chain_p
27214 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
27215 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
27216 | SAVE_INLINE_GPRS
27217 | SAVE_INLINE_VRS);
27218
27219 /* Saving CR interferes with the exit routines used on the SPE, so
27220 just punt here. */
27221 if (TARGET_SPE_ABI
27222 && info->spe_64bit_regs_used
27223 && info->cr_save_p)
27224 strategy |= REST_INLINE_GPRS;
27225
27226 /* We can only use the out-of-line routines to restore fprs if we've
27227 saved all the registers from first_fp_reg_save in the prologue.
27228 Otherwise, we risk loading garbage. Of course, if we have saved
27229 out-of-line then we know we haven't skipped any fprs. */
27230 if ((strategy & SAVE_INLINE_FPRS)
27231 && !(strategy & REST_INLINE_FPRS))
27232 {
27233 int i;
27234
27235 for (i = info->first_fp_reg_save; i < 64; i++)
27236 if (fixed_regs[i] || !save_reg_p (i))
27237 {
27238 strategy |= REST_INLINE_FPRS;
27239 break;
27240 }
27241 }
27242
27243 /* Similarly, for altivec regs. */
27244 if ((strategy & SAVE_INLINE_VRS)
27245 && !(strategy & REST_INLINE_VRS))
27246 {
27247 int i;
27248
27249 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
27250 if (fixed_regs[i] || !save_reg_p (i))
27251 {
27252 strategy |= REST_INLINE_VRS;
27253 break;
27254 }
27255 }
27256
27257 /* info->lr_save_p isn't yet set if the only reason lr needs to be
27258 saved is an out-of-line save or restore. Set up the value for
27259 the next test (excluding out-of-line gprs). */
27260 bool lr_save_p = (info->lr_save_p
27261 || !(strategy & SAVE_INLINE_FPRS)
27262 || !(strategy & SAVE_INLINE_VRS)
27263 || !(strategy & REST_INLINE_FPRS)
27264 || !(strategy & REST_INLINE_VRS));
27265
27266 if (TARGET_MULTIPLE
27267 && !TARGET_POWERPC64
27268 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
27269 && info->first_gp_reg_save < 31
27270 && !(flag_shrink_wrap
27271 && flag_shrink_wrap_separate
27272 && optimize_function_for_speed_p (cfun)))
27273 {
27274 /* Prefer store multiple for saves over out-of-line routines,
27275 since the store-multiple instruction will always be smaller. */
27276 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
27277
27278 /* The situation is more complicated with load multiple. We'd
27279 prefer to use the out-of-line routines for restores, since the
27280 "exit" out-of-line routines can handle the restore of LR and the
27281 frame teardown. However, it doesn't make sense to use the
27282 out-of-line routine if that is the only reason we'd need to save
27283 LR, and we can't use the "exit" out-of-line gpr restore if we
27284 have saved some fprs; in those cases it is advantageous to use
27285 load multiple when available. */
27286 if (info->first_fp_reg_save != 64 || !lr_save_p)
27287 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
27288 }
27289
27290 /* Using the "exit" out-of-line routine does not improve code size
27291 if using it would require lr to be saved and if only saving one
27292 or two gprs. */
27293 else if (!lr_save_p && info->first_gp_reg_save > 29)
27294 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
27295
27296 /* We can only use load multiple or the out-of-line routines to
27297 restore gprs if we've saved all the registers from
27298 first_gp_reg_save. Otherwise, we risk loading garbage.
27299 Of course, if we have saved out-of-line or used stmw then we know
27300 we haven't skipped any gprs. */
27301 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
27302 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
27303 {
27304 int i;
27305
27306 for (i = info->first_gp_reg_save; i < 32; i++)
27307 if (fixed_reg_p (i) || !save_reg_p (i))
27308 {
27309 strategy |= REST_INLINE_GPRS;
27310 strategy &= ~REST_MULTIPLE;
27311 break;
27312 }
27313 }
27314
27315 if (TARGET_ELF && TARGET_64BIT)
27316 {
27317 if (!(strategy & SAVE_INLINE_FPRS))
27318 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27319 else if (!(strategy & SAVE_INLINE_GPRS)
27320 && info->first_fp_reg_save == 64)
27321 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
27322 }
27323 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
27324 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
27325
27326 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
27327 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
27328
27329 return strategy;
27330 }
27331
27332 /* Calculate the stack information for the current function. This is
27333 complicated by having two separate calling sequences, the AIX calling
27334 sequence and the V.4 calling sequence.
27335
27336 AIX (and Darwin/Mac OS X) stack frames look like:
27337 32-bit 64-bit
27338 SP----> +---------------------------------------+
27339 | back chain to caller | 0 0
27340 +---------------------------------------+
27341 | saved CR | 4 8 (8-11)
27342 +---------------------------------------+
27343 | saved LR | 8 16
27344 +---------------------------------------+
27345 | reserved for compilers | 12 24
27346 +---------------------------------------+
27347 | reserved for binders | 16 32
27348 +---------------------------------------+
27349 | saved TOC pointer | 20 40
27350 +---------------------------------------+
27351 | Parameter save area (+padding*) (P) | 24 48
27352 +---------------------------------------+
27353 | Alloca space (A) | 24+P etc.
27354 +---------------------------------------+
27355 | Local variable space (L) | 24+P+A
27356 +---------------------------------------+
27357 | Float/int conversion temporary (X) | 24+P+A+L
27358 +---------------------------------------+
27359 | Save area for AltiVec registers (W) | 24+P+A+L+X
27360 +---------------------------------------+
27361 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
27362 +---------------------------------------+
27363 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
27364 +---------------------------------------+
27365 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
27366 +---------------------------------------+
27367 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
27368 +---------------------------------------+
27369 old SP->| back chain to caller's caller |
27370 +---------------------------------------+
27371
27372 * If the alloca area is present, the parameter save area is
27373 padded so that the former starts 16-byte aligned.
27374
27375 The required alignment for AIX configurations is two words (i.e., 8
27376 or 16 bytes).
27377
27378 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
27379
27380 SP----> +---------------------------------------+
27381 | Back chain to caller | 0
27382 +---------------------------------------+
27383 | Save area for CR | 8
27384 +---------------------------------------+
27385 | Saved LR | 16
27386 +---------------------------------------+
27387 | Saved TOC pointer | 24
27388 +---------------------------------------+
27389 | Parameter save area (+padding*) (P) | 32
27390 +---------------------------------------+
27391 | Alloca space (A) | 32+P
27392 +---------------------------------------+
27393 | Local variable space (L) | 32+P+A
27394 +---------------------------------------+
27395 | Save area for AltiVec registers (W) | 32+P+A+L
27396 +---------------------------------------+
27397 | AltiVec alignment padding (Y) | 32+P+A+L+W
27398 +---------------------------------------+
27399 | Save area for GP registers (G) | 32+P+A+L+W+Y
27400 +---------------------------------------+
27401 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
27402 +---------------------------------------+
27403 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
27404 +---------------------------------------+
27405
27406 * If the alloca area is present, the parameter save area is
27407 padded so that the former starts 16-byte aligned.
27408
27409 V.4 stack frames look like:
27410
27411 SP----> +---------------------------------------+
27412 | back chain to caller | 0
27413 +---------------------------------------+
27414 | caller's saved LR | 4
27415 +---------------------------------------+
27416 | Parameter save area (+padding*) (P) | 8
27417 +---------------------------------------+
27418 | Alloca space (A) | 8+P
27419 +---------------------------------------+
27420 | Varargs save area (V) | 8+P+A
27421 +---------------------------------------+
27422 | Local variable space (L) | 8+P+A+V
27423 +---------------------------------------+
27424 | Float/int conversion temporary (X) | 8+P+A+V+L
27425 +---------------------------------------+
27426 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
27427 +---------------------------------------+
27428 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
27429 +---------------------------------------+
27430 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
27431 +---------------------------------------+
27432 | SPE: area for 64-bit GP registers |
27433 +---------------------------------------+
27434 | SPE alignment padding |
27435 +---------------------------------------+
27436 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
27437 +---------------------------------------+
27438 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
27439 +---------------------------------------+
27440 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
27441 +---------------------------------------+
27442 old SP->| back chain to caller's caller |
27443 +---------------------------------------+
27444
27445 * If the alloca area is present and the required alignment is
27446 16 bytes, the parameter save area is padded so that the
27447 alloca area starts 16-byte aligned.
27448
27449 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
27450 given. (But note below and in sysv4.h that we require only 8 and
27451 	may round up the size of our stack frame anyway.  The historical
27452 reason is early versions of powerpc-linux which didn't properly
27453 align the stack at program startup. A happy side-effect is that
27454 -mno-eabi libraries can be used with -meabi programs.)
27455
27456 The EABI configuration defaults to the V.4 layout. However,
27457 the stack alignment requirements may differ. If -mno-eabi is not
27458 given, the required stack alignment is 8 bytes; if -mno-eabi is
27459 given, the required alignment is 16 bytes. (But see V.4 comment
27460 above.) */
27461
27462 #ifndef ABI_STACK_BOUNDARY
27463 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
27464 #endif
27465
27466 static rs6000_stack_t *
27467 rs6000_stack_info (void)
27468 {
27469 	/* We should never be called for thunks; we are not set up for that.  */
27470 gcc_assert (!cfun->is_thunk);
27471
27472 rs6000_stack_t *info = &stack_info;
27473 int reg_size = TARGET_32BIT ? 4 : 8;
27474 int ehrd_size;
27475 int ehcr_size;
27476 int save_align;
27477 int first_gp;
27478 HOST_WIDE_INT non_fixed_size;
27479 bool using_static_chain_p;
27480
27481 if (reload_completed && info->reload_completed)
27482 return info;
27483
27484 memset (info, 0, sizeof (*info));
27485 info->reload_completed = reload_completed;
27486
27487 if (TARGET_SPE)
27488 {
27489 /* Cache value so we don't rescan instruction chain over and over. */
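	  /* The cached value encodes three states: 0 = not yet scanned,
	     1 = scanned and no 64-bit GPR uses found, 2 = scanned and
	     64-bit uses found -- hence the "+ 1" and "- 1" below.  */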
27490 if (cfun->machine->spe_insn_chain_scanned_p == 0)
27491 cfun->machine->spe_insn_chain_scanned_p
27492 = spe_func_has_64bit_regs_p () + 1;
27493 info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1;
27494 }
27495
27496 /* Select which calling sequence. */
27497 info->abi = DEFAULT_ABI;
27498
27499 /* Calculate which registers need to be saved & save area size. */
27500 info->first_gp_reg_save = first_reg_to_save ();
27501 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
27502 even if it currently looks like we won't. Reload may need it to
27503 get at a constant; if so, it will have already created a constant
27504 pool entry for it. */
27505 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
27506 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
27507 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
27508 && crtl->uses_const_pool
27509 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
27510 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
27511 else
27512 first_gp = info->first_gp_reg_save;
27513
27514 info->gp_size = reg_size * (32 - first_gp);
27515
27516 	  /* For the SPE, we have an additional upper 32 bits on each GPR.
27517 	     Ideally we should save the entire 64 bits only when the upper
27518 	     half is used in SIMD instructions.  Since we only record which
27519 	     registers are live (not the size they are used in), this proves
27520 	     difficult because we'd have to traverse the instruction chain at
27521 	     the right time, taking reload into account.  This is a real pain,
27522 	     so we opt to save all the GPRs in 64 bits if even one register
27523 	     gets used in 64 bits.  Otherwise, all the registers in the frame
27524 	     get saved in 32 bits.
27525 	
27526 	     So, when we save all GPRs (except the SP) in 64 bits, the
27527 	     traditional GP save area will be empty.  */
27528 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27529 info->gp_size = 0;
27530
27531 info->first_fp_reg_save = first_fp_reg_to_save ();
27532 info->fp_size = 8 * (64 - info->first_fp_reg_save);
27533
27534 info->first_altivec_reg_save = first_altivec_reg_to_save ();
27535 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
27536 - info->first_altivec_reg_save);
27537
27538 /* Does this function call anything? */
27539 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
27540
27541 /* Determine if we need to save the condition code registers. */
27542 if (save_reg_p (CR2_REGNO)
27543 || save_reg_p (CR3_REGNO)
27544 || save_reg_p (CR4_REGNO))
27545 {
27546 info->cr_save_p = 1;
27547 if (DEFAULT_ABI == ABI_V4)
27548 info->cr_size = reg_size;
27549 }
27550
27551 /* If the current function calls __builtin_eh_return, then we need
27552 to allocate stack space for registers that will hold data for
27553 the exception handler. */
27554 if (crtl->calls_eh_return)
27555 {
27556 unsigned int i;
27557 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
27558 continue;
27559
27560 /* SPE saves EH registers in 64-bits. */
27561 ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0
27562 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
27563 }
27564 else
27565 ehrd_size = 0;
27566
27567 /* In the ELFv2 ABI, we also need to allocate space for separate
27568 CR field save areas if the function calls __builtin_eh_return. */
27569 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27570 {
27571 /* This hard-codes that we have three call-saved CR fields. */
27572 ehcr_size = 3 * reg_size;
27573 /* We do *not* use the regular CR save mechanism. */
27574 info->cr_save_p = 0;
27575 }
27576 else
27577 ehcr_size = 0;
27578
27579 /* Determine various sizes. */
27580 info->reg_size = reg_size;
27581 info->fixed_size = RS6000_SAVE_AREA;
27582 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
27583 if (cfun->calls_alloca)
27584 info->parm_size =
27585 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
27586 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
27587 else
27588 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
27589 TARGET_ALTIVEC ? 16 : 8);
27590 if (FRAME_GROWS_DOWNWARD)
27591 info->vars_size
27592 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
27593 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
27594 - (info->fixed_size + info->vars_size + info->parm_size);
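	  /* (RS6000_ALIGN rounds its first argument up to the given boundary;
	     e.g. RS6000_ALIGN (360, 16) yields 368, so the adjustment above
	     adds exactly the padding needed to reach the ABI stack boundary.)  */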
27595
27596 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27597 info->spe_gp_size = 8 * (32 - first_gp);
27598
27599 if (TARGET_ALTIVEC_ABI)
27600 info->vrsave_mask = compute_vrsave_mask ();
27601
27602 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
27603 info->vrsave_size = 4;
27604
27605 compute_save_world_info (info);
27606
27607 /* Calculate the offsets. */
27608 switch (DEFAULT_ABI)
27609 {
27610 case ABI_NONE:
27611 default:
27612 gcc_unreachable ();
27613
27614 case ABI_AIX:
27615 case ABI_ELFv2:
27616 case ABI_DARWIN:
27617 info->fp_save_offset = -info->fp_size;
27618 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27619
27620 if (TARGET_ALTIVEC_ABI)
27621 {
27622 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
27623
27624 /* Align stack so vector save area is on a quadword boundary.
27625 The padding goes above the vectors. */
27626 if (info->altivec_size != 0)
27627 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
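	      /* E.g. vrsave_save_offset == -20 gives (-20 & 0xF) == 12
		 bytes of padding, leaving the vector save area below it
		 16-byte aligned.  */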
27628
27629 info->altivec_save_offset = info->vrsave_save_offset
27630 - info->altivec_padding_size
27631 - info->altivec_size;
27632 gcc_assert (info->altivec_size == 0
27633 || info->altivec_save_offset % 16 == 0);
27634
27635 /* Adjust for AltiVec case. */
27636 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
27637 }
27638 else
27639 info->ehrd_offset = info->gp_save_offset - ehrd_size;
27640
27641 info->ehcr_offset = info->ehrd_offset - ehcr_size;
27642 info->cr_save_offset = reg_size; /* first word when 64-bit. */
27643 	      info->lr_save_offset = 2 * reg_size;
27644 break;
27645
27646 case ABI_V4:
27647 info->fp_save_offset = -info->fp_size;
27648 info->gp_save_offset = info->fp_save_offset - info->gp_size;
27649 info->cr_save_offset = info->gp_save_offset - info->cr_size;
27650
27651 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
27652 {
27653 	  /* Align the stack so the SPE GPR save area falls on a
27654 	     double-word boundary.  */
27655 if (info->spe_gp_size != 0 && info->cr_save_offset != 0)
27656 info->spe_padding_size = 8 - (-info->cr_save_offset % 8);
27657 else
27658 info->spe_padding_size = 0;
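	  /* E.g. cr_save_offset == -12 gives 8 - (12 % 8) == 4 bytes of
	     padding, placing the SPE GPR save area on an 8-byte
	     boundary.  */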
27659
27660 info->spe_gp_save_offset = info->cr_save_offset
27661 - info->spe_padding_size
27662 - info->spe_gp_size;
27663
27664 /* Adjust for SPE case. */
27665 info->ehrd_offset = info->spe_gp_save_offset;
27666 }
27667 else if (TARGET_ALTIVEC_ABI)
27668 {
27669 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
27670
27671 /* Align stack so vector save area is on a quadword boundary. */
27672 if (info->altivec_size != 0)
27673 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
27674
27675 info->altivec_save_offset = info->vrsave_save_offset
27676 - info->altivec_padding_size
27677 - info->altivec_size;
27678
27679 /* Adjust for AltiVec case. */
27680 info->ehrd_offset = info->altivec_save_offset;
27681 }
27682 else
27683 info->ehrd_offset = info->cr_save_offset;
27684
27685 info->ehrd_offset -= ehrd_size;
27686 info->lr_save_offset = reg_size;
27687 }
27688
27689 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
27690 info->save_size = RS6000_ALIGN (info->fp_size
27691 + info->gp_size
27692 + info->altivec_size
27693 + info->altivec_padding_size
27694 + info->spe_gp_size
27695 + info->spe_padding_size
27696 + ehrd_size
27697 + ehcr_size
27698 + info->cr_size
27699 + info->vrsave_size,
27700 save_align);
27701
27702 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
27703
27704 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
27705 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
27706
27707 /* Determine if we need to save the link register. */
27708 if (info->calls_p
27709 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27710 && crtl->profile
27711 && !TARGET_PROFILE_KERNEL)
27712 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
27713 #ifdef TARGET_RELOCATABLE
27714 || (DEFAULT_ABI == ABI_V4
27715 && (TARGET_RELOCATABLE || flag_pic > 1)
27716 && !constant_pool_empty_p ())
27717 #endif
27718 || rs6000_ra_ever_killed ())
27719 info->lr_save_p = 1;
27720
27721 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27722 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27723 && call_used_regs[STATIC_CHAIN_REGNUM]);
27724 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
27725
27726 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
27727 || !(info->savres_strategy & SAVE_INLINE_FPRS)
27728 || !(info->savres_strategy & SAVE_INLINE_VRS)
27729 || !(info->savres_strategy & REST_INLINE_GPRS)
27730 || !(info->savres_strategy & REST_INLINE_FPRS)
27731 || !(info->savres_strategy & REST_INLINE_VRS))
27732 info->lr_save_p = 1;
27733
27734 if (info->lr_save_p)
27735 df_set_regs_ever_live (LR_REGNO, true);
27736
27737 /* Determine if we need to allocate any stack frame:
27738
27739 For AIX we need to push the stack if a frame pointer is needed
27740 (because the stack might be dynamically adjusted), if we are
27741 debugging, if we make calls, or if the sum of fp_save, gp_save,
27742 and local variables are more than the space needed to save all
27743 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
27744 + 18*8 = 288 (GPR13 reserved).
27745
27746 For V.4 we don't have the stack cushion that AIX uses, but assume
27747 that the debugger can handle stackless frames. */
27748
27749 if (info->calls_p)
27750 info->push_p = 1;
27751
27752 else if (DEFAULT_ABI == ABI_V4)
27753 info->push_p = non_fixed_size != 0;
27754
27755 else if (frame_pointer_needed)
27756 info->push_p = 1;
27757
27758 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
27759 info->push_p = 1;
27760
27761 else
27762 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
27763
27764 return info;
27765 }
27766
27767 /* Return true if the current function uses any GPRs in 64-bit SIMD
27768 mode. */
27769
27770 static bool
27771 spe_func_has_64bit_regs_p (void)
27772 {
27773 rtx_insn *insns, *insn;
27774
27775 /* Functions that save and restore all the call-saved registers will
27776 need to save/restore the registers in 64-bits. */
27777 if (crtl->calls_eh_return
27778 || cfun->calls_setjmp
27779 || crtl->has_nonlocal_goto)
27780 return true;
27781
27782 insns = get_insns ();
27783
27784 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
27785 {
27786 if (INSN_P (insn))
27787 {
27788 rtx i;
27789
27790 /* FIXME: This should be implemented with attributes...
27791
27792 (set_attr "spe64" "true")....then,
27793 if (get_spe64(insn)) return true;
27794
27795 It's the only reliable way to do the stuff below. */
27796
27797 i = PATTERN (insn);
27798 if (GET_CODE (i) == SET)
27799 {
27800 machine_mode mode = GET_MODE (SET_SRC (i));
27801
27802 if (SPE_VECTOR_MODE (mode))
27803 return true;
27804 if (TARGET_E500_DOUBLE
27805 && (mode == DFmode || FLOAT128_2REG_P (mode)))
27806 return true;
27807 }
27808 }
27809 }
27810
27811 return false;
27812 }
27813
27814 static void
27815 debug_stack_info (rs6000_stack_t *info)
27816 {
27817 const char *abi_string;
27818
27819 if (! info)
27820 info = rs6000_stack_info ();
27821
27822 fprintf (stderr, "\nStack information for function %s:\n",
27823 ((current_function_decl && DECL_NAME (current_function_decl))
27824 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
27825 : "<unknown>"));
27826
27827 switch (info->abi)
27828 {
27829 default: abi_string = "Unknown"; break;
27830 case ABI_NONE: abi_string = "NONE"; break;
27831 case ABI_AIX: abi_string = "AIX"; break;
27832 case ABI_ELFv2: abi_string = "ELFv2"; break;
27833 case ABI_DARWIN: abi_string = "Darwin"; break;
27834 case ABI_V4: abi_string = "V.4"; break;
27835 }
27836
27837 fprintf (stderr, "\tABI = %5s\n", abi_string);
27838
27839 if (TARGET_ALTIVEC_ABI)
27840 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
27841
27842 if (TARGET_SPE_ABI)
27843 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
27844
27845 if (info->first_gp_reg_save != 32)
27846 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
27847
27848 if (info->first_fp_reg_save != 64)
27849 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
27850
27851 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
27852 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
27853 info->first_altivec_reg_save);
27854
27855 if (info->lr_save_p)
27856 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
27857
27858 if (info->cr_save_p)
27859 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
27860
27861 if (info->vrsave_mask)
27862 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
27863
27864 if (info->push_p)
27865 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
27866
27867 if (info->calls_p)
27868 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
27869
27870 if (info->gp_size)
27871 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
27872
27873 if (info->fp_size)
27874 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
27875
27876 if (info->altivec_size)
27877 fprintf (stderr, "\taltivec_save_offset = %5d\n",
27878 info->altivec_save_offset);
27879
27880 if (info->spe_gp_size)
27881 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
27882 info->spe_gp_save_offset);
27883
27884 if (info->vrsave_size)
27885 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
27886 info->vrsave_save_offset);
27887
27888 if (info->lr_save_p)
27889 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
27890
27891 if (info->cr_save_p)
27892 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
27893
27894 if (info->varargs_save_offset)
27895 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
27896
27897 if (info->total_size)
27898 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
27899 info->total_size);
27900
27901 if (info->vars_size)
27902 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
27903 info->vars_size);
27904
27905 if (info->parm_size)
27906 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
27907
27908 if (info->fixed_size)
27909 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
27910
27911 if (info->gp_size)
27912 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
27913
27914 if (info->spe_gp_size)
27915 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
27916
27917 if (info->fp_size)
27918 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
27919
27920 if (info->altivec_size)
27921 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
27922
27923 if (info->vrsave_size)
27924 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
27925
27926 if (info->altivec_padding_size)
27927 fprintf (stderr, "\taltivec_padding_size= %5d\n",
27928 info->altivec_padding_size);
27929
27930 if (info->spe_padding_size)
27931 fprintf (stderr, "\tspe_padding_size = %5d\n",
27932 info->spe_padding_size);
27933
27934 if (info->cr_size)
27935 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
27936
27937 if (info->save_size)
27938 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
27939
27940 if (info->reg_size != 4)
27941 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
27942
27943 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
27944
27945 fprintf (stderr, "\n");
27946 }
27947
27948 rtx
27949 rs6000_return_addr (int count, rtx frame)
27950 {
27951 	  /* Currently we don't optimize very well between prologue and body
27952 	     code, and for PIC the generated code can be quite bad, so don't
27953 	     try to be too clever here.  */
27954 if (count != 0
27955 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
27956 {
27957 cfun->machine->ra_needs_full_frame = 1;
27958
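      /* That is, load the back chain word at FRAME to reach the
	 caller's frame, then fetch the saved LR stored at
	 RETURN_ADDRESS_OFFSET within it.  */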
27959 return
27960 gen_rtx_MEM
27961 (Pmode,
27962 memory_address
27963 (Pmode,
27964 plus_constant (Pmode,
27965 copy_to_reg
27966 (gen_rtx_MEM (Pmode,
27967 memory_address (Pmode, frame))),
27968 RETURN_ADDRESS_OFFSET)));
27969 }
27970
27971 cfun->machine->ra_need_lr = 1;
27972 return get_hard_reg_initial_val (Pmode, LR_REGNO);
27973 }
27974
27975 /* Say whether a function is a candidate for sibcall handling or not. */
27976
27977 static bool
27978 rs6000_function_ok_for_sibcall (tree decl, tree exp)
27979 {
27980 tree fntype;
27981
27982 if (decl)
27983 fntype = TREE_TYPE (decl);
27984 else
27985 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
27986
27987 /* We can't do it if the called function has more vector parameters
27988 than the current function; there's nowhere to put the VRsave code. */
27989 if (TARGET_ALTIVEC_ABI
27990 && TARGET_ALTIVEC_VRSAVE
27991 && !(decl && decl == current_function_decl))
27992 {
27993 function_args_iterator args_iter;
27994 tree type;
27995 int nvreg = 0;
27996
27997 /* Functions with vector parameters are required to have a
27998 prototype, so the argument type info must be available
27999 here. */
28000 	      FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
28001 if (TREE_CODE (type) == VECTOR_TYPE
28002 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28003 nvreg++;
28004
28005 	      FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
28006 if (TREE_CODE (type) == VECTOR_TYPE
28007 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
28008 nvreg--;
28009
28010 if (nvreg > 0)
28011 return false;
28012 }
28013
28014 	  /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
28015 	     functions, because the callee may have a different TOC pointer
28016 	     from the caller's, and there's no way to ensure we restore the
28017 	     TOC when we return.  With the secure-plt SYSV ABI we can't make
28018 	     non-local calls when -fpic/PIC because the PLT call stubs use r30.  */
28019 if (DEFAULT_ABI == ABI_DARWIN
28020 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28021 && decl
28022 && !DECL_EXTERNAL (decl)
28023 && !DECL_WEAK (decl)
28024 && (*targetm.binds_local_p) (decl))
28025 || (DEFAULT_ABI == ABI_V4
28026 && (!TARGET_SECURE_PLT
28027 || !flag_pic
28028 || (decl
28029 && (*targetm.binds_local_p) (decl)))))
28030 {
28031 tree attr_list = TYPE_ATTRIBUTES (fntype);
28032
28033 if (!lookup_attribute ("longcall", attr_list)
28034 || lookup_attribute ("shortcall", attr_list))
28035 return true;
28036 }
28037
28038 return false;
28039 }
28040
28041 static int
28042 rs6000_ra_ever_killed (void)
28043 {
28044 rtx_insn *top;
28045 rtx reg;
28046 rtx_insn *insn;
28047
28048 if (cfun->is_thunk)
28049 return 0;
28050
28051 if (cfun->machine->lr_save_state)
28052 return cfun->machine->lr_save_state - 1;
28053
28054 /* regs_ever_live has LR marked as used if any sibcalls are present,
28055 but this should not force saving and restoring in the
28056 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
28057 clobbers LR, so that is inappropriate. */
28058
28059 /* Also, the prologue can generate a store into LR that
28060 doesn't really count, like this:
28061
28062 move LR->R0
28063 bcl to set PIC register
28064 move LR->R31
28065 move R0->LR
28066
28067 When we're called from the epilogue, we need to avoid counting
28068 this as a store. */
28069
28070 push_topmost_sequence ();
28071 top = get_insns ();
28072 pop_topmost_sequence ();
28073 reg = gen_rtx_REG (Pmode, LR_REGNO);
28074
28075 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
28076 {
28077 if (INSN_P (insn))
28078 {
28079 if (CALL_P (insn))
28080 {
28081 if (!SIBLING_CALL_P (insn))
28082 return 1;
28083 }
28084 else if (find_regno_note (insn, REG_INC, LR_REGNO))
28085 return 1;
28086 else if (set_of (reg, insn) != NULL_RTX
28087 && !prologue_epilogue_contains (insn))
28088 return 1;
28089 }
28090 }
28091 return 0;
28092 }
28093 \f
28094 /* Emit instructions needed to load the TOC register.
28095 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
28096 a constant pool; or for SVR4 -fpic. */
28097
28098 void
28099 rs6000_emit_load_toc_table (int fromprolog)
28100 {
28101 rtx dest;
28102 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
28103
28104 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
28105 {
28106 char buf[30];
28107 rtx lab, tmp1, tmp2, got;
28108
28109 lab = gen_label_rtx ();
28110 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
28111 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28112 if (flag_pic == 2)
28113 {
28114 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28115 need_toc_init = 1;
28116 }
28117 else
28118 got = rs6000_got_sym ();
28119 tmp1 = tmp2 = dest;
28120 if (!fromprolog)
28121 {
28122 tmp1 = gen_reg_rtx (Pmode);
28123 tmp2 = gen_reg_rtx (Pmode);
28124 }
28125 emit_insn (gen_load_toc_v4_PIC_1 (lab));
28126 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
28127 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
28128 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
28129 }
28130 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
28131 {
28132 emit_insn (gen_load_toc_v4_pic_si ());
28133 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28134 }
28135 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
28136 {
28137 char buf[30];
28138 rtx temp0 = (fromprolog
28139 ? gen_rtx_REG (Pmode, 0)
28140 : gen_reg_rtx (Pmode));
28141
28142 if (fromprolog)
28143 {
28144 rtx symF, symL;
28145
28146 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
28147 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28148
28149 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
28150 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
28151
28152 emit_insn (gen_load_toc_v4_PIC_1 (symF));
28153 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28154 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
28155 }
28156 else
28157 {
28158 rtx tocsym, lab;
28159
28160 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28161 need_toc_init = 1;
28162 lab = gen_label_rtx ();
28163 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
28164 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
28165 if (TARGET_LINK_STACK)
28166 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
28167 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
28168 }
28169 emit_insn (gen_addsi3 (dest, temp0, dest));
28170 }
28171 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
28172 {
28173 /* This is for AIX code running in non-PIC ELF32. */
28174 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
28175
28176 need_toc_init = 1;
28177 emit_insn (gen_elf_high (dest, realsym));
28178 emit_insn (gen_elf_low (dest, dest, realsym));
28179 }
28180 else
28181 {
28182 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
28183
28184 if (TARGET_32BIT)
28185 emit_insn (gen_load_toc_aix_si (dest));
28186 else
28187 emit_insn (gen_load_toc_aix_di (dest));
28188 }
28189 }
28190
28191 /* Emit instructions to restore the link register after determining where
28192 its value has been stored. */
28193
28194 void
28195 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
28196 {
28197 rs6000_stack_t *info = rs6000_stack_info ();
28198 rtx operands[2];
28199
28200 operands[0] = source;
28201 operands[1] = scratch;
28202
28203 if (info->lr_save_p)
28204 {
28205 rtx frame_rtx = stack_pointer_rtx;
28206 HOST_WIDE_INT sp_offset = 0;
28207 rtx tmp;
28208
28209 if (frame_pointer_needed
28210 || cfun->calls_alloca
28211 || info->total_size > 32767)
28212 {
28213 tmp = gen_frame_mem (Pmode, frame_rtx);
28214 emit_move_insn (operands[1], tmp);
28215 frame_rtx = operands[1];
28216 }
28217 else if (info->push_p)
28218 sp_offset = info->total_size;
28219
28220 tmp = plus_constant (Pmode, frame_rtx,
28221 info->lr_save_offset + sp_offset);
28222 tmp = gen_frame_mem (Pmode, tmp);
28223 emit_move_insn (tmp, operands[0]);
28224 }
28225 else
28226 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
28227
28228 /* Freeze lr_save_p. We've just emitted rtl that depends on the
28229 state of lr_save_p so any change from here on would be a bug. In
28230 particular, stop rs6000_ra_ever_killed from considering the SET
28231 of lr we may have added just above. */
28232 cfun->machine->lr_save_state = info->lr_save_p + 1;
28233 }
28234
28235 static GTY(()) alias_set_type set = -1;
28236
28237 alias_set_type
28238 get_TOC_alias_set (void)
28239 {
28240 if (set == -1)
28241 set = new_alias_set ();
28242 return set;
28243 }
28244
28245 /* This returns nonzero if the current function uses the TOC. This is
28246 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
28247 is generated by the ABI_V4 load_toc_* patterns. */
28248 #if TARGET_ELF
28249 static int
28250 uses_TOC (void)
28251 {
28252 rtx_insn *insn;
28253
28254 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
28255 if (INSN_P (insn))
28256 {
28257 rtx pat = PATTERN (insn);
28258 int i;
28259
28260 if (GET_CODE (pat) == PARALLEL)
28261 for (i = 0; i < XVECLEN (pat, 0); i++)
28262 {
28263 rtx sub = XVECEXP (pat, 0, i);
28264 if (GET_CODE (sub) == USE)
28265 {
28266 sub = XEXP (sub, 0);
28267 if (GET_CODE (sub) == UNSPEC
28268 && XINT (sub, 1) == UNSPEC_TOC)
28269 return 1;
28270 }
28271 }
28272 }
28273 return 0;
28274 }
28275 #endif
28276
28277 rtx
28278 create_TOC_reference (rtx symbol, rtx largetoc_reg)
28279 {
28280 rtx tocrel, tocreg, hi;
28281
28282 if (TARGET_DEBUG_ADDR)
28283 {
28284 if (GET_CODE (symbol) == SYMBOL_REF)
28285 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
28286 XSTR (symbol, 0));
28287 else
28288 {
28289 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
28290 GET_RTX_NAME (GET_CODE (symbol)));
28291 debug_rtx (symbol);
28292 }
28293 }
28294
28295 if (!can_create_pseudo_p ())
28296 df_set_regs_ever_live (TOC_REGISTER, true);
28297
28298 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
28299 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
28300 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
28301 return tocrel;
28302
28303 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
28304 if (largetoc_reg != NULL)
28305 {
28306 emit_move_insn (largetoc_reg, hi);
28307 hi = largetoc_reg;
28308 }
28309 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
28310 }
28311
28312 /* Issue assembly directives that create a reference to the given DWARF
28313 FRAME_TABLE_LABEL from the current function section. */
28314 void
28315 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
28316 {
28317 fprintf (asm_out_file, "\t.ref %s\n",
28318 (* targetm.strip_name_encoding) (frame_table_label));
28319 }
28320 \f
28321 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
28322 and the change to the stack pointer. */
28323
28324 static void
28325 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
28326 {
28327 rtvec p;
28328 int i;
28329 rtx regs[3];
28330
28331 i = 0;
28332 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28333 if (hard_frame_needed)
28334 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
28335 if (!(REGNO (fp) == STACK_POINTER_REGNUM
28336 || (hard_frame_needed
28337 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
28338 regs[i++] = fp;
28339
28340 p = rtvec_alloc (i);
28341 while (--i >= 0)
28342 {
28343 rtx mem = gen_frame_mem (BLKmode, regs[i]);
28344 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
28345 }
28346
28347 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
28348 }
28349
28350 /* Emit the correct code for allocating stack space, as insns.
28351    If COPY_REG, make sure a copy of the old frame is left in it.
28352    The generated code may use hard register 0 as a temporary.  */
28353
28354 static rtx_insn *
28355 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
28356 {
28357 rtx_insn *insn;
28358 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28359 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
28360 rtx todec = gen_int_mode (-size, Pmode);
28361 rtx par, set, mem;
28362
28363 if (INTVAL (todec) != -size)
28364 {
28365 warning (0, "stack frame too large");
28366 emit_insn (gen_trap ());
28367 return 0;
28368 }
28369
28370 if (crtl->limit_stack)
28371 {
28372 if (REG_P (stack_limit_rtx)
28373 && REGNO (stack_limit_rtx) > 1
28374 && REGNO (stack_limit_rtx) <= 31)
28375 {
28376 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
28377 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28378 const0_rtx));
28379 }
28380 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
28381 && TARGET_32BIT
28382 && DEFAULT_ABI == ABI_V4
28383 && !flag_pic)
28384 {
28385 rtx toload = gen_rtx_CONST (VOIDmode,
28386 gen_rtx_PLUS (Pmode,
28387 stack_limit_rtx,
28388 GEN_INT (size)));
28389
28390 emit_insn (gen_elf_high (tmp_reg, toload));
28391 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
28392 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
28393 const0_rtx));
28394 }
28395 else
28396 warning (0, "stack limit expression is not supported");
28397 }
28398
28399 if (copy_reg)
28400 {
28401 if (copy_off != 0)
28402 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
28403 else
28404 emit_move_insn (copy_reg, stack_reg);
28405 }
28406
28407 if (size > 32767)
28408 {
28409 /* Need a note here so that try_split doesn't get confused. */
28410 if (get_last_insn () == NULL_RTX)
28411 emit_note (NOTE_INSN_DELETED);
28412 insn = emit_move_insn (tmp_reg, todec);
28413 try_split (PATTERN (insn), insn, 0);
28414 todec = tmp_reg;
28415 }
28416
28417 insn = emit_insn (TARGET_32BIT
28418 ? gen_movsi_update_stack (stack_reg, stack_reg,
28419 todec, stack_reg)
28420 : gen_movdi_di_update_stack (stack_reg, stack_reg,
28421 todec, stack_reg));
28422 /* Since we didn't use gen_frame_mem to generate the MEM, grab
28423 it now and set the alias set/attributes. The above gen_*_update
28424 calls will generate a PARALLEL with the MEM set being the first
28425 operation. */
28426 par = PATTERN (insn);
28427 gcc_assert (GET_CODE (par) == PARALLEL);
28428 set = XVECEXP (par, 0, 0);
28429 gcc_assert (GET_CODE (set) == SET);
28430 mem = SET_DEST (set);
28431 gcc_assert (MEM_P (mem));
28432 MEM_NOTRAP_P (mem) = 1;
28433 set_mem_alias_set (mem, get_frame_alias_set ());
28434
28435 RTX_FRAME_RELATED_P (insn) = 1;
28436 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
28437 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
28438 GEN_INT (-size))));
28439 return insn;
28440 }
28441
28442 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
28443
28444 #if PROBE_INTERVAL > 32768
28445 #error Cannot use indexed addressing mode for stack probing
28446 #endif
28447
28448 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
28449 inclusive. These are offsets from the current stack pointer. */
28450
28451 static void
28452 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
28453 {
28454 /* See if we have a constant small number of probes to generate. If so,
28455 that's the easy case. */
28456 if (first + size <= 32768)
28457 {
28458 HOST_WIDE_INT i;
28459
28460 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
28461 it exceeds SIZE. If only one probe is needed, this will not
28462 generate any code. Then probe at FIRST + SIZE. */
28463 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
28464 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28465 -(first + i)));
28466
28467 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
28468 -(first + size)));
28469 }
28470
28471 /* Otherwise, do the same as above, but in a loop. Note that we must be
28472 extra careful with variables wrapping around because we might be at
28473 the very top (or the very bottom) of the address space and we have
28474 to be able to handle this case properly; in particular, we use an
28475 equality test for the loop condition. */
28476 else
28477 {
28478 HOST_WIDE_INT rounded_size;
28479 rtx r12 = gen_rtx_REG (Pmode, 12);
28480 rtx r0 = gen_rtx_REG (Pmode, 0);
28481
28482 /* Sanity check for the addressing mode we're going to use. */
28483 gcc_assert (first <= 32768);
28484
28485 /* Step 1: round SIZE to the previous multiple of the interval. */
28486
28487 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
28488
28489
28490 /* Step 2: compute initial and final value of the loop counter. */
28491
28492 /* TEST_ADDR = SP + FIRST. */
28493 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
28494 -first)));
28495
28496 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
28497 if (rounded_size > 32768)
28498 {
28499 emit_move_insn (r0, GEN_INT (-rounded_size));
28500 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
28501 }
28502 else
28503 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
28504 -rounded_size)));
28505
28506
28507 /* Step 3: the loop
28508
28509 do
28510 {
28511 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
28512 probe at TEST_ADDR
28513 }
28514 while (TEST_ADDR != LAST_ADDR)
28515
28516 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
28517 until it is equal to ROUNDED_SIZE. */
28518
28519 if (TARGET_64BIT)
28520 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
28521 else
28522 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
28523
28524
28525 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
28526 that SIZE is equal to ROUNDED_SIZE. */
28527
28528 if (size != rounded_size)
28529 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
28530 }
28531 }
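
/* A worked example, assuming the default 4096-byte PROBE_INTERVAL:
   for FIRST = 4096 and SIZE = 12288 the small-count path above emits
   probes at SP-8192, SP-12288 and finally SP-16384.  */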
28532
28533 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
28534 absolute addresses. */
28535
28536 const char *
28537 output_probe_stack_range (rtx reg1, rtx reg2)
28538 {
28539 static int labelno = 0;
28540 char loop_lab[32];
28541 rtx xops[2];
28542
28543 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
28544
28545 /* Loop. */
28546 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
28547
28548 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
28549 xops[0] = reg1;
28550 xops[1] = GEN_INT (-PROBE_INTERVAL);
28551 output_asm_insn ("addi %0,%0,%1", xops);
28552
28553 /* Probe at TEST_ADDR. */
28554 xops[1] = gen_rtx_REG (Pmode, 0);
28555 output_asm_insn ("stw %1,0(%0)", xops);
28556
28557 /* Test if TEST_ADDR == LAST_ADDR. */
28558 xops[1] = reg2;
28559 if (TARGET_64BIT)
28560 output_asm_insn ("cmpd 0,%0,%1", xops);
28561 else
28562 output_asm_insn ("cmpw 0,%0,%1", xops);
28563
28564 /* Branch. */
28565 fputs ("\tbne 0,", asm_out_file);
28566 assemble_name_raw (asm_out_file, loop_lab);
28567 fputc ('\n', asm_out_file);
28568
28569 return "";
28570 }
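
/* For illustration (a sketch; the exact label and register numbers
   depend on the caller and target), with REG1 = r12, REG2 = r0 and a
   4096-byte PROBE_INTERVAL on a 32-bit target, the emitted loop looks
   roughly like:

	.LPSRL0:
		addi 12,12,-4096
		stw 0,0(12)
		cmpw 0,12,0
		bne 0,.LPSRL0
*/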
28571
28572 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
28573 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
28574 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
28575 deduce these equivalences by itself so it wasn't necessary to hold
28576 its hand so much. Don't be tempted to always supply d2_f_d_e with
28577    the actual cfa register, i.e. r31 when we are using a hard frame
28578 pointer. That fails when saving regs off r1, and sched moves the
28579 r31 setup past the reg saves. */
28580
28581 static rtx_insn *
28582 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
28583 rtx reg2, rtx repl2)
28584 {
28585 rtx repl;
28586
28587 if (REGNO (reg) == STACK_POINTER_REGNUM)
28588 {
28589 gcc_checking_assert (val == 0);
28590 repl = NULL_RTX;
28591 }
28592 else
28593 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
28594 GEN_INT (val));
28595
28596 rtx pat = PATTERN (insn);
28597 if (!repl && !reg2)
28598 {
28599 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
28600 if (GET_CODE (pat) == PARALLEL)
28601 for (int i = 0; i < XVECLEN (pat, 0); i++)
28602 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28603 {
28604 rtx set = XVECEXP (pat, 0, i);
28605
28606 /* If this PARALLEL has been emitted for out-of-line
28607 register save functions, or store multiple, then omit
28608 eh_frame info for any user-defined global regs. If
28609 eh_frame info is supplied, frame unwinding will
28610 restore a user reg. */
28611 if (!REG_P (SET_SRC (set))
28612 || !fixed_reg_p (REGNO (SET_SRC (set))))
28613 RTX_FRAME_RELATED_P (set) = 1;
28614 }
28615 RTX_FRAME_RELATED_P (insn) = 1;
28616 return insn;
28617 }
28618
28619 /* We expect that 'pat' is either a SET or a PARALLEL containing
28620 SETs (and possibly other stuff). In a PARALLEL, all the SETs
28621 are important so they all have to be marked RTX_FRAME_RELATED_P.
28622 Call simplify_replace_rtx on the SETs rather than the whole insn
28623 so as to leave the other stuff alone (for example USE of r12). */
28624
28625 set_used_flags (pat);
28626 if (GET_CODE (pat) == SET)
28627 {
28628 if (repl)
28629 pat = simplify_replace_rtx (pat, reg, repl);
28630 if (reg2)
28631 pat = simplify_replace_rtx (pat, reg2, repl2);
28632 }
28633 else if (GET_CODE (pat) == PARALLEL)
28634 {
28635 pat = shallow_copy_rtx (pat);
28636 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
28637
28638 for (int i = 0; i < XVECLEN (pat, 0); i++)
28639 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28640 {
28641 rtx set = XVECEXP (pat, 0, i);
28642
28643 if (repl)
28644 set = simplify_replace_rtx (set, reg, repl);
28645 if (reg2)
28646 set = simplify_replace_rtx (set, reg2, repl2);
28647 XVECEXP (pat, 0, i) = set;
28648
28649 /* Omit eh_frame info for any user-defined global regs. */
28650 if (!REG_P (SET_SRC (set))
28651 || !fixed_reg_p (REGNO (SET_SRC (set))))
28652 RTX_FRAME_RELATED_P (set) = 1;
28653 }
28654 }
28655 else
28656 gcc_unreachable ();
28657
28658 RTX_FRAME_RELATED_P (insn) = 1;
28659 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
28660
28661 return insn;
28662 }
28663
28664 /* Returns an insn that has a vrsave set operation with the
28665 appropriate CLOBBERs. */
28666
28667 static rtx
28668 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
28669 {
28670 int nclobs, i;
28671 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
28672 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
28673
28674 clobs[0]
28675 = gen_rtx_SET (vrsave,
28676 gen_rtx_UNSPEC_VOLATILE (SImode,
28677 gen_rtvec (2, reg, vrsave),
28678 UNSPECV_SET_VRSAVE));
28679
28680 nclobs = 1;
28681
28682 /* We need to clobber the registers in the mask so the scheduler
28683 does not move sets to VRSAVE before sets of AltiVec registers.
28684
28685 However, if the function receives nonlocal gotos, reload will set
28686 all call saved registers live. We will end up with:
28687
28688 (set (reg 999) (mem))
28689 (parallel [ (set (reg vrsave) (unspec blah))
28690 (clobber (reg 999))])
28691
28692 The clobber will cause the store into reg 999 to be dead, and
28693 flow will attempt to delete an epilogue insn. In this case, we
28694 need an unspec use/set of the register. */
28695
28696 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
28697 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28698 {
28699 if (!epiloguep || call_used_regs [i])
28700 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
28701 gen_rtx_REG (V4SImode, i));
28702 else
28703 {
28704 rtx reg = gen_rtx_REG (V4SImode, i);
28705
28706 clobs[nclobs++]
28707 = gen_rtx_SET (reg,
28708 gen_rtx_UNSPEC (V4SImode,
28709 gen_rtvec (1, reg), 27));
28710 }
28711 }
28712
28713 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
28714
28715 for (i = 0; i < nclobs; ++i)
28716 XVECEXP (insn, 0, i) = clobs[i];
28717
28718 return insn;
28719 }
28720
28721 static rtx
28722 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
28723 {
28724 rtx addr, mem;
28725
28726 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
28727 mem = gen_frame_mem (GET_MODE (reg), addr);
28728 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
28729 }
28730
28731 static rtx
28732 gen_frame_load (rtx reg, rtx frame_reg, int offset)
28733 {
28734 return gen_frame_set (reg, frame_reg, offset, false);
28735 }
28736
28737 static rtx
28738 gen_frame_store (rtx reg, rtx frame_reg, int offset)
28739 {
28740 return gen_frame_set (reg, frame_reg, offset, true);
28741 }
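
/* For instance (a sketch, assuming a 64-bit target where Pmode is
   DImode), gen_frame_store (gen_rtx_REG (DImode, 31),
   gen_rtx_REG (Pmode, 1), 8) produces the RTL
   (set (mem:DI (plus:DI (reg:DI 1) (const_int 8))) (reg:DI 31)),
   with the MEM carrying the frame alias set.  */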
28742
28743 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
28744 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
28745
28746 static rtx_insn *
28747 emit_frame_save (rtx frame_reg, machine_mode mode,
28748 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
28749 {
28750 rtx reg;
28751
28752 /* Some cases that need register indexed addressing. */
28753 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
28754 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
28755 || (TARGET_E500_DOUBLE && mode == DFmode)
28756 || (TARGET_SPE_ABI
28757 && SPE_VECTOR_MODE (mode)
28758 && !SPE_CONST_OFFSET_OK (offset))));
28759
28760 reg = gen_rtx_REG (mode, regno);
28761 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
28762 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
28763 NULL_RTX, NULL_RTX);
28764 }
28765
28766 /* Emit an offset memory reference suitable for a frame store, while
28767 converting to a valid addressing mode. */
28768
28769 static rtx
28770 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
28771 {
28772 rtx int_rtx, offset_rtx;
28773
28774 int_rtx = GEN_INT (offset);
28775
28776 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
28777 || (TARGET_E500_DOUBLE && mode == DFmode))
28778 {
28779 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
28780 emit_move_insn (offset_rtx, int_rtx);
28781 }
28782 else
28783 offset_rtx = int_rtx;
28784
28785 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
28786 }
28787
28788 #ifndef TARGET_FIX_AND_CONTINUE
28789 #define TARGET_FIX_AND_CONTINUE 0
28790 #endif
28791
28792 /* It's really GPR 13 or 14, FPR 14, and VR 20; we need the smallest of these.  */
28793 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
28794 #define LAST_SAVRES_REGISTER 31
28795 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
28796
28797 enum {
28798 SAVRES_LR = 0x1,
28799 SAVRES_SAVE = 0x2,
28800 SAVRES_REG = 0x0c,
28801 SAVRES_GPR = 0,
28802 SAVRES_FPR = 4,
28803 SAVRES_VR = 8
28804 };
28805
28806 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
28807
28808 /* Temporary holding space for an out-of-line register save/restore
28809 routine name. */
28810 static char savres_routine_name[30];
28811
28812 /* Return the name for an out-of-line register save/restore routine.
28813 We are saving/restoring GPRs if GPR is true. */
28814
28815 static char *
28816 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
28817 {
28818 const char *prefix = "";
28819 const char *suffix = "";
28820
28821 /* Different targets are supposed to define
28822 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
28823 routine name could be defined with:
28824
28825 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
28826
28827    This is a nice idea in theory, but in practice, things are
28828    complicated in several ways:
28829
28830 - ELF targets have save/restore routines for GPRs.
28831
28832 - SPE targets use different prefixes for 32/64-bit registers, and
28833 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
28834
28835 - PPC64 ELF targets have routines for save/restore of GPRs that
28836 differ in what they do with the link register, so having a set
28837 prefix doesn't work. (We only use one of the save routines at
28838 the moment, though.)
28839
28840 - PPC32 elf targets have "exit" versions of the restore routines
28841 that restore the link register and can save some extra space.
28842 These require an extra suffix. (There are also "tail" versions
28843 of the restore routines and "GOT" versions of the save routines,
28844 but we don't generate those at present. Same problems apply,
28845 though.)
28846
28847 We deal with all this by synthesizing our own prefix/suffix and
28848 using that for the simple sprintf call shown above. */
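  /* For example (illustrative): a 32-bit V.4 out-of-line restore of GPRs
     starting at r29 that also restores the link register is named
     "_restgpr_29_x", while an ELFv2 GPR save starting at r28 that also
     saves LR is "_savegpr0_28".  */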
28849 if (TARGET_SPE)
28850 {
28851 /* No floating point saves on the SPE. */
28852 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
28853
28854 if ((sel & SAVRES_SAVE))
28855 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
28856 else
28857 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
28858
28859 if ((sel & SAVRES_LR))
28860 suffix = "_x";
28861 }
28862 else if (DEFAULT_ABI == ABI_V4)
28863 {
28864 if (TARGET_64BIT)
28865 goto aix_names;
28866
28867 if ((sel & SAVRES_REG) == SAVRES_GPR)
28868 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
28869 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28870 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
28871 else if ((sel & SAVRES_REG) == SAVRES_VR)
28872 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28873 else
28874 abort ();
28875
28876 if ((sel & SAVRES_LR))
28877 suffix = "_x";
28878 }
28879 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28880 {
28881 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
28882 /* No out-of-line save/restore routines for GPRs on AIX. */
28883 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
28884 #endif
28885
28886 aix_names:
28887 if ((sel & SAVRES_REG) == SAVRES_GPR)
28888 prefix = ((sel & SAVRES_SAVE)
28889 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
28890 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
28891 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28892 {
28893 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
28894 if ((sel & SAVRES_LR))
28895 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
28896 else
28897 #endif
28898 {
28899 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
28900 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
28901 }
28902 }
28903 else if ((sel & SAVRES_REG) == SAVRES_VR)
28904 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
28905 else
28906 abort ();
28907 }
28908
28909 if (DEFAULT_ABI == ABI_DARWIN)
28910 {
28911       /* The Darwin approach is (slightly) different, in order to be
28912 	 compatible with code generated by the system toolchain.  There is
28913 	 a single symbol for the start of the save sequence, and the code
28914 	 here embeds an offset into that sequence based on the first
28915 	 register to be saved.  */
28916 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
28917 if ((sel & SAVRES_REG) == SAVRES_GPR)
28918 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
28919 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
28920 (regno - 13) * 4, prefix, regno);
28921 else if ((sel & SAVRES_REG) == SAVRES_FPR)
28922 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
28923 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
28924 else if ((sel & SAVRES_REG) == SAVRES_VR)
28925 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
28926 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
28927 else
28928 abort ();
28929 }
28930 else
28931 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
28932
28933 return savres_routine_name;
28934 }
28935
28936 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
28937 We are saving/restoring GPRs if GPR is true. */
28938
28939 static rtx
28940 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
28941 {
28942 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
28943 ? info->first_gp_reg_save
28944 : (sel & SAVRES_REG) == SAVRES_FPR
28945 ? info->first_fp_reg_save - 32
28946 : (sel & SAVRES_REG) == SAVRES_VR
28947 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
28948 : -1);
28949 rtx sym;
28950 int select = sel;
28951
28952 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
28953 versions of the gpr routines. */
28954 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
28955 && info->spe_64bit_regs_used)
28956 select ^= SAVRES_FPR ^ SAVRES_GPR;
28957
28958 /* Don't generate bogus routine names. */
28959 gcc_assert (FIRST_SAVRES_REGISTER <= regno
28960 && regno <= LAST_SAVRES_REGISTER
28961 && select >= 0 && select <= 12);
28962
28963 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
28964
28965 if (sym == NULL)
28966 {
28967 char *name;
28968
28969 name = rs6000_savres_routine_name (info, regno, sel);
28970
28971 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
28972 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
28973 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
28974 }
28975
28976 return sym;
28977 }
28978
28979 /* Emit a sequence of insns, including a stack tie if needed, for
28980 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
28981 reset the stack pointer, but move the base of the frame into
28982 reg UPDT_REGNO for use by out-of-line register restore routines. */
28983
28984 static rtx
28985 rs6000_emit_stack_reset (rs6000_stack_t *info,
28986 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
28987 unsigned updt_regno)
28988 {
28989 /* If there is nothing to do, don't do anything. */
28990 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
28991 return NULL_RTX;
28992
28993 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
28994
28995 /* This blockage is needed so that sched doesn't decide to move
28996 the sp change before the register restores. */
28997 if (DEFAULT_ABI == ABI_V4
28998 || (TARGET_SPE_ABI
28999 && info->spe_64bit_regs_used != 0
29000 && info->first_gp_reg_save != 32))
29001 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
29002 GEN_INT (frame_off)));
29003
29004 /* If we are restoring registers out-of-line, we will be using the
29005 "exit" variants of the restore routines, which will reset the
29006 stack for us. But we do need to point updt_reg into the
29007 right place for those routines. */
29008 if (frame_off != 0)
29009 return emit_insn (gen_add3_insn (updt_reg_rtx,
29010 frame_reg_rtx, GEN_INT (frame_off)));
29011 else
29012 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
29013
29014 return NULL_RTX;
29015 }
29016
29017 /* Return the register number used as a pointer by out-of-line
29018 save/restore functions. */
29019
29020 static inline unsigned
29021 ptr_regno_for_savres (int sel)
29022 {
29023 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29024 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
29025 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
29026 }
29027
29028 /* Construct a parallel rtx describing the effect of a call to an
29029 out-of-line register save/restore routine, and emit the insn
29030 or jump_insn as appropriate. */
29031
29032 static rtx_insn *
29033 rs6000_emit_savres_rtx (rs6000_stack_t *info,
29034 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
29035 machine_mode reg_mode, int sel)
29036 {
29037 int i;
29038 int offset, start_reg, end_reg, n_regs, use_reg;
29039 int reg_size = GET_MODE_SIZE (reg_mode);
29040 rtx sym;
29041 rtvec p;
29042 rtx par;
29043 rtx_insn *insn;
29044
29045 offset = 0;
29046 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29047 ? info->first_gp_reg_save
29048 : (sel & SAVRES_REG) == SAVRES_FPR
29049 ? info->first_fp_reg_save
29050 : (sel & SAVRES_REG) == SAVRES_VR
29051 ? info->first_altivec_reg_save
29052 : -1);
29053 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
29054 ? 32
29055 : (sel & SAVRES_REG) == SAVRES_FPR
29056 ? 64
29057 : (sel & SAVRES_REG) == SAVRES_VR
29058 ? LAST_ALTIVEC_REGNO + 1
29059 : -1);
29060 n_regs = end_reg - start_reg;
29061 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
29062 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
29063 + n_regs);
29064
29065 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29066 RTVEC_ELT (p, offset++) = ret_rtx;
29067
29068 RTVEC_ELT (p, offset++)
29069 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29070
29071 sym = rs6000_savres_routine_sym (info, sel);
29072 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
29073
29074 use_reg = ptr_regno_for_savres (sel);
29075 if ((sel & SAVRES_REG) == SAVRES_VR)
29076 {
29077 /* Vector regs are saved/restored using [reg+reg] addressing. */
29078 RTVEC_ELT (p, offset++)
29079 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29080 RTVEC_ELT (p, offset++)
29081 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
29082 }
29083 else
29084 RTVEC_ELT (p, offset++)
29085 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
29086
29087 for (i = 0; i < end_reg - start_reg; i++)
29088 RTVEC_ELT (p, i + offset)
29089 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
29090 frame_reg_rtx, save_area_offset + reg_size * i,
29091 (sel & SAVRES_SAVE) != 0);
29092
29093 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29094 RTVEC_ELT (p, i + offset)
29095 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
29096
29097 par = gen_rtx_PARALLEL (VOIDmode, p);
29098
29099 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
29100 {
29101 insn = emit_jump_insn (par);
29102 JUMP_LABEL (insn) = ret_rtx;
29103 }
29104 else
29105 insn = emit_insn (par);
29106 return insn;
29107 }
29108
29109 /* Emit code to store CR fields that need to be saved into REG. */
29110
29111 static void
29112 rs6000_emit_move_from_cr (rtx reg)
29113 {
29114 /* Only the ELFv2 ABI allows storing only selected fields. */
29115 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
29116 {
29117 int i, cr_reg[8], count = 0;
29118
29119 /* Collect CR fields that must be saved. */
29120 for (i = 0; i < 8; i++)
29121 if (save_reg_p (CR0_REGNO + i))
29122 cr_reg[count++] = i;
29123
29124 /* If it's just a single one, use mfcrf. */
29125 if (count == 1)
29126 {
29127 rtvec p = rtvec_alloc (1);
29128 rtvec r = rtvec_alloc (2);
29129 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
29130 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
29131 RTVEC_ELT (p, 0)
29132 = gen_rtx_SET (reg,
29133 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
29134
29135 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29136 return;
29137 }
29138
29139 	 /* ??? It might be better to handle the count == 2 and count == 3
29140 	    cases here as well, using logical operations to combine the values. */
29141 }
29142
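  /* Otherwise fall back to mfcr, which copies all eight CR fields
     into REG at once.  */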
29143 emit_insn (gen_movesi_from_cr (reg));
29144 }
29145
29146 /* Return whether the split-stack arg pointer (r12) is used. */
29147
29148 static bool
29149 split_stack_arg_pointer_used_p (void)
29150 {
29151 /* If the pseudo holding the arg pointer is no longer a pseudo,
29152 then the arg pointer is used. */
29153 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
29154 && (!REG_P (cfun->machine->split_stack_arg_pointer)
29155 || (REGNO (cfun->machine->split_stack_arg_pointer)
29156 < FIRST_PSEUDO_REGISTER)))
29157 return true;
29158
29159 /* Unfortunately we also need to do some code scanning, since
29160 r12 may have been substituted for the pseudo. */
29161 rtx_insn *insn;
29162 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
29163 FOR_BB_INSNS (bb, insn)
29164 if (NONDEBUG_INSN_P (insn))
29165 {
29166 /* A call destroys r12. */
29167 if (CALL_P (insn))
29168 return false;
29169
29170 df_ref use;
29171 FOR_EACH_INSN_USE (use, insn)
29172 {
29173 rtx x = DF_REF_REG (use);
29174 if (REG_P (x) && REGNO (x) == 12)
29175 return true;
29176 }
29177 df_ref def;
29178 FOR_EACH_INSN_DEF (def, insn)
29179 {
29180 rtx x = DF_REF_REG (def);
29181 if (REG_P (x) && REGNO (x) == 12)
29182 return false;
29183 }
29184 }
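  /* No insn in the block read or wrote r12, so the arg pointer is used
     iff r12 is live on exit from the block.  */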
29185 return bitmap_bit_p (DF_LR_OUT (bb), 12);
29186 }
29187
29188 /* Return whether we need to emit an ELFv2 global entry point prologue. */
29189
29190 static bool
29191 rs6000_global_entry_point_needed_p (void)
29192 {
29193 /* Only needed for the ELFv2 ABI. */
29194 if (DEFAULT_ABI != ABI_ELFv2)
29195 return false;
29196
29197 /* With -msingle-pic-base, we assume the whole program shares the same
29198 TOC, so no global entry point prologues are needed anywhere. */
29199 if (TARGET_SINGLE_PIC_BASE)
29200 return false;
29201
29202 /* Ensure we have a global entry point for thunks. ??? We could
29203 avoid that if the target routine doesn't need a global entry point,
29204 but we do not know whether this is the case at this point. */
29205 if (cfun->is_thunk)
29206 return true;
29207
29208 /* For regular functions, rs6000_emit_prologue sets this flag if the
29209 routine ever uses the TOC pointer. */
29210 return cfun->machine->r2_setup_needed;
29211 }
29212
29213 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
29214 static sbitmap
29215 rs6000_get_separate_components (void)
29216 {
29217 rs6000_stack_t *info = rs6000_stack_info ();
29218
29219 if (WORLD_SAVE_P (info))
29220 return NULL;
29221
29222 if (TARGET_SPE_ABI)
29223 return NULL;
29224
29225 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
29226 && !(info->savres_strategy & REST_MULTIPLE));
29227
29228 /* Component 0 is the save/restore of LR (done via GPR0).
29229 Components 13..31 are the save/restore of GPR13..GPR31.
29230 Components 46..63 are the save/restore of FPR14..FPR31. */
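  /* (The FPR component numbers are just the FPR hard register numbers:
     FPRn is register 32 + n, so FPR14..FPR31 map to 46..63.)  */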
29231
29232 cfun->machine->n_components = 64;
29233
29234 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29235 bitmap_clear (components);
29236
29237 int reg_size = TARGET_32BIT ? 4 : 8;
29238 int fp_reg_size = 8;
29239
29240 /* The GPRs we need saved to the frame. */
29241 if ((info->savres_strategy & SAVE_INLINE_GPRS)
29242 && (info->savres_strategy & REST_INLINE_GPRS))
29243 {
29244 int offset = info->gp_save_offset;
29245 if (info->push_p)
29246 offset += info->total_size;
29247
29248 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29249 {
29250 if (IN_RANGE (offset, -0x8000, 0x7fff)
29251 && rs6000_reg_live_or_pic_offset_p (regno))
29252 bitmap_set_bit (components, regno);
29253
29254 offset += reg_size;
29255 }
29256 }
29257
29258 /* Don't mess with the hard frame pointer. */
29259 if (frame_pointer_needed)
29260 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
29261
29262 /* Don't mess with the fixed TOC register. */
29263 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
29264 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
29265 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
29266 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
29267
29268 /* The FPRs we need saved to the frame. */
29269 if ((info->savres_strategy & SAVE_INLINE_FPRS)
29270 && (info->savres_strategy & REST_INLINE_FPRS))
29271 {
29272 int offset = info->fp_save_offset;
29273 if (info->push_p)
29274 offset += info->total_size;
29275
29276 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29277 {
29278 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
29279 bitmap_set_bit (components, regno);
29280
29281 offset += fp_reg_size;
29282 }
29283 }
29284
29285 /* Optimize LR save and restore if we can. This is component 0. Any
29286 out-of-line register save/restore routines need LR. */
29287 if (info->lr_save_p
29288 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
29289 && (info->savres_strategy & SAVE_INLINE_GPRS)
29290 && (info->savres_strategy & REST_INLINE_GPRS)
29291 && (info->savres_strategy & SAVE_INLINE_FPRS)
29292 && (info->savres_strategy & REST_INLINE_FPRS)
29293 && (info->savres_strategy & SAVE_INLINE_VRS)
29294 && (info->savres_strategy & REST_INLINE_VRS))
29295 {
29296 int offset = info->lr_save_offset;
29297 if (info->push_p)
29298 offset += info->total_size;
29299 if (IN_RANGE (offset, -0x8000, 0x7fff))
29300 bitmap_set_bit (components, 0);
29301 }
29302
29303 return components;
29304 }
29305
29306 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
29307 static sbitmap
29308 rs6000_components_for_bb (basic_block bb)
29309 {
29310 rs6000_stack_t *info = rs6000_stack_info ();
29311
29312 bitmap in = DF_LIVE_IN (bb);
29313 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
29314 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
29315
29316 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
29317 bitmap_clear (components);
29318
29319 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
29320
29321 /* GPRs. */
29322 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
29323 if (bitmap_bit_p (in, regno)
29324 || bitmap_bit_p (gen, regno)
29325 || bitmap_bit_p (kill, regno))
29326 bitmap_set_bit (components, regno);
29327
29328 /* FPRs. */
29329 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
29330 if (bitmap_bit_p (in, regno)
29331 || bitmap_bit_p (gen, regno)
29332 || bitmap_bit_p (kill, regno))
29333 bitmap_set_bit (components, regno);
29334
29335 /* The link register. */
29336 if (bitmap_bit_p (in, LR_REGNO)
29337 || bitmap_bit_p (gen, LR_REGNO)
29338 || bitmap_bit_p (kill, LR_REGNO))
29339 bitmap_set_bit (components, 0);
29340
29341 return components;
29342 }
29343
29344 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
29345 static void
29346 rs6000_disqualify_components (sbitmap components, edge e,
29347 sbitmap edge_components, bool /*is_prologue*/)
29348 {
29349 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
29350 live where we want to place that code. */
29351 if (bitmap_bit_p (edge_components, 0)
29352 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
29353 {
29354 if (dump_file)
29355 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
29356 "on entry to bb %d\n", e->dest->index);
29357 bitmap_clear_bit (components, 0);
29358 }
29359 }
29360
29361 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
29362 static void
29363 rs6000_emit_prologue_components (sbitmap components)
29364 {
29365 rs6000_stack_t *info = rs6000_stack_info ();
29366 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29367 ? HARD_FRAME_POINTER_REGNUM
29368 : STACK_POINTER_REGNUM);
29369
29370 machine_mode reg_mode = Pmode;
29371 int reg_size = TARGET_32BIT ? 4 : 8;
29372 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29373 ? DFmode : SFmode;
29374 int fp_reg_size = 8;
29375
29376 /* Prologue for LR. */
29377 if (bitmap_bit_p (components, 0))
29378 {
29379 rtx reg = gen_rtx_REG (reg_mode, 0);
29380 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
29381 RTX_FRAME_RELATED_P (insn) = 1;
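      /* A NULL note value makes dwarf2cfi take the REG_CFA_REGISTER
	 expression from the insn's own SET, i.e. the LR-to-GPR0 copy.  */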
29382 add_reg_note (insn, REG_CFA_REGISTER, NULL);
29383
29384 int offset = info->lr_save_offset;
29385 if (info->push_p)
29386 offset += info->total_size;
29387
29388 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29389 RTX_FRAME_RELATED_P (insn) = 1;
29390 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
29391 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
29392 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
29393 }
29394
29395 /* Prologue for the GPRs. */
29396 int offset = info->gp_save_offset;
29397 if (info->push_p)
29398 offset += info->total_size;
29399
29400 for (int i = info->first_gp_reg_save; i < 32; i++)
29401 {
29402 if (bitmap_bit_p (components, i))
29403 {
29404 rtx reg = gen_rtx_REG (reg_mode, i);
29405 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29406 RTX_FRAME_RELATED_P (insn) = 1;
29407 rtx set = copy_rtx (single_set (insn));
29408 add_reg_note (insn, REG_CFA_OFFSET, set);
29409 }
29410
29411 offset += reg_size;
29412 }
29413
29414 /* Prologue for the FPRs. */
29415 offset = info->fp_save_offset;
29416 if (info->push_p)
29417 offset += info->total_size;
29418
29419 for (int i = info->first_fp_reg_save; i < 64; i++)
29420 {
29421 if (bitmap_bit_p (components, i))
29422 {
29423 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29424 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
29425 RTX_FRAME_RELATED_P (insn) = 1;
29426 rtx set = copy_rtx (single_set (insn));
29427 add_reg_note (insn, REG_CFA_OFFSET, set);
29428 }
29429
29430 offset += fp_reg_size;
29431 }
29432 }
29433
29434 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
29435 static void
29436 rs6000_emit_epilogue_components (sbitmap components)
29437 {
29438 rs6000_stack_t *info = rs6000_stack_info ();
29439 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
29440 ? HARD_FRAME_POINTER_REGNUM
29441 : STACK_POINTER_REGNUM);
29442
29443 machine_mode reg_mode = Pmode;
29444 int reg_size = TARGET_32BIT ? 4 : 8;
29445
29446 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29447 ? DFmode : SFmode;
29448 int fp_reg_size = 8;
29449
29450 /* Epilogue for the FPRs. */
29451 int offset = info->fp_save_offset;
29452 if (info->push_p)
29453 offset += info->total_size;
29454
29455 for (int i = info->first_fp_reg_save; i < 64; i++)
29456 {
29457 if (bitmap_bit_p (components, i))
29458 {
29459 rtx reg = gen_rtx_REG (fp_reg_mode, i);
29460 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29461 RTX_FRAME_RELATED_P (insn) = 1;
29462 add_reg_note (insn, REG_CFA_RESTORE, reg);
29463 }
29464
29465 offset += fp_reg_size;
29466 }
29467
29468 /* Epilogue for the GPRs. */
29469 offset = info->gp_save_offset;
29470 if (info->push_p)
29471 offset += info->total_size;
29472
29473 for (int i = info->first_gp_reg_save; i < 32; i++)
29474 {
29475 if (bitmap_bit_p (components, i))
29476 {
29477 rtx reg = gen_rtx_REG (reg_mode, i);
29478 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29479 RTX_FRAME_RELATED_P (insn) = 1;
29480 add_reg_note (insn, REG_CFA_RESTORE, reg);
29481 }
29482
29483 offset += reg_size;
29484 }
29485
29486 /* Epilogue for LR. */
29487 if (bitmap_bit_p (components, 0))
29488 {
29489 int offset = info->lr_save_offset;
29490 if (info->push_p)
29491 offset += info->total_size;
29492
29493 rtx reg = gen_rtx_REG (reg_mode, 0);
29494 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
29495
29496 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
29497 insn = emit_move_insn (lr, reg);
29498 RTX_FRAME_RELATED_P (insn) = 1;
29499 add_reg_note (insn, REG_CFA_RESTORE, lr);
29500 }
29501 }
29502
29503 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
29504 static void
29505 rs6000_set_handled_components (sbitmap components)
29506 {
29507 rs6000_stack_t *info = rs6000_stack_info ();
29508
29509 for (int i = info->first_gp_reg_save; i < 32; i++)
29510 if (bitmap_bit_p (components, i))
29511 cfun->machine->gpr_is_wrapped_separately[i] = true;
29512
29513 for (int i = info->first_fp_reg_save; i < 64; i++)
29514 if (bitmap_bit_p (components, i))
29515 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
29516
29517 if (bitmap_bit_p (components, 0))
29518 cfun->machine->lr_is_wrapped_separately = true;
29519 }
29520
29521 /* Emit function prologue as insns. */
29522
29523 void
29524 rs6000_emit_prologue (void)
29525 {
29526 rs6000_stack_t *info = rs6000_stack_info ();
29527 machine_mode reg_mode = Pmode;
29528 int reg_size = TARGET_32BIT ? 4 : 8;
29529 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
29530 ? DFmode : SFmode;
29531 int fp_reg_size = 8;
29532 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29533 rtx frame_reg_rtx = sp_reg_rtx;
29534 unsigned int cr_save_regno;
29535 rtx cr_save_rtx = NULL_RTX;
29536 rtx_insn *insn;
29537 int strategy;
29538 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
29539 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
29540 && call_used_regs[STATIC_CHAIN_REGNUM]);
29541 int using_split_stack = (flag_split_stack
29542 && (lookup_attribute ("no_split_stack",
29543 DECL_ATTRIBUTES (cfun->decl))
29544 == NULL));
29545
29546 /* Offset to top of frame for frame_reg and sp respectively. */
29547 HOST_WIDE_INT frame_off = 0;
29548 HOST_WIDE_INT sp_off = 0;
29549 /* sp_adjust is the stack adjusting instruction, tracked so that the
29550 insn setting up the split-stack arg pointer can be emitted just
29551 prior to it, when r12 is not used here for other purposes. */
29552 rtx_insn *sp_adjust = 0;
29553
29554 #if CHECKING_P
29555 /* Track and check usage of r0, r11, r12. */
29556 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
29557 #define START_USE(R) do \
29558 { \
29559 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29560 reg_inuse |= 1 << (R); \
29561 } while (0)
29562 #define END_USE(R) do \
29563 { \
29564 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
29565 reg_inuse &= ~(1 << (R)); \
29566 } while (0)
29567 #define NOT_INUSE(R) do \
29568 { \
29569 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
29570 } while (0)
29571 #else
29572 #define START_USE(R) do {} while (0)
29573 #define END_USE(R) do {} while (0)
29574 #define NOT_INUSE(R) do {} while (0)
29575 #endif
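/* For instance, saving CR through r12 below brackets the code with
   START_USE (12) ... END_USE (12); with checking enabled, any
   overlapping claim on r12 in between triggers an assert.  */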
29576
29577 if (DEFAULT_ABI == ABI_ELFv2
29578 && !TARGET_SINGLE_PIC_BASE)
29579 {
29580 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
29581
29582 /* With -mminimal-toc we may generate an extra use of r2 below. */
29583 if (TARGET_TOC && TARGET_MINIMAL_TOC
29584 && !constant_pool_empty_p ())
29585 cfun->machine->r2_setup_needed = true;
29586 }
29587
29588
29589 if (flag_stack_usage_info)
29590 current_function_static_stack_size = info->total_size;
29591
29592 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
29593 {
29594 HOST_WIDE_INT size = info->total_size;
29595
29596 if (crtl->is_leaf && !cfun->calls_alloca)
29597 {
29598 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
29599 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
29600 size - STACK_CHECK_PROTECT);
29601 }
29602 else if (size > 0)
29603 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
29604 }
29605
29606 if (TARGET_FIX_AND_CONTINUE)
29607 {
29608 /* gdb on darwin arranges to forward a function from the old
29609 address by modifying the first 5 instructions of the function
29610 to branch to the overriding function. This is necessary to
29611 permit function pointers that point to the old function to
29612 actually forward to the new function. */
29613 emit_insn (gen_nop ());
29614 emit_insn (gen_nop ());
29615 emit_insn (gen_nop ());
29616 emit_insn (gen_nop ());
29617 emit_insn (gen_nop ());
29618 }
29619
29620 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
29621 {
29622 reg_mode = V2SImode;
29623 reg_size = 8;
29624 }
29625
29626 /* Handle world saves specially here. */
29627 if (WORLD_SAVE_P (info))
29628 {
29629 int i, j, sz;
29630 rtx treg;
29631 rtvec p;
29632 rtx reg0;
29633
29634 /* save_world expects lr in r0. */
29635 reg0 = gen_rtx_REG (Pmode, 0);
29636 if (info->lr_save_p)
29637 {
29638 insn = emit_move_insn (reg0,
29639 gen_rtx_REG (Pmode, LR_REGNO));
29640 RTX_FRAME_RELATED_P (insn) = 1;
29641 }
29642
29643 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
29644 assumptions about the offsets of various bits of the stack
29645 frame. */
29646 gcc_assert (info->gp_save_offset == -220
29647 && info->fp_save_offset == -144
29648 && info->lr_save_offset == 8
29649 && info->cr_save_offset == 4
29650 && info->push_p
29651 && info->lr_save_p
29652 && (!crtl->calls_eh_return
29653 || info->ehrd_offset == -432)
29654 && info->vrsave_save_offset == -224
29655 && info->altivec_save_offset == -416);
29656
29657 treg = gen_rtx_REG (SImode, 11);
29658 emit_move_insn (treg, GEN_INT (-info->total_size));
29659
29660 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
29661 in R11. It also clobbers R12, so beware! */
29662
29663 	 /* Preserve CR2 for save_world prologues.  */
29664 sz = 5;
29665 sz += 32 - info->first_gp_reg_save;
29666 sz += 64 - info->first_fp_reg_save;
29667 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
29668 p = rtvec_alloc (sz);
29669 j = 0;
29670 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
29671 gen_rtx_REG (SImode,
29672 LR_REGNO));
29673 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
29674 gen_rtx_SYMBOL_REF (Pmode,
29675 "*save_world"));
29676 /* We do floats first so that the instruction pattern matches
29677 properly. */
29678 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29679 RTVEC_ELT (p, j++)
29680 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
29681 ? DFmode : SFmode,
29682 info->first_fp_reg_save + i),
29683 frame_reg_rtx,
29684 info->fp_save_offset + frame_off + 8 * i);
29685 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
29686 RTVEC_ELT (p, j++)
29687 = gen_frame_store (gen_rtx_REG (V4SImode,
29688 info->first_altivec_reg_save + i),
29689 frame_reg_rtx,
29690 info->altivec_save_offset + frame_off + 16 * i);
29691 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29692 RTVEC_ELT (p, j++)
29693 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
29694 frame_reg_rtx,
29695 info->gp_save_offset + frame_off + reg_size * i);
29696
29697 /* CR register traditionally saved as CR2. */
29698 RTVEC_ELT (p, j++)
29699 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
29700 frame_reg_rtx, info->cr_save_offset + frame_off);
29701 	 /* Describe the store of R0 (holding the old LR) to the LR save slot. */
29702 if (info->lr_save_p)
29703 RTVEC_ELT (p, j++)
29704 = gen_frame_store (reg0,
29705 frame_reg_rtx, info->lr_save_offset + frame_off);
29706 /* Explain what happens to the stack pointer. */
29707 {
29708 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
29709 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
29710 }
29711
29712 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
29713 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29714 treg, GEN_INT (-info->total_size));
29715 sp_off = frame_off = info->total_size;
29716 }
29717
29718 strategy = info->savres_strategy;
29719
29720 /* For V.4, update stack before we do any saving and set back pointer. */
29721 if (! WORLD_SAVE_P (info)
29722 && info->push_p
29723 && (DEFAULT_ABI == ABI_V4
29724 || crtl->calls_eh_return))
29725 {
29726 bool need_r11 = (TARGET_SPE
29727 ? (!(strategy & SAVE_INLINE_GPRS)
29728 && info->spe_64bit_regs_used == 0)
29729 : (!(strategy & SAVE_INLINE_FPRS)
29730 || !(strategy & SAVE_INLINE_GPRS)
29731 || !(strategy & SAVE_INLINE_VRS)));
29732 int ptr_regno = -1;
29733 rtx ptr_reg = NULL_RTX;
29734 int ptr_off = 0;
29735
29736 if (info->total_size < 32767)
29737 frame_off = info->total_size;
29738 else if (need_r11)
29739 ptr_regno = 11;
29740 else if (info->cr_save_p
29741 || info->lr_save_p
29742 || info->first_fp_reg_save < 64
29743 || info->first_gp_reg_save < 32
29744 || info->altivec_size != 0
29745 || info->vrsave_size != 0
29746 || crtl->calls_eh_return)
29747 ptr_regno = 12;
29748 else
29749 {
29750 /* The prologue won't be saving any regs so there is no need
29751 to set up a frame register to access any frame save area.
29752 We also won't be using frame_off anywhere below, but set
29753 the correct value anyway to protect against future
29754 changes to this function. */
29755 frame_off = info->total_size;
29756 }
29757 if (ptr_regno != -1)
29758 {
29759 /* Set up the frame offset to that needed by the first
29760 out-of-line save function. */
29761 START_USE (ptr_regno);
29762 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29763 frame_reg_rtx = ptr_reg;
29764 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
29765 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
29766 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
29767 ptr_off = info->gp_save_offset + info->gp_size;
29768 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
29769 ptr_off = info->altivec_save_offset + info->altivec_size;
29770 frame_off = -ptr_off;
29771 }
29772 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
29773 ptr_reg, ptr_off);
29774 if (REGNO (frame_reg_rtx) == 12)
29775 sp_adjust = 0;
29776 sp_off = info->total_size;
29777 if (frame_reg_rtx != sp_reg_rtx)
29778 rs6000_emit_stack_tie (frame_reg_rtx, false);
29779 }
29780
29781 /* If we use the link register, get it into r0. */
29782 if (!WORLD_SAVE_P (info) && info->lr_save_p
29783 && !cfun->machine->lr_is_wrapped_separately)
29784 {
29785 rtx addr, reg, mem;
29786
29787 reg = gen_rtx_REG (Pmode, 0);
29788 START_USE (0);
29789 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
29790 RTX_FRAME_RELATED_P (insn) = 1;
29791
29792 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
29793 | SAVE_NOINLINE_FPRS_SAVES_LR)))
29794 {
29795 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
29796 GEN_INT (info->lr_save_offset + frame_off));
29797 mem = gen_rtx_MEM (Pmode, addr);
29798 /* This should not be of rs6000_sr_alias_set, because of
29799 __builtin_return_address. */
29800
29801 insn = emit_move_insn (mem, reg);
29802 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
29803 NULL_RTX, NULL_RTX);
29804 END_USE (0);
29805 }
29806 }
29807
29808 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
29809 	 r12 will be needed by the out-of-line gpr save. */
29810 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29811 && !(strategy & (SAVE_INLINE_GPRS
29812 | SAVE_NOINLINE_GPRS_SAVES_LR))
29813 ? 11 : 12);
29814 if (!WORLD_SAVE_P (info)
29815 && info->cr_save_p
29816 && REGNO (frame_reg_rtx) != cr_save_regno
29817 && !(using_static_chain_p && cr_save_regno == 11)
29818 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
29819 {
29820 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
29821 START_USE (cr_save_regno);
29822 rs6000_emit_move_from_cr (cr_save_rtx);
29823 }
29824
29825 	 /* Do any required saving of fpr's. If the strategy allows it, save
29826 	 them inline ourselves; otherwise, call an out-of-line routine. */
29827 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
29828 {
29829 int offset = info->fp_save_offset + frame_off;
29830 for (int i = info->first_fp_reg_save; i < 64; i++)
29831 {
29832 if (save_reg_p (i)
29833 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
29834 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
29835 sp_off - frame_off);
29836
29837 offset += fp_reg_size;
29838 }
29839 }
29840 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
29841 {
29842 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
29843 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
29844 unsigned ptr_regno = ptr_regno_for_savres (sel);
29845 rtx ptr_reg = frame_reg_rtx;
29846
29847 if (REGNO (frame_reg_rtx) == ptr_regno)
29848 gcc_checking_assert (frame_off == 0);
29849 else
29850 {
29851 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29852 NOT_INUSE (ptr_regno);
29853 emit_insn (gen_add3_insn (ptr_reg,
29854 frame_reg_rtx, GEN_INT (frame_off)));
29855 }
29856 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29857 info->fp_save_offset,
29858 info->lr_save_offset,
29859 DFmode, sel);
29860 rs6000_frame_related (insn, ptr_reg, sp_off,
29861 NULL_RTX, NULL_RTX);
29862 if (lr)
29863 END_USE (0);
29864 }
29865
29866 /* Save GPRs. This is done as a PARALLEL if we are using
29867 the store-multiple instructions. */
29868 if (!WORLD_SAVE_P (info)
29869 && TARGET_SPE_ABI
29870 && info->spe_64bit_regs_used != 0
29871 && info->first_gp_reg_save != 32)
29872 {
29873 int i;
29874 rtx spe_save_area_ptr;
29875 HOST_WIDE_INT save_off;
29876 int ool_adjust = 0;
29877
29878 /* Determine whether we can address all of the registers that need
29879 to be saved with an offset from frame_reg_rtx that fits in
29880 the small const field for SPE memory instructions. */
29881 int spe_regs_addressable
29882 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
29883 + reg_size * (32 - info->first_gp_reg_save - 1))
29884 && (strategy & SAVE_INLINE_GPRS));
29885
29886 if (spe_regs_addressable)
29887 {
29888 spe_save_area_ptr = frame_reg_rtx;
29889 save_off = frame_off;
29890 }
29891 else
29892 {
29893 /* Make r11 point to the start of the SPE save area. We need
29894 to be careful here if r11 is holding the static chain. If
29895 it is, then temporarily save it in r0. */
29896 HOST_WIDE_INT offset;
29897
29898 if (!(strategy & SAVE_INLINE_GPRS))
29899 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
29900 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
29901 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
29902 save_off = frame_off - offset;
29903
29904 if (using_static_chain_p)
29905 {
29906 rtx r0 = gen_rtx_REG (Pmode, 0);
29907
29908 START_USE (0);
29909 gcc_assert (info->first_gp_reg_save > 11);
29910
29911 emit_move_insn (r0, spe_save_area_ptr);
29912 }
29913 else if (REGNO (frame_reg_rtx) != 11)
29914 START_USE (11);
29915
29916 emit_insn (gen_addsi3 (spe_save_area_ptr,
29917 frame_reg_rtx, GEN_INT (offset)));
29918 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
29919 frame_off = -info->spe_gp_save_offset + ool_adjust;
29920 }
29921
29922 if ((strategy & SAVE_INLINE_GPRS))
29923 {
29924 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
29925 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
29926 emit_frame_save (spe_save_area_ptr, reg_mode,
29927 info->first_gp_reg_save + i,
29928 (info->spe_gp_save_offset + save_off
29929 + reg_size * i),
29930 sp_off - save_off);
29931 }
29932 else
29933 {
29934 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
29935 info->spe_gp_save_offset + save_off,
29936 0, reg_mode,
29937 SAVRES_SAVE | SAVRES_GPR);
29938
29939 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
29940 NULL_RTX, NULL_RTX);
29941 }
29942
29943 /* Move the static chain pointer back. */
29944 if (!spe_regs_addressable)
29945 {
29946 if (using_static_chain_p)
29947 {
29948 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
29949 END_USE (0);
29950 }
29951 else if (REGNO (frame_reg_rtx) != 11)
29952 END_USE (11);
29953 }
29954 }
29955 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
29956 {
29957 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
29958 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
29959 unsigned ptr_regno = ptr_regno_for_savres (sel);
29960 rtx ptr_reg = frame_reg_rtx;
29961 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
29962 int end_save = info->gp_save_offset + info->gp_size;
29963 int ptr_off;
29964
29965 if (ptr_regno == 12)
29966 sp_adjust = 0;
29967 if (!ptr_set_up)
29968 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
29969
29970 /* Need to adjust r11 (r12) if we saved any FPRs. */
29971 if (end_save + frame_off != 0)
29972 {
29973 rtx offset = GEN_INT (end_save + frame_off);
29974
29975 if (ptr_set_up)
29976 frame_off = -end_save;
29977 else
29978 NOT_INUSE (ptr_regno);
29979 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
29980 }
29981 else if (!ptr_set_up)
29982 {
29983 NOT_INUSE (ptr_regno);
29984 emit_move_insn (ptr_reg, frame_reg_rtx);
29985 }
29986 ptr_off = -end_save;
29987 insn = rs6000_emit_savres_rtx (info, ptr_reg,
29988 info->gp_save_offset + ptr_off,
29989 info->lr_save_offset + ptr_off,
29990 reg_mode, sel);
29991 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
29992 NULL_RTX, NULL_RTX);
29993 if (lr)
29994 END_USE (0);
29995 }
29996 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
29997 {
29998 rtvec p;
29999 int i;
30000 p = rtvec_alloc (32 - info->first_gp_reg_save);
30001 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30002 RTVEC_ELT (p, i)
30003 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
30004 frame_reg_rtx,
30005 info->gp_save_offset + frame_off + reg_size * i);
30006 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30007 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30008 NULL_RTX, NULL_RTX);
30009 }
30010 else if (!WORLD_SAVE_P (info))
30011 {
30012 int offset = info->gp_save_offset + frame_off;
30013 for (int i = info->first_gp_reg_save; i < 32; i++)
30014 {
30015 if (rs6000_reg_live_or_pic_offset_p (i)
30016 && !cfun->machine->gpr_is_wrapped_separately[i])
30017 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
30018 sp_off - frame_off);
30019
30020 offset += reg_size;
30021 }
30022 }
30023
30024 if (crtl->calls_eh_return)
30025 {
30026 unsigned int i;
30027 rtvec p;
30028
30029 for (i = 0; ; ++i)
30030 {
30031 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30032 if (regno == INVALID_REGNUM)
30033 break;
30034 }
30035
30036 p = rtvec_alloc (i);
30037
30038 for (i = 0; ; ++i)
30039 {
30040 unsigned int regno = EH_RETURN_DATA_REGNO (i);
30041 if (regno == INVALID_REGNUM)
30042 break;
30043
30044 rtx set
30045 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
30046 sp_reg_rtx,
30047 info->ehrd_offset + sp_off + reg_size * (int) i);
30048 RTVEC_ELT (p, i) = set;
30049 RTX_FRAME_RELATED_P (set) = 1;
30050 }
30051
30052 insn = emit_insn (gen_blockage ());
30053 RTX_FRAME_RELATED_P (insn) = 1;
30054 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
30055 }
30056
30057 	 /* In the AIX ABI we need to make sure r2 is really saved. */
30058 if (TARGET_AIX && crtl->calls_eh_return)
30059 {
30060 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
30061 rtx join_insn, note;
30062 rtx_insn *save_insn;
30063 long toc_restore_insn;
30064
30065 tmp_reg = gen_rtx_REG (Pmode, 11);
30066 tmp_reg_si = gen_rtx_REG (SImode, 11);
30067 if (using_static_chain_p)
30068 {
30069 START_USE (0);
30070 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
30071 }
30072 else
30073 START_USE (11);
30074 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
30075 	 /* Peek at the instruction to which this function returns. If it's
30076 restoring r2, then we know we've already saved r2. We can't
30077 unconditionally save r2 because the value we have will already
30078 be updated if we arrived at this function via a plt call or
30079 toc adjusting stub. */
30080 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
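      /* 0x80410000 is "lwz 2,0(1)" and 0xE8410000 is "ld 2,0(1)";
	 adding RS6000_TOC_SAVE_SLOT gives the image of the insn that
	 reloads r2 from its stack slot after a call through a stub.  */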
30081 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
30082 + RS6000_TOC_SAVE_SLOT);
30083 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
30084 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
30085 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
30086 validate_condition_mode (EQ, CCUNSmode);
30087 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
30088 emit_insn (gen_rtx_SET (compare_result,
30089 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
30090 toc_save_done = gen_label_rtx ();
30091 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30092 gen_rtx_EQ (VOIDmode, compare_result,
30093 const0_rtx),
30094 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
30095 pc_rtx);
30096 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30097 JUMP_LABEL (jump) = toc_save_done;
30098 LABEL_NUSES (toc_save_done) += 1;
30099
30100 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
30101 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
30102 sp_off - frame_off);
30103
30104 emit_label (toc_save_done);
30105
30106 	 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
30107 have a CFG that has different saves along different paths.
30108 Move the note to a dummy blockage insn, which describes that
30109 R2 is unconditionally saved after the label. */
30110 /* ??? An alternate representation might be a special insn pattern
30111 	 containing both the branch and the store. That might give the
30112 	 code that minimizes the number of DW_CFA_advance opcodes greater
30113 	 freedom in placing the annotations. */
30114 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
30115 if (note)
30116 remove_note (save_insn, note);
30117 else
30118 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
30119 copy_rtx (PATTERN (save_insn)), NULL_RTX);
30120 RTX_FRAME_RELATED_P (save_insn) = 0;
30121
30122 join_insn = emit_insn (gen_blockage ());
30123 REG_NOTES (join_insn) = note;
30124 RTX_FRAME_RELATED_P (join_insn) = 1;
30125
30126 if (using_static_chain_p)
30127 {
30128 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
30129 END_USE (0);
30130 }
30131 else
30132 END_USE (11);
30133 }
30134
30135 /* Save CR if we use any that must be preserved. */
30136 if (!WORLD_SAVE_P (info) && info->cr_save_p)
30137 {
30138 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
30139 GEN_INT (info->cr_save_offset + frame_off));
30140 rtx mem = gen_frame_mem (SImode, addr);
30141
30142 /* If we didn't copy cr before, do so now using r0. */
30143 if (cr_save_rtx == NULL_RTX)
30144 {
30145 START_USE (0);
30146 cr_save_rtx = gen_rtx_REG (SImode, 0);
30147 rs6000_emit_move_from_cr (cr_save_rtx);
30148 }
30149
30150 /* Saving CR requires a two-instruction sequence: one instruction
30151 to move the CR to a general-purpose register, and a second
30152 instruction that stores the GPR to memory.
30153
30154 We do not emit any DWARF CFI records for the first of these,
30155 because we cannot properly represent the fact that CR is saved in
30156 a register. One reason is that we cannot express that multiple
30157 CR fields are saved; another reason is that on 64-bit, the size
30158 of the CR register in DWARF (4 bytes) differs from the size of
30159 a general-purpose register.
30160
30161 This means if any intervening instruction were to clobber one of
30162 the call-saved CR fields, we'd have incorrect CFI. To prevent
30163 this from happening, we mark the store to memory as a use of
30164 those CR fields, which prevents any such instruction from being
30165 scheduled in between the two instructions. */
30166 rtx crsave_v[9];
30167 int n_crsave = 0;
30168 int i;
30169
30170 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
30171 for (i = 0; i < 8; i++)
30172 if (save_reg_p (CR0_REGNO + i))
30173 crsave_v[n_crsave++]
30174 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30175
30176 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
30177 gen_rtvec_v (n_crsave, crsave_v)));
30178 END_USE (REGNO (cr_save_rtx));
30179
30180 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
30181 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
30182 so we need to construct a frame expression manually. */
30183 RTX_FRAME_RELATED_P (insn) = 1;
30184
30185 /* Update address to be stack-pointer relative, like
30186 rs6000_frame_related would do. */
30187 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
30188 GEN_INT (info->cr_save_offset + sp_off));
30189 mem = gen_frame_mem (SImode, addr);
30190
30191 if (DEFAULT_ABI == ABI_ELFv2)
30192 {
30193 /* In the ELFv2 ABI we generate separate CFI records for each
30194 CR field that was actually saved. They all point to the
30195 same 32-bit stack slot. */
30196 rtx crframe[8];
30197 int n_crframe = 0;
30198
30199 for (i = 0; i < 8; i++)
30200 if (save_reg_p (CR0_REGNO + i))
30201 {
30202 crframe[n_crframe]
30203 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
30204
30205 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
30206 n_crframe++;
30207 }
30208
30209 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30210 gen_rtx_PARALLEL (VOIDmode,
30211 gen_rtvec_v (n_crframe, crframe)));
30212 }
30213 else
30214 {
30215 /* In other ABIs, by convention, we use a single CR regnum to
30216 represent the fact that all call-saved CR fields are saved.
30217 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
30218 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
30219 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
30220 }
30221 }
30222
30223 /* In the ELFv2 ABI we need to save all call-saved CR fields into
30224 *separate* slots if the routine calls __builtin_eh_return, so
30225 that they can be independently restored by the unwinder. */
30226 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
30227 {
30228 int i, cr_off = info->ehcr_offset;
30229 rtx crsave;
30230
30231 /* ??? We might get better performance by using multiple mfocrf
30232 instructions. */
30233 crsave = gen_rtx_REG (SImode, 0);
30234 emit_insn (gen_movesi_from_cr (crsave));
30235
30236 for (i = 0; i < 8; i++)
30237 if (!call_used_regs[CR0_REGNO + i])
30238 {
30239 rtvec p = rtvec_alloc (2);
30240 RTVEC_ELT (p, 0)
30241 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
30242 RTVEC_ELT (p, 1)
30243 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
30244
30245 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30246
30247 RTX_FRAME_RELATED_P (insn) = 1;
30248 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
30249 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
30250 sp_reg_rtx, cr_off + sp_off));
30251
30252 cr_off += reg_size;
30253 }
30254 }
30255
30256 /* Update stack and set back pointer unless this is V.4,
30257 for which it was done previously. */
30258 if (!WORLD_SAVE_P (info) && info->push_p
30259 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
30260 {
30261 rtx ptr_reg = NULL;
30262 int ptr_off = 0;
30263
30264 /* If saving altivec regs we need to be able to address all save
30265 locations using a 16-bit offset. */
30266 if ((strategy & SAVE_INLINE_VRS) == 0
30267 || (info->altivec_size != 0
30268 && (info->altivec_save_offset + info->altivec_size - 16
30269 + info->total_size - frame_off) > 32767)
30270 || (info->vrsave_size != 0
30271 && (info->vrsave_save_offset
30272 + info->total_size - frame_off) > 32767))
30273 {
30274 int sel = SAVRES_SAVE | SAVRES_VR;
30275 unsigned ptr_regno = ptr_regno_for_savres (sel);
30276
30277 if (using_static_chain_p
30278 && ptr_regno == STATIC_CHAIN_REGNUM)
30279 ptr_regno = 12;
30280 if (REGNO (frame_reg_rtx) != ptr_regno)
30281 START_USE (ptr_regno);
30282 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
30283 frame_reg_rtx = ptr_reg;
30284 ptr_off = info->altivec_save_offset + info->altivec_size;
30285 frame_off = -ptr_off;
30286 }
30287 else if (REGNO (frame_reg_rtx) == 1)
30288 frame_off = info->total_size;
30289 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
30290 ptr_reg, ptr_off);
30291 if (REGNO (frame_reg_rtx) == 12)
30292 sp_adjust = 0;
30293 sp_off = info->total_size;
30294 if (frame_reg_rtx != sp_reg_rtx)
30295 rs6000_emit_stack_tie (frame_reg_rtx, false);
30296 }
30297
30298 /* Set frame pointer, if needed. */
30299 if (frame_pointer_needed)
30300 {
30301 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
30302 sp_reg_rtx);
30303 RTX_FRAME_RELATED_P (insn) = 1;
30304 }
30305
30306 /* Save AltiVec registers if needed. Save here because the red zone does
30307 not always include AltiVec registers. */
30308 if (!WORLD_SAVE_P (info)
30309 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
30310 {
30311 int end_save = info->altivec_save_offset + info->altivec_size;
30312 int ptr_off;
30313 /* Oddly, the vector save/restore functions point r0 at the end
30314 of the save area, then use r11 or r12 to load offsets for
30315 [reg+reg] addressing. */
30316 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
30317 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
30318 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
30319
30320 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
30321 NOT_INUSE (0);
30322 if (scratch_regno == 12)
30323 sp_adjust = 0;
30324 if (end_save + frame_off != 0)
30325 {
30326 rtx offset = GEN_INT (end_save + frame_off);
30327
30328 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
30329 }
30330 else
30331 emit_move_insn (ptr_reg, frame_reg_rtx);
30332
30333 ptr_off = -end_save;
30334 insn = rs6000_emit_savres_rtx (info, scratch_reg,
30335 info->altivec_save_offset + ptr_off,
30336 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
30337 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
30338 NULL_RTX, NULL_RTX);
30339 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
30340 {
30341 /* The oddity mentioned above clobbered our frame reg. */
30342 emit_move_insn (frame_reg_rtx, ptr_reg);
30343 frame_off = ptr_off;
30344 }
30345 }
30346 else if (!WORLD_SAVE_P (info)
30347 && info->altivec_size != 0)
30348 {
30349 int i;
30350
30351 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
30352 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
30353 {
30354 rtx areg, savereg, mem;
30355 HOST_WIDE_INT offset;
30356
30357 offset = (info->altivec_save_offset + frame_off
30358 + 16 * (i - info->first_altivec_reg_save));
30359
30360 savereg = gen_rtx_REG (V4SImode, i);
30361
30362 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
30363 {
30364 mem = gen_frame_mem (V4SImode,
30365 gen_rtx_PLUS (Pmode, frame_reg_rtx,
30366 GEN_INT (offset)));
30367 insn = emit_insn (gen_rtx_SET (mem, savereg));
30368 areg = NULL_RTX;
30369 }
30370 else
30371 {
30372 NOT_INUSE (0);
30373 areg = gen_rtx_REG (Pmode, 0);
30374 emit_move_insn (areg, GEN_INT (offset));
30375
30376 /* AltiVec addressing mode is [reg+reg]. */
30377 mem = gen_frame_mem (V4SImode,
30378 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
30379
30380 /* Rather than emitting a generic move, force use of the stvx
30381 instruction, which we always want on ISA 2.07 (power8) systems.
30382 In particular we don't want xxpermdi/stxvd2x for little
30383 endian. */
30384 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
30385 }
30386
30387 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
30388 areg, GEN_INT (offset));
30389 }
30390 }
30391
30392 /* VRSAVE is a bit vector representing which AltiVec registers
30393 are used. The OS uses this to determine which vector
30394 registers to save on a context switch. We need to save
30395 VRSAVE on the stack frame, add whatever AltiVec registers we
30396 used in this function, and do the corresponding magic in the
30397 epilogue. */
30398
30399 if (!WORLD_SAVE_P (info)
30400 && info->vrsave_size != 0)
30401 {
30402 rtx reg, vrsave;
30403 int offset;
30404 int save_regno;
30405
30406 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
30407 be using r12 as frame_reg_rtx and r11 as the static chain
30408 pointer for nested functions. */
30409 save_regno = 12;
30410 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30411 && !using_static_chain_p)
30412 save_regno = 11;
30413 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
30414 {
30415 save_regno = 11;
30416 if (using_static_chain_p)
30417 save_regno = 0;
30418 }
30419
30420 NOT_INUSE (save_regno);
30421 reg = gen_rtx_REG (SImode, save_regno);
30422 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
30423 if (TARGET_MACHO)
30424 emit_insn (gen_get_vrsave_internal (reg));
30425 else
30426 emit_insn (gen_rtx_SET (reg, vrsave));
30427
30428 /* Save VRSAVE. */
30429 offset = info->vrsave_save_offset + frame_off;
30430 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
30431
30432 /* Include the registers in the mask. */
30433 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
30434
30435 insn = emit_insn (generate_set_vrsave (reg, info, 0));
30436 }
30437
30438 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
30439 if (!TARGET_SINGLE_PIC_BASE
30440 && ((TARGET_TOC && TARGET_MINIMAL_TOC
30441 && !constant_pool_empty_p ())
30442 || (DEFAULT_ABI == ABI_V4
30443 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
30444 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
30445 {
30446 /* If emit_load_toc_table will use the link register, we need to save
30447 it. We use R12 for this purpose because emit_load_toc_table
30448 can use register 0. This allows us to use a plain 'blr' to return
30449 from the procedure more often. */
30450 int save_LR_around_toc_setup = (TARGET_ELF
30451 && DEFAULT_ABI == ABI_V4
30452 && flag_pic
30453 && ! info->lr_save_p
30454 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
30455 if (save_LR_around_toc_setup)
30456 {
30457 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30458 rtx tmp = gen_rtx_REG (Pmode, 12);
30459
30460 sp_adjust = 0;
30461 insn = emit_move_insn (tmp, lr);
30462 RTX_FRAME_RELATED_P (insn) = 1;
30463
30464 rs6000_emit_load_toc_table (TRUE);
30465
30466 insn = emit_move_insn (lr, tmp);
30467 add_reg_note (insn, REG_CFA_RESTORE, lr);
30468 RTX_FRAME_RELATED_P (insn) = 1;
30469 }
30470 else
30471 rs6000_emit_load_toc_table (TRUE);
30472 }
30473
30474 #if TARGET_MACHO
30475 if (!TARGET_SINGLE_PIC_BASE
30476 && DEFAULT_ABI == ABI_DARWIN
30477 && flag_pic && crtl->uses_pic_offset_table)
30478 {
30479 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30480 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
30481
30482 /* Save and restore LR locally around this call (in R0). */
30483 if (!info->lr_save_p)
30484 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
30485
30486 emit_insn (gen_load_macho_picbase (src));
30487
30488 emit_move_insn (gen_rtx_REG (Pmode,
30489 RS6000_PIC_OFFSET_TABLE_REGNUM),
30490 lr);
30491
30492 if (!info->lr_save_p)
30493 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
30494 }
30495 #endif
30496
30497 /* If we need to, save the TOC register after doing the stack setup.
30498 Do not emit eh frame info for this save. The unwinder wants info,
30499 conceptually attached to instructions in this function, about
30500 register values in the caller of this function. This R2 may have
30501 already been changed from the value in the caller.
30502 We don't attempt to write accurate DWARF EH frame info for R2
30503 because code emitted by gcc for a (non-pointer) function call
30504 doesn't save and restore R2. Instead, R2 is managed out-of-line
30505 by a linker generated plt call stub when the function resides in
30506 a shared library. This behavior is costly to describe in DWARF,
30507 both in terms of the size of DWARF info and the time taken in the
30508 unwinder to interpret it. R2 changes, apart from the
30509 calls_eh_return case earlier in this function, are handled by
30510 linux-unwind.h frob_update_context. */
30511 if (rs6000_save_toc_in_prologue_p ())
30512 {
30513 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
30514 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
30515 }
30516
30517 if (using_split_stack && split_stack_arg_pointer_used_p ())
30518 {
30519 /* Set up the arg pointer (r12) for -fsplit-stack code. If
30520 __morestack was called, it left the arg pointer to the old
30521 stack in r29. Otherwise, the arg pointer is the top of the
30522 current frame. */
30523 cfun->machine->split_stack_argp_used = true;
30524 if (sp_adjust)
30525 {
30526 rtx r12 = gen_rtx_REG (Pmode, 12);
30527 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
30528 emit_insn_before (set_r12, sp_adjust);
30529 }
30530 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
30531 {
30532 rtx r12 = gen_rtx_REG (Pmode, 12);
30533 if (frame_off == 0)
30534 emit_move_insn (r12, frame_reg_rtx);
30535 else
30536 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
30537 }
30538 if (info->push_p)
30539 {
30540 rtx r12 = gen_rtx_REG (Pmode, 12);
30541 rtx r29 = gen_rtx_REG (Pmode, 29);
30542 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
30543 rtx not_more = gen_label_rtx ();
30544 rtx jump;
30545
30546 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
30547 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
30548 gen_rtx_LABEL_REF (VOIDmode, not_more),
30549 pc_rtx);
30550 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
30551 JUMP_LABEL (jump) = not_more;
30552 LABEL_NUSES (not_more) += 1;
30553 emit_move_insn (r12, r29);
30554 emit_label (not_more);
30555 }
30556 }
30557 }
30558
30559 /* Output .extern statements for the save/restore routines we use. */
30560
30561 static void
30562 rs6000_output_savres_externs (FILE *file)
30563 {
30564 rs6000_stack_t *info = rs6000_stack_info ();
30565
30566 if (TARGET_DEBUG_STACK)
30567 debug_stack_info (info);
30568
30569 /* Write .extern for any function we will call to save and restore
30570 fp values. */
30571 if (info->first_fp_reg_save < 64
30572 && !TARGET_MACHO
30573 && !TARGET_ELF)
30574 {
30575 char *name;
30576 int regno = info->first_fp_reg_save - 32;
30577
30578 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
30579 {
30580 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
30581 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
30582 name = rs6000_savres_routine_name (info, regno, sel);
30583 fprintf (file, "\t.extern %s\n", name);
30584 }
30585 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
30586 {
30587 bool lr = (info->savres_strategy
30588 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
30589 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
30590 name = rs6000_savres_routine_name (info, regno, sel);
30591 fprintf (file, "\t.extern %s\n", name);
30592 }
30593 }
30594 }
30595
30596 /* Write function prologue. */
30597
30598 static void
30599 rs6000_output_function_prologue (FILE *file)
30600 {
30601 if (!cfun->is_thunk)
30602 rs6000_output_savres_externs (file);
30603
30604 /* ELFv2 ABI r2 setup code and local entry point. This must follow
30605 immediately after the global entry point label. */
30606 if (rs6000_global_entry_point_needed_p ())
30607 {
30608 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
30609
30610 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
30611
30612 if (TARGET_CMODEL != CMODEL_LARGE)
30613 {
30614 	 /* In the small and medium code models, we assume the TOC is less
30615 	 than 2 GB away from the text section, so it can be computed via the
30616 following two-instruction sequence. */
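	  /* With <n> being rs6000_pic_labelno, the emitted sequence is
	     roughly:

		0:	addis 2,12,.TOC.-.LCF<n>@ha
			addi 2,2,.TOC.-.LCF<n>@l  */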
30617 char buf[256];
30618
30619 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30620 fprintf (file, "0:\taddis 2,12,.TOC.-");
30621 assemble_name (file, buf);
30622 fprintf (file, "@ha\n");
30623 fprintf (file, "\taddi 2,2,.TOC.-");
30624 assemble_name (file, buf);
30625 fprintf (file, "@l\n");
30626 }
30627 else
30628 {
30629 /* In the large code model, we allow arbitrary offsets between the
30630 TOC and the text section, so we have to load the offset from
30631 memory. The data field is emitted directly before the global
30632 entry point in rs6000_elf_declare_function_name. */
30633 char buf[256];
30634
30635 #ifdef HAVE_AS_ENTRY_MARKERS
30636 /* If supported by the linker, emit a marker relocation. If the
30637 total code size of the final executable or shared library
30638 happens to fit into 2 GB after all, the linker will replace
30639 this code sequence with the sequence for the small or medium
30640 code model. */
30641 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
30642 #endif
30643 fprintf (file, "\tld 2,");
30644 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
30645 assemble_name (file, buf);
30646 fprintf (file, "-");
30647 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30648 assemble_name (file, buf);
30649 fprintf (file, "(12)\n");
30650 fprintf (file, "\tadd 2,2,12\n");
30651 }
30652
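      /* Emit ".localentry name,.-name", recording the distance from the
	 global entry point to this point as the local entry offset.  */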
30653 fputs ("\t.localentry\t", file);
30654 assemble_name (file, name);
30655 fputs (",.-", file);
30656 assemble_name (file, name);
30657 fputs ("\n", file);
30658 }
30659
30660 /* Output -mprofile-kernel code. This needs to be done here instead of
30661 in output_function_profile since it must go after the ELFv2 ABI
30662 local entry point. */
30663 if (TARGET_PROFILE_KERNEL && crtl->profile)
30664 {
30665 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
30666 gcc_assert (!TARGET_32BIT);
30667
30668 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
30669
30670 /* In the ELFv2 ABI we have no compiler stack word. It must be
30671 	 the responsibility of _mcount to preserve the static chain
30672 register if required. */
30673 if (DEFAULT_ABI != ABI_ELFv2
30674 && cfun->static_chain_decl != NULL)
30675 {
30676 asm_fprintf (file, "\tstd %s,24(%s)\n",
30677 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30678 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30679 asm_fprintf (file, "\tld %s,24(%s)\n",
30680 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
30681 }
30682 else
30683 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
30684 }
30685
30686 rs6000_pic_labelno++;
30687 }
30688
30689 /* -mprofile-kernel code calls mcount before the function prologue,
30690 so a profiled leaf function should stay a leaf function. */
30691 static bool
30692 rs6000_keep_leaf_when_profiled ()
30693 {
30694 return TARGET_PROFILE_KERNEL;
30695 }
30696
30697 /* Non-zero if vmx regs are restored before the frame pop, zero if
30698 we restore after the pop when possible. */
30699 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
30700
30701 /* Restoring cr is a two-step process: loading a reg from the frame
30702 save, then moving the reg to cr. For ABI_V4 we must let the
30703 unwinder know that the stack location is no longer valid at or
30704 before the stack deallocation, but we can't emit a cfa_restore for
30705 cr at the stack deallocation like we do for other registers.
30706 The trouble is that it is possible for the move to cr to be
30707 scheduled after the stack deallocation. So say exactly where cr
30708 is located on each of the two insns. */
30709
30710 static rtx
30711 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
30712 {
30713 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
30714 rtx reg = gen_rtx_REG (SImode, regno);
30715 rtx_insn *insn = emit_move_insn (reg, mem);
30716
30717 if (!exit_func && DEFAULT_ABI == ABI_V4)
30718 {
30719 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30720 rtx set = gen_rtx_SET (reg, cr);
30721
30722 add_reg_note (insn, REG_CFA_REGISTER, set);
30723 RTX_FRAME_RELATED_P (insn) = 1;
30724 }
30725 return reg;
30726 }
30727
30728 /* Reload CR from REG. */
30729
30730 static void
30731 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
30732 {
30733 int count = 0;
30734 int i;
30735
30736 if (using_mfcr_multiple)
30737 {
30738 for (i = 0; i < 8; i++)
30739 if (save_reg_p (CR0_REGNO + i))
30740 count++;
30741 gcc_assert (count);
30742 }
30743
30744 if (using_mfcr_multiple && count > 1)
30745 {
30746 rtx_insn *insn;
30747 rtvec p;
30748 int ndx;
30749
30750 p = rtvec_alloc (count);
30751
30752 ndx = 0;
30753 for (i = 0; i < 8; i++)
30754 if (save_reg_p (CR0_REGNO + i))
30755 {
30756 rtvec r = rtvec_alloc (2);
30757 RTVEC_ELT (r, 0) = reg;
30758 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
30759 RTVEC_ELT (p, ndx) =
30760 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
30761 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
30762 ndx++;
30763 }
30764 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
30765 gcc_assert (ndx == count);
30766
30767 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30768 CR field separately. */
30769 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30770 {
30771 for (i = 0; i < 8; i++)
30772 if (save_reg_p (CR0_REGNO + i))
30773 add_reg_note (insn, REG_CFA_RESTORE,
30774 gen_rtx_REG (SImode, CR0_REGNO + i));
30775
30776 RTX_FRAME_RELATED_P (insn) = 1;
30777 }
30778 }
30779 else
30780 for (i = 0; i < 8; i++)
30781 if (save_reg_p (CR0_REGNO + i))
30782 {
30783 rtx insn = emit_insn (gen_movsi_to_cr_one
30784 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
30785
30786 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
30787 CR field separately, attached to the insn that in fact
30788 restores this particular CR field. */
30789 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
30790 {
30791 add_reg_note (insn, REG_CFA_RESTORE,
30792 gen_rtx_REG (SImode, CR0_REGNO + i));
30793
30794 RTX_FRAME_RELATED_P (insn) = 1;
30795 }
30796 }
30797
30798 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
30799 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
30800 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
30801 {
30802 rtx_insn *insn = get_last_insn ();
30803 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
30804
30805 add_reg_note (insn, REG_CFA_RESTORE, cr);
30806 RTX_FRAME_RELATED_P (insn) = 1;
30807 }
30808 }
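/* Illustrative only: with cr2, cr3 and cr4 saved, the "multiple"
   path above amounts to a single
       mtcrf 0x38,12
   (mask 1<<(7-2) | 1<<(7-3) | 1<<(7-4)), while the per-field path
   emits
       mtcrf 0x20,12 ; mtcrf 0x10,12 ; mtcrf 0x08,12
   since single-field moves are usually cheaper on cores other than
   the 601/603/750 handled above.  */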
30809
30810 /* Like cr, the move to lr instruction can be scheduled after the
30811 stack deallocation, but unlike cr, its stack frame save is still
30812 valid. So we only need to emit the cfa_restore on the correct
30813 instruction. */
30814
30815 static void
30816 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
30817 {
30818 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
30819 rtx reg = gen_rtx_REG (Pmode, regno);
30820
30821 emit_move_insn (reg, mem);
30822 }
30823
30824 static void
30825 restore_saved_lr (int regno, bool exit_func)
30826 {
30827 rtx reg = gen_rtx_REG (Pmode, regno);
30828 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
30829 rtx_insn *insn = emit_move_insn (lr, reg);
30830
30831 if (!exit_func && flag_shrink_wrap)
30832 {
30833 add_reg_note (insn, REG_CFA_RESTORE, lr);
30834 RTX_FRAME_RELATED_P (insn) = 1;
30835 }
30836 }
30837
30838 static rtx
30839 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
30840 {
30841 if (DEFAULT_ABI == ABI_ELFv2)
30842 {
30843 int i;
30844 for (i = 0; i < 8; i++)
30845 if (save_reg_p (CR0_REGNO + i))
30846 {
30847 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
30848 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
30849 cfa_restores);
30850 }
30851 }
30852 else if (info->cr_save_p)
30853 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30854 gen_rtx_REG (SImode, CR2_REGNO),
30855 cfa_restores);
30856
30857 if (info->lr_save_p)
30858 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30859 gen_rtx_REG (Pmode, LR_REGNO),
30860 cfa_restores);
30861 return cfa_restores;
30862 }
30863
30864 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
30865 V.4 doesn't have any stack cushion; the AIX ABIs keep 220 or 288 bytes
30866 below the stack pointer safe from signals. */
30867
30868 static inline bool
30869 offset_below_red_zone_p (HOST_WIDE_INT offset)
30870 {
30871 return offset < (DEFAULT_ABI == ABI_V4
30872 ? 0
30873 : TARGET_32BIT ? -220 : -288);
30874 }
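/* For example, under a 64-bit AIX-style ABI an AltiVec save slot at
   sp-256 sits inside the 288-byte red zone and survives a signal, so
   offset_below_red_zone_p (-256) is false, while a slot at sp-300
   gives true.  Under ABI_V4 every negative offset counts as below
   the (nonexistent) red zone.  */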
30875
30876 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
30877
30878 static void
30879 emit_cfa_restores (rtx cfa_restores)
30880 {
30881 rtx_insn *insn = get_last_insn ();
30882 rtx *loc = &REG_NOTES (insn);
30883
30884 while (*loc)
30885 loc = &XEXP (*loc, 1);
30886 *loc = cfa_restores;
30887 RTX_FRAME_RELATED_P (insn) = 1;
30888 }
30889
30890 /* Emit function epilogue as insns. */
30891
30892 void
30893 rs6000_emit_epilogue (int sibcall)
30894 {
30895 rs6000_stack_t *info;
30896 int restoring_GPRs_inline;
30897 int restoring_FPRs_inline;
30898 int using_load_multiple;
30899 int using_mtcr_multiple;
30900 int use_backchain_to_restore_sp;
30901 int restore_lr;
30902 int strategy;
30903 HOST_WIDE_INT frame_off = 0;
30904 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
30905 rtx frame_reg_rtx = sp_reg_rtx;
30906 rtx cfa_restores = NULL_RTX;
30907 rtx insn;
30908 rtx cr_save_reg = NULL_RTX;
30909 machine_mode reg_mode = Pmode;
30910 int reg_size = TARGET_32BIT ? 4 : 8;
30911 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
30912 ? DFmode : SFmode;
30913 int fp_reg_size = 8;
30914 int i;
30915 bool exit_func;
30916 unsigned ptr_regno;
30917
30918 info = rs6000_stack_info ();
30919
30920 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
30921 {
30922 reg_mode = V2SImode;
30923 reg_size = 8;
30924 }
30925
30926 strategy = info->savres_strategy;
30927 using_load_multiple = strategy & REST_MULTIPLE;
30928 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
30929 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
30930 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
30931 || rs6000_cpu == PROCESSOR_PPC603
30932 || rs6000_cpu == PROCESSOR_PPC750
30933 || optimize_size);
30934 /* Restore via the backchain when we have a large frame, since this
30935 is more efficient than an addis, addi pair. The second condition
30936 here will not trigger at the moment; we don't actually need a
30937 frame pointer for alloca, but the generic parts of the compiler
30938 give us one anyway. */
30939 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
30940 ? info->lr_save_offset
30941 : 0) > 32767
30942 || (cfun->calls_alloca
30943 && !frame_pointer_needed));
30944 restore_lr = (info->lr_save_p
30945 && (restoring_FPRs_inline
30946 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
30947 && (restoring_GPRs_inline
30948 || info->first_fp_reg_save < 64)
30949 && !cfun->machine->lr_is_wrapped_separately);
30950
30951
30952 if (WORLD_SAVE_P (info))
30953 {
30954 int i, j;
30955 char rname[30];
30956 const char *alloc_rname;
30957 rtvec p;
30958
30959 /* eh_rest_world_r10 will return to the location saved in the LR
30960 stack slot (which is not likely to be our caller).
30961 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
30962 rest_world is similar, except any R10 parameter is ignored.
30963 The exception-handling stuff that was here in 2.95 is no
30964 longer necessary. */
30965
30966 p = rtvec_alloc (9
30967 + 32 - info->first_gp_reg_save
30968 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
30969 + 63 + 1 - info->first_fp_reg_save);
30970
30971 strcpy (rname, ((crtl->calls_eh_return) ?
30972 "*eh_rest_world_r10" : "*rest_world"));
30973 alloc_rname = ggc_strdup (rname);
30974
30975 j = 0;
30976 RTVEC_ELT (p, j++) = ret_rtx;
30977 RTVEC_ELT (p, j++)
30978 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
30979 /* The instruction pattern requires a clobber here;
30980 it is shared with the restVEC helper. */
30981 RTVEC_ELT (p, j++)
30982 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
30983
30984 {
30985 /* CR register traditionally saved as CR2. */
30986 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
30987 RTVEC_ELT (p, j++)
30988 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
30989 if (flag_shrink_wrap)
30990 {
30991 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
30992 gen_rtx_REG (Pmode, LR_REGNO),
30993 cfa_restores);
30994 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
30995 }
30996 }
30997
30998 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
30999 {
31000 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31001 RTVEC_ELT (p, j++)
31002 = gen_frame_load (reg,
31003 frame_reg_rtx, info->gp_save_offset + reg_size * i);
31004 if (flag_shrink_wrap)
31005 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31006 }
31007 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
31008 {
31009 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
31010 RTVEC_ELT (p, j++)
31011 = gen_frame_load (reg,
31012 frame_reg_rtx, info->altivec_save_offset + 16 * i);
31013 if (flag_shrink_wrap)
31014 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31015 }
31016 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
31017 {
31018 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
31019 ? DFmode : SFmode),
31020 info->first_fp_reg_save + i);
31021 RTVEC_ELT (p, j++)
31022 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
31023 if (flag_shrink_wrap)
31024 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31025 }
31026 RTVEC_ELT (p, j++)
31027 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
31028 RTVEC_ELT (p, j++)
31029 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
31030 RTVEC_ELT (p, j++)
31031 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
31032 RTVEC_ELT (p, j++)
31033 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
31034 RTVEC_ELT (p, j++)
31035 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
31036 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31037
31038 if (flag_shrink_wrap)
31039 {
31040 REG_NOTES (insn) = cfa_restores;
31041 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31042 RTX_FRAME_RELATED_P (insn) = 1;
31043 }
31044 return;
31045 }
31046
31047 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
31048 if (info->push_p)
31049 frame_off = info->total_size;
31050
31051 /* Restore AltiVec registers if we must do so before adjusting the
31052 stack. */
31053 if (info->altivec_size != 0
31054 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31055 || (DEFAULT_ABI != ABI_V4
31056 && offset_below_red_zone_p (info->altivec_save_offset))))
31057 {
31058 int i;
31059 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31060
31061 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
31062 if (use_backchain_to_restore_sp)
31063 {
31064 int frame_regno = 11;
31065
31066 if ((strategy & REST_INLINE_VRS) == 0)
31067 {
31068 /* Of r11 and r12, select the one not clobbered by an
31069 out-of-line restore function for the frame register. */
31070 frame_regno = 11 + 12 - scratch_regno;
31071 }
31072 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
31073 emit_move_insn (frame_reg_rtx,
31074 gen_rtx_MEM (Pmode, sp_reg_rtx));
31075 frame_off = 0;
31076 }
31077 else if (frame_pointer_needed)
31078 frame_reg_rtx = hard_frame_pointer_rtx;
31079
31080 if ((strategy & REST_INLINE_VRS) == 0)
31081 {
31082 int end_save = info->altivec_save_offset + info->altivec_size;
31083 int ptr_off;
31084 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31085 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31086
31087 if (end_save + frame_off != 0)
31088 {
31089 rtx offset = GEN_INT (end_save + frame_off);
31090
31091 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31092 }
31093 else
31094 emit_move_insn (ptr_reg, frame_reg_rtx);
31095
31096 ptr_off = -end_save;
31097 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31098 info->altivec_save_offset + ptr_off,
31099 0, V4SImode, SAVRES_VR);
31100 }
31101 else
31102 {
31103 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31104 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31105 {
31106 rtx addr, areg, mem, insn;
31107 rtx reg = gen_rtx_REG (V4SImode, i);
31108 HOST_WIDE_INT offset
31109 = (info->altivec_save_offset + frame_off
31110 + 16 * (i - info->first_altivec_reg_save));
31111
31112 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31113 {
31114 mem = gen_frame_mem (V4SImode,
31115 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31116 GEN_INT (offset)));
31117 insn = gen_rtx_SET (reg, mem);
31118 }
31119 else
31120 {
31121 areg = gen_rtx_REG (Pmode, 0);
31122 emit_move_insn (areg, GEN_INT (offset));
31123
31124 /* AltiVec addressing mode is [reg+reg]. */
31125 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31126 mem = gen_frame_mem (V4SImode, addr);
31127
31128 /* Rather than emitting a generic move, force use of the
31129 lvx instruction, which we always want. In particular we
31130 don't want lxvd2x/xxpermdi for little endian. */
31131 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31132 }
31133
31134 (void) emit_insn (insn);
31135 }
31136 }
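/* Background for the forced lvx above (a sketch, not target
   documentation): on little-endian POWER8 a V4SImode load done with
   lxvd2x reads the two doublewords swapped and normally needs a
   trailing xxpermdi to put the elements right; lvx needs no such
   fixup, so forcing it keeps each restore to a single insn.  */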
31137
31138 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31139 if (((strategy & REST_INLINE_VRS) == 0
31140 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31141 && (flag_shrink_wrap
31142 || (offset_below_red_zone_p
31143 (info->altivec_save_offset
31144 + 16 * (i - info->first_altivec_reg_save)))))
31145 {
31146 rtx reg = gen_rtx_REG (V4SImode, i);
31147 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31148 }
31149 }
31150
31151 /* Restore VRSAVE if we must do so before adjusting the stack. */
31152 if (info->vrsave_size != 0
31153 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31154 || (DEFAULT_ABI != ABI_V4
31155 && offset_below_red_zone_p (info->vrsave_save_offset))))
31156 {
31157 rtx reg;
31158
31159 if (frame_reg_rtx == sp_reg_rtx)
31160 {
31161 if (use_backchain_to_restore_sp)
31162 {
31163 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31164 emit_move_insn (frame_reg_rtx,
31165 gen_rtx_MEM (Pmode, sp_reg_rtx));
31166 frame_off = 0;
31167 }
31168 else if (frame_pointer_needed)
31169 frame_reg_rtx = hard_frame_pointer_rtx;
31170 }
31171
31172 reg = gen_rtx_REG (SImode, 12);
31173 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31174 info->vrsave_save_offset + frame_off));
31175
31176 emit_insn (generate_set_vrsave (reg, info, 1));
31177 }
31178
31179 insn = NULL_RTX;
31180 /* If we have a large stack frame, restore the old stack pointer
31181 using the backchain. */
31182 if (use_backchain_to_restore_sp)
31183 {
31184 if (frame_reg_rtx == sp_reg_rtx)
31185 {
31186 /* Under V.4, don't reset the stack pointer until after we're done
31187 loading the saved registers. */
31188 if (DEFAULT_ABI == ABI_V4)
31189 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31190
31191 insn = emit_move_insn (frame_reg_rtx,
31192 gen_rtx_MEM (Pmode, sp_reg_rtx));
31193 frame_off = 0;
31194 }
31195 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31196 && DEFAULT_ABI == ABI_V4)
31197 /* frame_reg_rtx has been set up by the altivec restore. */
31198 ;
31199 else
31200 {
31201 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
31202 frame_reg_rtx = sp_reg_rtx;
31203 }
31204 }
31205 /* If we have a frame pointer, we can restore the old stack pointer
31206 from it. */
31207 else if (frame_pointer_needed)
31208 {
31209 frame_reg_rtx = sp_reg_rtx;
31210 if (DEFAULT_ABI == ABI_V4)
31211 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31212 /* Prevent reordering memory accesses against stack pointer restore. */
31213 else if (cfun->calls_alloca
31214 || offset_below_red_zone_p (-info->total_size))
31215 rs6000_emit_stack_tie (frame_reg_rtx, true);
31216
31217 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
31218 GEN_INT (info->total_size)));
31219 frame_off = 0;
31220 }
31221 else if (info->push_p
31222 && DEFAULT_ABI != ABI_V4
31223 && !crtl->calls_eh_return)
31224 {
31225 /* Prevent reordering memory accesses against stack pointer restore. */
31226 if (cfun->calls_alloca
31227 || offset_below_red_zone_p (-info->total_size))
31228 rs6000_emit_stack_tie (frame_reg_rtx, false);
31229 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
31230 GEN_INT (info->total_size)));
31231 frame_off = 0;
31232 }
31233 if (insn && frame_reg_rtx == sp_reg_rtx)
31234 {
31235 if (cfa_restores)
31236 {
31237 REG_NOTES (insn) = cfa_restores;
31238 cfa_restores = NULL_RTX;
31239 }
31240 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31241 RTX_FRAME_RELATED_P (insn) = 1;
31242 }
31243
31244 /* Restore AltiVec registers if we have not done so already. */
31245 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31246 && info->altivec_size != 0
31247 && (DEFAULT_ABI == ABI_V4
31248 || !offset_below_red_zone_p (info->altivec_save_offset)))
31249 {
31250 int i;
31251
31252 if ((strategy & REST_INLINE_VRS) == 0)
31253 {
31254 int end_save = info->altivec_save_offset + info->altivec_size;
31255 int ptr_off;
31256 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
31257 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
31258 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
31259
31260 if (end_save + frame_off != 0)
31261 {
31262 rtx offset = GEN_INT (end_save + frame_off);
31263
31264 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
31265 }
31266 else
31267 emit_move_insn (ptr_reg, frame_reg_rtx);
31268
31269 ptr_off = -end_save;
31270 insn = rs6000_emit_savres_rtx (info, scratch_reg,
31271 info->altivec_save_offset + ptr_off,
31272 0, V4SImode, SAVRES_VR);
31273 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
31274 {
31275 /* Frame reg was clobbered by out-of-line save. Restore it
31276 from ptr_reg, and if we are calling out-of-line gpr or
31277 fpr restore set up the correct pointer and offset. */
31278 unsigned newptr_regno = 1;
31279 if (!restoring_GPRs_inline)
31280 {
31281 bool lr = info->gp_save_offset + info->gp_size == 0;
31282 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31283 newptr_regno = ptr_regno_for_savres (sel);
31284 end_save = info->gp_save_offset + info->gp_size;
31285 }
31286 else if (!restoring_FPRs_inline)
31287 {
31288 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
31289 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31290 newptr_regno = ptr_regno_for_savres (sel);
31291 end_save = info->fp_save_offset + info->fp_size;
31292 }
31293
31294 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
31295 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
31296
31297 if (end_save + ptr_off != 0)
31298 {
31299 rtx offset = GEN_INT (end_save + ptr_off);
31300
31301 frame_off = -end_save;
31302 if (TARGET_32BIT)
31303 emit_insn (gen_addsi3_carry (frame_reg_rtx,
31304 ptr_reg, offset));
31305 else
31306 emit_insn (gen_adddi3_carry (frame_reg_rtx,
31307 ptr_reg, offset));
31308 }
31309 else
31310 {
31311 frame_off = ptr_off;
31312 emit_move_insn (frame_reg_rtx, ptr_reg);
31313 }
31314 }
31315 }
31316 else
31317 {
31318 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31319 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
31320 {
31321 rtx addr, areg, mem, insn;
31322 rtx reg = gen_rtx_REG (V4SImode, i);
31323 HOST_WIDE_INT offset
31324 = (info->altivec_save_offset + frame_off
31325 + 16 * (i - info->first_altivec_reg_save));
31326
31327 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
31328 {
31329 mem = gen_frame_mem (V4SImode,
31330 gen_rtx_PLUS (Pmode, frame_reg_rtx,
31331 GEN_INT (offset)));
31332 insn = gen_rtx_SET (reg, mem);
31333 }
31334 else
31335 {
31336 areg = gen_rtx_REG (Pmode, 0);
31337 emit_move_insn (areg, GEN_INT (offset));
31338
31339 /* AltiVec addressing mode is [reg+reg]. */
31340 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
31341 mem = gen_frame_mem (V4SImode, addr);
31342
31343 /* Rather than emitting a generic move, force use of the
31344 lvx instruction, which we always want. In particular we
31345 don't want lxvd2x/xxpermdi for little endian. */
31346 insn = gen_altivec_lvx_v4si_internal (reg, mem);
31347 }
31348
31349 (void) emit_insn (insn);
31350 }
31351 }
31352
31353 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
31354 if (((strategy & REST_INLINE_VRS) == 0
31355 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
31356 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
31357 {
31358 rtx reg = gen_rtx_REG (V4SImode, i);
31359 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31360 }
31361 }
31362
31363 /* Restore VRSAVE if we have not done so already. */
31364 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
31365 && info->vrsave_size != 0
31366 && (DEFAULT_ABI == ABI_V4
31367 || !offset_below_red_zone_p (info->vrsave_save_offset)))
31368 {
31369 rtx reg;
31370
31371 reg = gen_rtx_REG (SImode, 12);
31372 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31373 info->vrsave_save_offset + frame_off));
31374
31375 emit_insn (generate_set_vrsave (reg, info, 1));
31376 }
31377
31378 /* If we exit by an out-of-line restore function on ABI_V4 then that
31379 function will deallocate the stack, so we don't need to worry
31380 about the unwinder restoring cr from an invalid stack frame
31381 location. */
31382 exit_func = (!restoring_FPRs_inline
31383 || (!restoring_GPRs_inline
31384 && info->first_fp_reg_save == 64));
31385
31386 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
31387 *separate* slots if the routine calls __builtin_eh_return, so
31388 that they can be independently restored by the unwinder. */
31389 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
31390 {
31391 int i, cr_off = info->ehcr_offset;
31392
31393 for (i = 0; i < 8; i++)
31394 if (!call_used_regs[CR0_REGNO + i])
31395 {
31396 rtx reg = gen_rtx_REG (SImode, 0);
31397 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31398 cr_off + frame_off));
31399
31400 insn = emit_insn (gen_movsi_to_cr_one
31401 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
31402
31403 if (!exit_func && flag_shrink_wrap)
31404 {
31405 add_reg_note (insn, REG_CFA_RESTORE,
31406 gen_rtx_REG (SImode, CR0_REGNO + i));
31407
31408 RTX_FRAME_RELATED_P (insn) = 1;
31409 }
31410
31411 cr_off += reg_size;
31412 }
31413 }
31414
31415 /* Get the old lr if we saved it. If we are restoring registers
31416 out-of-line, then the out-of-line routines can do this for us. */
31417 if (restore_lr && restoring_GPRs_inline)
31418 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31419
31420 /* Get the old cr if we saved it. */
31421 if (info->cr_save_p)
31422 {
31423 unsigned cr_save_regno = 12;
31424
31425 if (!restoring_GPRs_inline)
31426 {
31427 /* Ensure we don't use the register used by the out-of-line
31428 gpr register restore below. */
31429 bool lr = info->gp_save_offset + info->gp_size == 0;
31430 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
31431 int gpr_ptr_regno = ptr_regno_for_savres (sel);
31432
31433 if (gpr_ptr_regno == 12)
31434 cr_save_regno = 11;
31435 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
31436 }
31437 else if (REGNO (frame_reg_rtx) == 12)
31438 cr_save_regno = 11;
31439
31440 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
31441 info->cr_save_offset + frame_off,
31442 exit_func);
31443 }
31444
31445 /* Set LR here to try to overlap restores below. */
31446 if (restore_lr && restoring_GPRs_inline)
31447 restore_saved_lr (0, exit_func);
31448
31449 /* Load exception handler data registers, if needed. */
31450 if (crtl->calls_eh_return)
31451 {
31452 unsigned int i, regno;
31453
31454 if (TARGET_AIX)
31455 {
31456 rtx reg = gen_rtx_REG (reg_mode, 2);
31457 emit_insn (gen_frame_load (reg, frame_reg_rtx,
31458 frame_off + RS6000_TOC_SAVE_SLOT));
31459 }
31460
31461 for (i = 0; ; ++i)
31462 {
31463 rtx mem;
31464
31465 regno = EH_RETURN_DATA_REGNO (i);
31466 if (regno == INVALID_REGNUM)
31467 break;
31468
31469 /* Note: possible use of r0 here to address SPE regs. */
31470 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
31471 info->ehrd_offset + frame_off
31472 + reg_size * (int) i);
31473
31474 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
31475 }
31476 }
31477
31478 /* Restore GPRs. This is done as a PARALLEL if we are using
31479 the load-multiple instructions. */
31480 if (TARGET_SPE_ABI
31481 && info->spe_64bit_regs_used
31482 && info->first_gp_reg_save != 32)
31483 {
31484 /* Determine whether we can address all of the registers that need
31485 to be saved with an offset from frame_reg_rtx that fits in
31486 the small const field for SPE memory instructions. */
31487 int spe_regs_addressable
31488 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
31489 + reg_size * (32 - info->first_gp_reg_save - 1))
31490 && restoring_GPRs_inline);
31491
31492 if (!spe_regs_addressable)
31493 {
31494 int ool_adjust = 0;
31495 rtx old_frame_reg_rtx = frame_reg_rtx;
31496 /* Make r11 point to the start of the SPE save area. We worried about
31497 not clobbering it when we were saving registers in the prologue.
31498 There's no need to worry here because the static chain is passed
31499 anew to every function. */
31500
31501 if (!restoring_GPRs_inline)
31502 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
31503 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
31504 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
31505 GEN_INT (info->spe_gp_save_offset
31506 + frame_off
31507 - ool_adjust)));
31508 /* Keep the invariant that frame_reg_rtx + frame_off points
31509 at the top of the stack frame. */
31510 frame_off = -info->spe_gp_save_offset + ool_adjust;
31511 }
31512
31513 if (restoring_GPRs_inline)
31514 {
31515 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
31516
31517 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31518 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
31519 {
31520 rtx offset, addr, mem, reg;
31521
31522 /* We're doing all this to ensure that the immediate offset
31523 fits into the immediate field of 'evldd'. */
31524 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
31525
31526 offset = GEN_INT (spe_offset + reg_size * i);
31527 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
31528 mem = gen_rtx_MEM (V2SImode, addr);
31529 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
31530
31531 emit_move_insn (reg, mem);
31532 }
31533 }
31534 else
31535 rs6000_emit_savres_rtx (info, frame_reg_rtx,
31536 info->spe_gp_save_offset + frame_off,
31537 info->lr_save_offset + frame_off,
31538 reg_mode,
31539 SAVRES_GPR | SAVRES_LR);
31540 }
31541 else if (!restoring_GPRs_inline)
31542 {
31543 /* We are jumping to an out-of-line function. */
31544 rtx ptr_reg;
31545 int end_save = info->gp_save_offset + info->gp_size;
31546 bool can_use_exit = end_save == 0;
31547 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
31548 int ptr_off;
31549
31550 /* Emit stack reset code if we need it. */
31551 ptr_regno = ptr_regno_for_savres (sel);
31552 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
31553 if (can_use_exit)
31554 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31555 else if (end_save + frame_off != 0)
31556 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
31557 GEN_INT (end_save + frame_off)));
31558 else if (REGNO (frame_reg_rtx) != ptr_regno)
31559 emit_move_insn (ptr_reg, frame_reg_rtx);
31560 if (REGNO (frame_reg_rtx) == ptr_regno)
31561 frame_off = -end_save;
31562
31563 if (can_use_exit && info->cr_save_p)
31564 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
31565
31566 ptr_off = -end_save;
31567 rs6000_emit_savres_rtx (info, ptr_reg,
31568 info->gp_save_offset + ptr_off,
31569 info->lr_save_offset + ptr_off,
31570 reg_mode, sel);
31571 }
31572 else if (using_load_multiple)
31573 {
31574 rtvec p;
31575 p = rtvec_alloc (32 - info->first_gp_reg_save);
31576 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
31577 RTVEC_ELT (p, i)
31578 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
31579 frame_reg_rtx,
31580 info->gp_save_offset + frame_off + reg_size * i);
31581 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
31582 }
31583 else
31584 {
31585 int offset = info->gp_save_offset + frame_off;
31586 for (i = info->first_gp_reg_save; i < 32; i++)
31587 {
31588 if (rs6000_reg_live_or_pic_offset_p (i)
31589 && !cfun->machine->gpr_is_wrapped_separately[i])
31590 {
31591 rtx reg = gen_rtx_REG (reg_mode, i);
31592 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31593 }
31594
31595 offset += reg_size;
31596 }
31597 }
31598
31599 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31600 {
31601 /* If the frame pointer was used then we can't delay emitting
31602 a REG_CFA_DEF_CFA note. This must happen on the insn that
31603 restores the frame pointer, r31. We may have already emitted
31604 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
31605 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
31606 be harmless if emitted. */
31607 if (frame_pointer_needed)
31608 {
31609 insn = get_last_insn ();
31610 add_reg_note (insn, REG_CFA_DEF_CFA,
31611 plus_constant (Pmode, frame_reg_rtx, frame_off));
31612 RTX_FRAME_RELATED_P (insn) = 1;
31613 }
31614
31615 /* Set up cfa_restores. We always need these when
31616 shrink-wrapping. If not shrink-wrapping then we only need
31617 the cfa_restore when the stack location is no longer valid.
31618 The cfa_restores must be emitted on or before the insn that
31619 invalidates the stack, and of course must not be emitted
31620 before the insn that actually does the restore. The latter
31621 is why it is a bad idea to emit the cfa_restores as a group
31622 on the last instruction here that actually does a restore:
31623 That insn may be reordered with respect to others doing
31624 restores. */
31625 if (flag_shrink_wrap
31626 && !restoring_GPRs_inline
31627 && info->first_fp_reg_save == 64)
31628 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31629
31630 for (i = info->first_gp_reg_save; i < 32; i++)
31631 if (!restoring_GPRs_inline
31632 || using_load_multiple
31633 || rs6000_reg_live_or_pic_offset_p (i))
31634 {
31635 if (cfun->machine->gpr_is_wrapped_separately[i])
31636 continue;
31637
31638 rtx reg = gen_rtx_REG (reg_mode, i);
31639 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31640 }
31641 }
31642
31643 if (!restoring_GPRs_inline
31644 && info->first_fp_reg_save == 64)
31645 {
31646 /* We are jumping to an out-of-line function. */
31647 if (cfa_restores)
31648 emit_cfa_restores (cfa_restores);
31649 return;
31650 }
31651
31652 if (restore_lr && !restoring_GPRs_inline)
31653 {
31654 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
31655 restore_saved_lr (0, exit_func);
31656 }
31657
31658 /* Restore fpr's if we need to do it without calling a function. */
31659 if (restoring_FPRs_inline)
31660 {
31661 int offset = info->fp_save_offset + frame_off;
31662 for (i = info->first_fp_reg_save; i < 64; i++)
31663 {
31664 if (save_reg_p (i)
31665 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
31666 {
31667 rtx reg = gen_rtx_REG (fp_reg_mode, i);
31668 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
31669 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
31670 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
31671 cfa_restores);
31672 }
31673
31674 offset += fp_reg_size;
31675 }
31676 }
31677
31678 /* If we saved cr, restore it here. Just those that were used. */
31679 if (info->cr_save_p)
31680 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
31681
31682 /* If this is V.4, unwind the stack pointer after all of the loads
31683 have been done, or set up r11 if we are restoring fp out of line. */
31684 ptr_regno = 1;
31685 if (!restoring_FPRs_inline)
31686 {
31687 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31688 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
31689 ptr_regno = ptr_regno_for_savres (sel);
31690 }
31691
31692 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
31693 if (REGNO (frame_reg_rtx) == ptr_regno)
31694 frame_off = 0;
31695
31696 if (insn && restoring_FPRs_inline)
31697 {
31698 if (cfa_restores)
31699 {
31700 REG_NOTES (insn) = cfa_restores;
31701 cfa_restores = NULL_RTX;
31702 }
31703 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
31704 RTX_FRAME_RELATED_P (insn) = 1;
31705 }
31706
31707 if (crtl->calls_eh_return)
31708 {
31709 rtx sa = EH_RETURN_STACKADJ_RTX;
31710 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
31711 }
31712
31713 if (!sibcall && restoring_FPRs_inline)
31714 {
31715 if (cfa_restores)
31716 {
31717 /* We can't hang the cfa_restores off a simple return,
31718 since the shrink-wrap code sometimes uses an existing
31719 return. This means there might be a path from
31720 pre-prologue code to this return, and dwarf2cfi code
31721 wants the eh_frame unwinder state to be the same on
31722 all paths to any point. So we need to emit the
31723 cfa_restores before the return. For -m64 we really
31724 don't need epilogue cfa_restores at all, except for
31725 this irritating dwarf2cfi-with-shrink-wrap
31726 requirement; the stack red-zone means eh_frame info
31727 from the prologue telling the unwinder to restore
31728 from the stack is perfectly good right to the end of
31729 the function. */
31730 emit_insn (gen_blockage ());
31731 emit_cfa_restores (cfa_restores);
31732 cfa_restores = NULL_RTX;
31733 }
31734
31735 emit_jump_insn (targetm.gen_simple_return ());
31736 }
31737
31738 if (!sibcall && !restoring_FPRs_inline)
31739 {
31740 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
31741 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
31742 int elt = 0;
31743 RTVEC_ELT (p, elt++) = ret_rtx;
31744 if (lr)
31745 RTVEC_ELT (p, elt++)
31746 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
31747
31748 /* We have to restore more than two FP registers, so branch to the
31749 restore function. It will return to our caller. */
31750 int i;
31751 int reg;
31752 rtx sym;
31753
31754 if (flag_shrink_wrap)
31755 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
31756
31757 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
31758 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
31759 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
31760 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
31761
31762 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
31763 {
31764 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
31765
31766 RTVEC_ELT (p, elt++)
31767 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
31768 if (flag_shrink_wrap)
31769 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
31770 }
31771
31772 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
31773 }
31774
31775 if (cfa_restores)
31776 {
31777 if (sibcall)
31778 /* Ensure the cfa_restores are hung off an insn that won't
31779 be reordered above other restores. */
31780 emit_insn (gen_blockage ());
31781
31782 emit_cfa_restores (cfa_restores);
31783 }
31784 }
31785
31786 /* Write function epilogue. */
31787
31788 static void
31789 rs6000_output_function_epilogue (FILE *file)
31790 {
31791 #if TARGET_MACHO
31792 macho_branch_islands ();
31793
31794 {
31795 rtx_insn *insn = get_last_insn ();
31796 rtx_insn *deleted_debug_label = NULL;
31797
31798 /* Mach-O doesn't support labels at the end of objects, so if
31799 it looks like we might want one, take special action.
31800
31801 First, collect any sequence of deleted debug labels. */
31802 while (insn
31803 && NOTE_P (insn)
31804 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
31805 {
31806 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
31807 notes only, instead set their CODE_LABEL_NUMBER to -1,
31808 otherwise there would be code generation differences
31809 in between -g and -g0. */
31810 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31811 deleted_debug_label = insn;
31812 insn = PREV_INSN (insn);
31813 }
31814
31815 /* Second, if we have:
31816 label:
31817 barrier
31818 then this needs to be detected, so skip past the barrier. */
31819
31820 if (insn && BARRIER_P (insn))
31821 insn = PREV_INSN (insn);
31822
31823 /* Up to now we've only seen notes or barriers. */
31824 if (insn)
31825 {
31826 if (LABEL_P (insn)
31827 || (NOTE_P (insn)
31828 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
31829 /* Trailing label: <barrier>. */
31830 fputs ("\tnop\n", file);
31831 else
31832 {
31833 /* Lastly, see if we have a completely empty function body. */
31834 while (insn && ! INSN_P (insn))
31835 insn = PREV_INSN (insn);
31836 /* If we don't find any insns, we've got an empty function body;
31837 i.e. completely empty, without a return or branch. This is
31838 taken as the case where a function body has been removed
31839 because it contains an inline __builtin_unreachable(). GCC
31840 states that reaching __builtin_unreachable() means UB so we're
31841 not obliged to do anything special; however, we want
31842 non-zero-sized function bodies. To meet this, and help the
31843 user out, let's trap the case. */
31844 if (insn == NULL)
31845 fputs ("\ttrap\n", file);
31846 }
31847 }
31848 else if (deleted_debug_label)
31849 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
31850 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
31851 CODE_LABEL_NUMBER (insn) = -1;
31852 }
31853 #endif
31854
31855 /* Output a traceback table here. See /usr/include/sys/debug.h for info
31856 on its format.
31857
31858 We don't output a traceback table if -finhibit-size-directive was
31859 used. The documentation for -finhibit-size-directive reads
31860 ``don't output a @code{.size} assembler directive, or anything
31861 else that would cause trouble if the function is split in the
31862 middle, and the two halves are placed at locations far apart in
31863 memory.'' The traceback table has this property, since it
31864 includes the offset from the start of the function to the
31865 traceback table itself.
31866
31867 System V.4 PowerPC targets (and the embedded ABI derived from it) use a
31868 different traceback table. */
31869 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31870 && ! flag_inhibit_size_directive
31871 && rs6000_traceback != traceback_none && !cfun->is_thunk)
31872 {
31873 const char *fname = NULL;
31874 const char *language_string = lang_hooks.name;
31875 int fixed_parms = 0, float_parms = 0, parm_info = 0;
31876 int i;
31877 int optional_tbtab;
31878 rs6000_stack_t *info = rs6000_stack_info ();
31879
31880 if (rs6000_traceback == traceback_full)
31881 optional_tbtab = 1;
31882 else if (rs6000_traceback == traceback_part)
31883 optional_tbtab = 0;
31884 else
31885 optional_tbtab = !optimize_size && !TARGET_ELF;
31886
31887 if (optional_tbtab)
31888 {
31889 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
31890 while (*fname == '.') /* V.4 encodes . in the name */
31891 fname++;
31892
31893 /* Need label immediately before tbtab, so we can compute
31894 its offset from the function start. */
31895 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
31896 ASM_OUTPUT_LABEL (file, fname);
31897 }
31898
31899 /* The .tbtab pseudo-op can only be used for the first eight
31900 expressions, since it can't handle the possibly variable
31901 length fields that follow. However, if you omit the optional
31902 fields, the assembler outputs zeros for all optional fields
31903 anyway, giving each variable length field its minimum length
31904 (as defined in sys/debug.h). Thus we cannot use the .tbtab
31905 pseudo-op at all. */
31906
31907 /* An all-zero word flags the start of the tbtab, for debuggers
31908 that have to find it by searching forward from the entry
31909 point or from the current pc. */
31910 fputs ("\t.long 0\n", file);
31911
31912 /* Tbtab format type. Use format type 0. */
31913 fputs ("\t.byte 0,", file);
31914
31915 /* Language type. Unfortunately, there does not seem to be any
31916 official way to discover the language being compiled, so we
31917 use language_string.
31918 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
31919 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
31920 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
31921 either, so for now use 0. */
31922 if (lang_GNU_C ()
31923 || ! strcmp (language_string, "GNU GIMPLE")
31924 || ! strcmp (language_string, "GNU Go")
31925 || ! strcmp (language_string, "libgccjit"))
31926 i = 0;
31927 else if (! strcmp (language_string, "GNU F77")
31928 || lang_GNU_Fortran ())
31929 i = 1;
31930 else if (! strcmp (language_string, "GNU Pascal"))
31931 i = 2;
31932 else if (! strcmp (language_string, "GNU Ada"))
31933 i = 3;
31934 else if (lang_GNU_CXX ()
31935 || ! strcmp (language_string, "GNU Objective-C++"))
31936 i = 9;
31937 else if (! strcmp (language_string, "GNU Java"))
31938 i = 13;
31939 else if (! strcmp (language_string, "GNU Objective-C"))
31940 i = 14;
31941 else
31942 gcc_unreachable ();
31943 fprintf (file, "%d,", i);
31944
31945 /* 8 single bit fields: global linkage (not set for C extern linkage,
31946 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
31947 from start of procedure stored in tbtab, internal function, function
31948 has controlled storage, function has no toc, function uses fp,
31949 function logs/aborts fp operations. */
31950 /* Assume that fp operations are used if any fp reg must be saved. */
31951 fprintf (file, "%d,",
31952 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
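/* E.g. a full-detail table (optional_tbtab == 1) for a function that
   saves some FPRs emits (1 << 5) | (1 << 1) == 34 here.  */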
31953
31954 /* 6 bitfields: function is interrupt handler, name present in
31955 proc table, function calls alloca, on condition directives
31956 (controls stack walks, 3 bits), saves condition reg, saves
31957 link reg. */
31958 /* The `function calls alloca' bit seems to be set whenever reg 31 is
31959 set up as a frame pointer, even when there is no alloca call. */
31960 fprintf (file, "%d,",
31961 ((optional_tbtab << 6)
31962 | ((optional_tbtab & frame_pointer_needed) << 5)
31963 | (info->cr_save_p << 1)
31964 | (info->lr_save_p)));
31965
31966 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
31967 (6 bits). */
31968 fprintf (file, "%d,",
31969 (info->push_p << 7) | (64 - info->first_fp_reg_save));
31970
31971 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
31972 fprintf (file, "%d,", (32 - first_reg_to_save ()));
31973
31974 if (optional_tbtab)
31975 {
31976 /* Compute the parameter info from the function decl argument
31977 list. */
31978 tree decl;
31979 int next_parm_info_bit = 31;
31980
31981 for (decl = DECL_ARGUMENTS (current_function_decl);
31982 decl; decl = DECL_CHAIN (decl))
31983 {
31984 rtx parameter = DECL_INCOMING_RTL (decl);
31985 machine_mode mode = GET_MODE (parameter);
31986
31987 if (GET_CODE (parameter) == REG)
31988 {
31989 if (SCALAR_FLOAT_MODE_P (mode))
31990 {
31991 int bits;
31992
31993 float_parms++;
31994
31995 switch (mode)
31996 {
31997 case E_SFmode:
31998 case E_SDmode:
31999 bits = 0x2;
32000 break;
32001
32002 case E_DFmode:
32003 case E_DDmode:
32004 case E_TFmode:
32005 case E_TDmode:
32006 case E_IFmode:
32007 case E_KFmode:
32008 bits = 0x3;
32009 break;
32010
32011 default:
32012 gcc_unreachable ();
32013 }
32014
32015 /* If only one bit will fit, don't OR in this entry. */
32016 if (next_parm_info_bit > 0)
32017 parm_info |= (bits << (next_parm_info_bit - 1));
32018 next_parm_info_bit -= 2;
32019 }
32020 else
32021 {
32022 fixed_parms += ((GET_MODE_SIZE (mode)
32023 + (UNITS_PER_WORD - 1))
32024 / UNITS_PER_WORD);
32025 next_parm_info_bit -= 1;
32026 }
32027 }
32028 }
32029 }
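/* Worked example with a hypothetical signature f (int, double, float):
   the loop above leaves bit 31 clear (one fixed word), sets bits
   30-29 to 11 (double) and bits 28-27 to 10 (single), giving
   parm_info == 0x70000000, fixed_parms == 1 and float_parms == 2.  */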
32030
32031 /* Number of fixed point parameters. */
32032 /* This is actually the number of words of fixed point parameters; thus
32033 an 8 byte struct counts as 2; and thus the maximum value is 8. */
32034 fprintf (file, "%d,", fixed_parms);
32035
32036 /* 2 bitfields: number of floating point parameters (7 bits), parameters
32037 all on stack. */
32038 /* This is actually the number of fp registers that hold parameters;
32039 and thus the maximum value is 13. */
32040 /* Set the parameters-on-stack bit if parameters are not in their
32041 original registers, regardless of whether they are on the stack? Xlc
32042 seems to set the bit when not optimizing. */
32043 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
32044
32045 if (optional_tbtab)
32046 {
32047 /* Optional fields follow. Some are variable length. */
32048
32049 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
32050 float, 11 double float. */
32051 /* There is an entry for each parameter in a register, in the order
32052 that they occur in the parameter list. Any intervening arguments
32053 on the stack are ignored. If the list overflows a long (max
32054 possible length 34 bits) then completely leave off all elements
32055 that don't fit. */
32056 /* Only emit this long if there was at least one parameter. */
32057 if (fixed_parms || float_parms)
32058 fprintf (file, "\t.long %d\n", parm_info);
32059
32060 /* Offset from start of code to tb table. */
32061 fputs ("\t.long ", file);
32062 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
32063 RS6000_OUTPUT_BASENAME (file, fname);
32064 putc ('-', file);
32065 rs6000_output_function_entry (file, fname);
32066 putc ('\n', file);
32067
32068 /* Interrupt handler mask. */
32069 /* Omit this long, since we never set the interrupt handler bit
32070 above. */
32071
32072 /* Number of CTL (controlled storage) anchors. */
32073 /* Omit this long, since the has_ctl bit is never set above. */
32074
32075 /* Displacement into stack of each CTL anchor. */
32076 /* Omit this list of longs, because there are no CTL anchors. */
32077
32078 /* Length of function name. */
32079 if (*fname == '*')
32080 ++fname;
32081 fprintf (file, "\t.short %d\n", (int) strlen (fname));
32082
32083 /* Function name. */
32084 assemble_string (fname, strlen (fname));
32085
32086 /* Register for alloca automatic storage; this is always reg 31.
32087 Only emit this if the alloca bit was set above. */
32088 if (frame_pointer_needed)
32089 fputs ("\t.byte 31\n", file);
32090
32091 fputs ("\t.align 2\n", file);
32092 }
32093 }
32094
32095 /* Arrange to define .LCTOC1 label, if not already done. */
32096 if (need_toc_init)
32097 {
32098 need_toc_init = 0;
32099 if (!toc_initialized)
32100 {
32101 switch_to_section (toc_section);
32102 switch_to_section (current_function_section ());
32103 }
32104 }
32105 }
32106
32107 /* -fsplit-stack support. */
32108
32109 /* A SYMBOL_REF for __morestack. */
32110 static GTY(()) rtx morestack_ref;
32111
32112 static rtx
32113 gen_add3_const (rtx rt, rtx ra, long c)
32114 {
32115 if (TARGET_64BIT)
32116 return gen_adddi3 (rt, ra, GEN_INT (c));
32117 else
32118 return gen_addsi3 (rt, ra, GEN_INT (c));
32119 }
32120
32121 /* Emit -fsplit-stack prologue, which goes before the regular function
32122 prologue (at local entry point in the case of ELFv2). */
32123
32124 void
32125 rs6000_expand_split_stack_prologue (void)
32126 {
32127 rs6000_stack_t *info = rs6000_stack_info ();
32128 unsigned HOST_WIDE_INT allocate;
32129 long alloc_hi, alloc_lo;
32130 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
32131 rtx_insn *insn;
32132
32133 gcc_assert (flag_split_stack && reload_completed);
32134
32135 if (!info->push_p)
32136 return;
32137
32138 if (global_regs[29])
32139 {
32140 error ("-fsplit-stack uses register r29");
32141 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
32142 "conflicts with %qD", global_regs_decl[29]);
32143 }
32144
32145 allocate = info->total_size;
32146 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
32147 {
32148 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
32149 return;
32150 }
32151 if (morestack_ref == NULL_RTX)
32152 {
32153 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
32154 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
32155 | SYMBOL_FLAG_FUNCTION);
32156 }
32157
32158 r0 = gen_rtx_REG (Pmode, 0);
32159 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32160 r12 = gen_rtx_REG (Pmode, 12);
32161 emit_insn (gen_load_split_stack_limit (r0));
32162 /* Always emit two insns here to calculate the requested stack,
32163 so that the linker can edit them when adjusting size for calling
32164 non-split-stack code. */
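/* A worked instance (illustrative): allocate == 0x12345 gives
   alloc_hi == -0x10000 and alloc_lo == -0x2345, i.e. roughly

       addis 12,1,-1
       addi  12,12,-9029

   while a frame that already fits in 16 bits still gets a nop after
   its single addi, so the linker always finds two insns to edit.  */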
32165 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
32166 alloc_lo = -allocate - alloc_hi;
32167 if (alloc_hi != 0)
32168 {
32169 emit_insn (gen_add3_const (r12, r1, alloc_hi));
32170 if (alloc_lo != 0)
32171 emit_insn (gen_add3_const (r12, r12, alloc_lo));
32172 else
32173 emit_insn (gen_nop ());
32174 }
32175 else
32176 {
32177 emit_insn (gen_add3_const (r12, r1, alloc_lo));
32178 emit_insn (gen_nop ());
32179 }
32180
32181 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
32182 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
32183 ok_label = gen_label_rtx ();
32184 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32185 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
32186 gen_rtx_LABEL_REF (VOIDmode, ok_label),
32187 pc_rtx);
32188 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32189 JUMP_LABEL (insn) = ok_label;
32190 /* Mark the jump as very likely to be taken. */
32191 add_reg_br_prob_note (insn, profile_probability::very_likely ());
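/* The test above typically assembles to something like (assumed
   mnemonics)

       cmpld 7,12,0        # cmplw for -m32
       bge   7,.Lok        # requested >= limit: enough stack

   so we fall through to the __morestack call below only when the
   requested stack pointer would dip under the split-stack limit.  */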
32192
32193 lr = gen_rtx_REG (Pmode, LR_REGNO);
32194 insn = emit_move_insn (r0, lr);
32195 RTX_FRAME_RELATED_P (insn) = 1;
32196 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
32197 RTX_FRAME_RELATED_P (insn) = 1;
32198
32199 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
32200 const0_rtx, const0_rtx));
32201 call_fusage = NULL_RTX;
32202 use_reg (&call_fusage, r12);
32203 /* Say the call uses r0, even though it doesn't, to stop regrename
32204 from twiddling with the insns saving lr, trashing args for cfun.
32205 The insns restoring lr are similarly protected by making
32206 split_stack_return use r0. */
32207 use_reg (&call_fusage, r0);
32208 add_function_usage_to (insn, call_fusage);
32209 /* Indicate that this function can't jump to non-local gotos. */
32210 make_reg_eh_region_note_nothrow_nononlocal (insn);
32211 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
32212 insn = emit_move_insn (lr, r0);
32213 add_reg_note (insn, REG_CFA_RESTORE, lr);
32214 RTX_FRAME_RELATED_P (insn) = 1;
32215 emit_insn (gen_split_stack_return ());
32216
32217 emit_label (ok_label);
32218 LABEL_NUSES (ok_label) = 1;
32219 }
32220
32221 /* Return the internal arg pointer used for function incoming
32222 arguments. When -fsplit-stack, the arg pointer is r12 so we need
32223 to copy it to a pseudo in order for it to be preserved over calls
32224 and suchlike. We'd really like to use a pseudo here for the
32225 internal arg pointer but data-flow analysis is not prepared to
32226 accept pseudos as live at the beginning of a function. */
32227
32228 static rtx
32229 rs6000_internal_arg_pointer (void)
32230 {
32231 if (flag_split_stack
32232 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
32233 == NULL))
32234
32235 {
32236 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
32237 {
32238 rtx pat;
32239
32240 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
32241 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
32242
32243 /* Put the pseudo initialization right after the note at the
32244 beginning of the function. */
32245 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
32246 gen_rtx_REG (Pmode, 12));
32247 push_topmost_sequence ();
32248 emit_insn_after (pat, get_insns ());
32249 pop_topmost_sequence ();
32250 }
32251 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
32252 FIRST_PARM_OFFSET (current_function_decl));
32253 }
32254 return virtual_incoming_args_rtx;
32255 }
32256
32257 /* We may have to tell the dataflow pass that the split stack prologue
32258 is initializing a register. */
32259
32260 static void
32261 rs6000_live_on_entry (bitmap regs)
32262 {
32263 if (flag_split_stack)
32264 bitmap_set_bit (regs, 12);
32265 }
32266
32267 /* Emit -fsplit-stack dynamic stack allocation space check. */
32268
32269 void
32270 rs6000_split_stack_space_check (rtx size, rtx label)
32271 {
32272 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32273 rtx limit = gen_reg_rtx (Pmode);
32274 rtx requested = gen_reg_rtx (Pmode);
32275 rtx cmp = gen_reg_rtx (CCUNSmode);
32276 rtx jump;
32277
32278 emit_insn (gen_load_split_stack_limit (limit));
32279 if (CONST_INT_P (size))
32280 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
32281 else
32282 {
32283 size = force_reg (Pmode, size);
32284 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
32285 }
32286 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
32287 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
32288 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
32289 gen_rtx_LABEL_REF (VOIDmode, label),
32290 pc_rtx);
32291 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
32292 JUMP_LABEL (jump) = label;
32293 }
32294 \f
32295 /* A C compound statement that outputs the assembler code for a thunk
32296 function, used to implement C++ virtual function calls with
32297 multiple inheritance. The thunk acts as a wrapper around a virtual
32298 function, adjusting the implicit object parameter before handing
32299 control off to the real function.
32300
32301 First, emit code to add the integer DELTA to the location that
32302 contains the incoming first argument. Assume that this argument
32303 contains a pointer, and is the one used to pass the `this' pointer
32304 in C++. This is the incoming argument *before* the function
32305 prologue, e.g. `%o0' on a sparc. The addition must preserve the
32306 values of all other incoming arguments.
32307
32308 After the addition, emit code to jump to FUNCTION, which is a
32309 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
32310 not touch the return address. Hence returning from FUNCTION will
32311 return to whoever called the current `thunk'.
32312
32313 The effect must be as if FUNCTION had been called directly with the
32314 adjusted first argument. This macro is responsible for emitting
32315 all of the code for a thunk function; output_function_prologue()
32316 and output_function_epilogue() are not invoked.
32317
32318 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
32319 been extracted from it.) It might possibly be useful on some
32320 targets, but probably not.
32321
32322 If you do not define this macro, the target-independent code in the
32323 C++ frontend will generate a less efficient heavyweight thunk that
32324 calls FUNCTION instead of jumping to it. The generic approach does
32325 not support varargs. */
32326
32327 static void
32328 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
32329 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
32330 tree function)
32331 {
32332 rtx this_rtx, funexp;
32333 rtx_insn *insn;
32334
32335 reload_completed = 1;
32336 epilogue_completed = 1;
32337
32338 /* Mark the end of the (empty) prologue. */
32339 emit_note (NOTE_INSN_PROLOGUE_END);
32340
32341 /* Find the "this" pointer. If the function returns a structure,
32342 the structure return pointer is in r3. */
32343 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
32344 this_rtx = gen_rtx_REG (Pmode, 4);
32345 else
32346 this_rtx = gen_rtx_REG (Pmode, 3);
32347
32348 /* Apply the constant offset, if required. */
32349 if (delta)
32350 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
32351
32352 /* Apply the offset from the vtable, if required. */
32353 if (vcall_offset)
32354 {
32355 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
32356 rtx tmp = gen_rtx_REG (Pmode, 12);
32357
32358 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
32359 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
32360 {
32361 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
32362 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
32363 }
32364 else
32365 {
32366 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
32367
32368 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
32369 }
32370 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
32371 }
32372
32373 /* Generate a tail call to the target function. */
32374 if (!TREE_USED (function))
32375 {
32376 assemble_external (function);
32377 TREE_USED (function) = 1;
32378 }
32379 funexp = XEXP (DECL_RTL (function), 0);
32380 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
32381
32382 #if TARGET_MACHO
32383 if (MACHOPIC_INDIRECT)
32384 funexp = machopic_indirect_call_target (funexp);
32385 #endif
32386
32387 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
32388 generate sibcall RTL explicitly. */
32389 insn = emit_call_insn (
32390 gen_rtx_PARALLEL (VOIDmode,
32391 gen_rtvec (3,
32392 gen_rtx_CALL (VOIDmode,
32393 funexp, const0_rtx),
32394 gen_rtx_USE (VOIDmode, const0_rtx),
32395 simple_return_rtx)));
32396 SIBLING_CALL_P (insn) = 1;
32397 emit_barrier ();
32398
32399 /* Run just enough of rest_of_compilation to get the insns emitted.
32400 There's not really enough bulk here to make other passes such as
32401 instruction scheduling worth while. Note that use_thunk calls
32402 assemble_start_function and assemble_end_function. */
32403 insn = get_insns ();
32404 shorten_branches (insn);
32405 final_start_function (insn, file, 1);
32406 final (insn, file, 1);
32407 final_end_function ();
32408
32409 reload_completed = 0;
32410 epilogue_completed = 0;
32411 }
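/* For a small DELTA and no vcall offset the whole thunk boils down
   to a sketch like

       addi 3,3,DELTA      # adjust `this' (r4 for aggregate returns)
       b    FUNCTION       # tail-jump; LR still holds our caller

   which is why returning from FUNCTION returns straight to the
   thunk's caller.  */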
32412 \f
32413 /* A quick summary of the various types of 'constant-pool tables'
32414 under PowerPC:
32415
32416 Target          Flags           Name              One table per
32417 AIX             (none)          AIX TOC           object file
32418 AIX             -mfull-toc      AIX TOC           object file
32419 AIX             -mminimal-toc   AIX minimal TOC   translation unit
32420 SVR4/EABI       (none)          SVR4 SDATA        object file
32421 SVR4/EABI       -fpic           SVR4 pic          object file
32422 SVR4/EABI       -fPIC           SVR4 PIC          translation unit
32423 SVR4/EABI       -mrelocatable   EABI TOC          function
32424 SVR4/EABI       -maix           AIX TOC           object file
32425 SVR4/EABI       -maix -mminimal-toc
32426                                 AIX minimal TOC   translation unit
32427
32428 Name              Reg.  Set by   entries   contains:
32429                                  made by   addrs?   fp?      sum?
32430
32431 AIX TOC            2    crt0     as        Y        option   option
32432 AIX minimal TOC   30    prolog   gcc       Y        Y        option
32433 SVR4 SDATA        13    crt0     gcc       N        Y        N
32434 SVR4 pic          30    prolog   ld        Y        not yet  N
32435 SVR4 PIC          30    prolog   gcc       Y        option   option
32436 EABI TOC          30    prolog   gcc       Y        option   option
32437
32438 */
32439
32440 /* Hash functions for the hash table. */
32441
32442 static unsigned
32443 rs6000_hash_constant (rtx k)
32444 {
32445 enum rtx_code code = GET_CODE (k);
32446 machine_mode mode = GET_MODE (k);
32447 unsigned result = (code << 3) ^ mode;
32448 const char *format;
32449 int flen, fidx;
32450
32451 format = GET_RTX_FORMAT (code);
32452 flen = strlen (format);
32453 fidx = 0;
32454
32455 switch (code)
32456 {
32457 case LABEL_REF:
32458 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
32459
32460 case CONST_WIDE_INT:
32461 {
32462 int i;
32463 flen = CONST_WIDE_INT_NUNITS (k);
32464 for (i = 0; i < flen; i++)
32465 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
32466 return result;
32467 }
32468
32469 case CONST_DOUBLE:
32470 if (mode != VOIDmode)
32471 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
32472 flen = 2;
32473 break;
32474
32475 case CODE_LABEL:
32476 fidx = 3;
32477 break;
32478
32479 default:
32480 break;
32481 }
32482
32483 for (; fidx < flen; fidx++)
32484 switch (format[fidx])
32485 {
32486 case 's':
32487 {
32488 unsigned i, len;
32489 const char *str = XSTR (k, fidx);
32490 len = strlen (str);
32491 result = result * 613 + len;
32492 for (i = 0; i < len; i++)
32493 result = result * 613 + (unsigned) str[i];
32494 break;
32495 }
32496 case 'u':
32497 case 'e':
32498 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
32499 break;
32500 case 'i':
32501 case 'n':
32502 result = result * 613 + (unsigned) XINT (k, fidx);
32503 break;
32504 case 'w':
32505 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
32506 result = result * 613 + (unsigned) XWINT (k, fidx);
32507 else
32508 {
32509 size_t i;
32510 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
32511 result = result * 613 + (unsigned) (XWINT (k, fidx)
32512 >> CHAR_BIT * i);
32513 }
32514 break;
32515 case '0':
32516 break;
32517 default:
32518 gcc_unreachable ();
32519 }
32520
32521 return result;
32522 }
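/* Illustrative walk-through of the mixing above (values hypothetical):
   hashing (symbol_ref ("foo")) starts from (SYMBOL_REF << 3) ^ mode,
   then the 's' field mixes the length and bytes of "foo" with the
   multiplier 613:

       result = result * 613 + 3;          hash in strlen ("foo")
       result = result * 613 + 'f';        then 'o', then 'o'

   Sub-expressions ('e' fields) recurse and mix with 1231 instead, so
   e.g. (plus sym const) and (plus const sym) hash differently.  */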
32523
32524 hashval_t
32525 toc_hasher::hash (toc_hash_struct *thc)
32526 {
32527 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
32528 }
32529
32530 /* Compare H1 and H2 for equivalence. */
32531
32532 bool
32533 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
32534 {
32535 rtx r1 = h1->key;
32536 rtx r2 = h2->key;
32537
32538 if (h1->key_mode != h2->key_mode)
32539 return 0;
32540
32541 return rtx_equal_p (r1, r2);
32542 }
32543
32544 /* These are the names given by the C++ front-end to vtables and
32545 vtable-like objects. Ideally, this logic should not be here;
32546 instead, there should be some programmatic way of inquiring as
32547 to whether or not an object is a vtable. */
32548
32549 #define VTABLE_NAME_P(NAME) \
32550 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
32551 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
32552 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
32553 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
32554 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
32555
32556 #ifdef NO_DOLLAR_IN_LABEL
32557 /* Return a GGC-allocated character string translating dollar signs in
32558 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
32559
32560 const char *
32561 rs6000_xcoff_strip_dollar (const char *name)
32562 {
32563 char *strip, *p;
32564 const char *q;
32565 size_t len;
32566
32567 q = (const char *) strchr (name, '$');
32568
32569 if (q == 0 || q == name)
32570 return name;
32571
32572 len = strlen (name);
32573 strip = XALLOCAVEC (char, len + 1);
32574 strcpy (strip, name);
32575 p = strip + (q - name);
32576 while (p)
32577 {
32578 *p = '_';
32579 p = strchr (p + 1, '$');
32580 }
32581
32582 return ggc_alloc_string (strip, len);
32583 }
32584 #endif
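/* For instance, rs6000_xcoff_strip_dollar maps a name such as
   "foo$bar$stub" to "foo_bar_stub"; a name whose first character
   is '$' is deliberately returned unchanged.  */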
32585
32586 void
32587 rs6000_output_symbol_ref (FILE *file, rtx x)
32588 {
32589 const char *name = XSTR (x, 0);
32590
32591 /* Currently C++ TOC references to vtables can be emitted before it
32592 is decided whether the vtable is public or private. If this is
32593 the case, then the linker will eventually complain that there is
32594 a reference to an unknown section. Thus, for vtables only,
32595 we emit the TOC reference to reference the identifier and not the
32596 symbol. */
32597 if (VTABLE_NAME_P (name))
32598 {
32599 RS6000_OUTPUT_BASENAME (file, name);
32600 }
32601 else
32602 assemble_name (file, name);
32603 }
32604
32605 /* Output a TOC entry. We derive the entry name from what is being
32606 written. */
32607
32608 void
32609 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
32610 {
32611 char buf[256];
32612 const char *name = buf;
32613 rtx base = x;
32614 HOST_WIDE_INT offset = 0;
32615
32616 gcc_assert (!TARGET_NO_TOC);
32617
32618 /* When the linker won't eliminate them, don't output duplicate
32619 TOC entries (this happens on AIX if there is any kind of TOC,
32620 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
32621 CODE_LABELs. */
32622 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
32623 {
32624 struct toc_hash_struct *h;
32625
32626 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
32627 time because GGC is not initialized at that point. */
32628 if (toc_hash_table == NULL)
32629 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
32630
32631 h = ggc_alloc<toc_hash_struct> ();
32632 h->key = x;
32633 h->key_mode = mode;
32634 h->labelno = labelno;
32635
32636 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
32637 if (*found == NULL)
32638 *found = h;
32639 else /* This is indeed a duplicate.
32640 Set this label equal to that label. */
32641 {
32642 fputs ("\t.set ", file);
32643 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32644 fprintf (file, "%d,", labelno);
32645 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
32646 fprintf (file, "%d\n", ((*found)->labelno));
32647
32648 #ifdef HAVE_AS_TLS
32649 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
32650 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
32651 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
32652 {
32653 fputs ("\t.set ", file);
32654 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32655 fprintf (file, "%d,", labelno);
32656 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
32657 fprintf (file, "%d\n", ((*found)->labelno));
32658 }
32659 #endif
32660 return;
32661 }
32662 }
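/* When a duplicate is found above, the new label is simply aliased
   to the old one; with hypothetical label numbers the AIX-style
   output is

       .set LC..5,LC..2

   so only one TOC slot is ever allocated for the constant.  */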
32663
32664 /* If we're going to put a double constant in the TOC, make sure it's
32665 aligned properly when strict alignment is on. */
32666 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
32667 && STRICT_ALIGNMENT
32668 && GET_MODE_BITSIZE (mode) >= 64
32669 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
32670 ASM_OUTPUT_ALIGN (file, 3);
32671 }
32672
32673 (*targetm.asm_out.internal_label) (file, "LC", labelno);
32674
32675 /* Handle FP constants specially. Note that if we have a minimal
32676 TOC, things we put here aren't actually in the TOC, so we can allow
32677 FP constants. */
32678 if (GET_CODE (x) == CONST_DOUBLE &&
32679 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
32680 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
32681 {
32682 long k[4];
32683
32684 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32685 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
32686 else
32687 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32688
32689 if (TARGET_64BIT)
32690 {
32691 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32692 fputs (DOUBLE_INT_ASM_OP, file);
32693 else
32694 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32695 k[0] & 0xffffffff, k[1] & 0xffffffff,
32696 k[2] & 0xffffffff, k[3] & 0xffffffff);
32697 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
32698 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32699 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
32700 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
32701 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
32702 return;
32703 }
32704 else
32705 {
32706 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32707 fputs ("\t.long ", file);
32708 else
32709 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
32710 k[0] & 0xffffffff, k[1] & 0xffffffff,
32711 k[2] & 0xffffffff, k[3] & 0xffffffff);
32712 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
32713 k[0] & 0xffffffff, k[1] & 0xffffffff,
32714 k[2] & 0xffffffff, k[3] & 0xffffffff);
32715 return;
32716 }
32717 }
32718 else if (GET_CODE (x) == CONST_DOUBLE &&
32719 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
32720 {
32721 long k[2];
32722
32723 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32724 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
32725 else
32726 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
32727
32728 if (TARGET_64BIT)
32729 {
32730 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32731 fputs (DOUBLE_INT_ASM_OP, file);
32732 else
32733 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32734 k[0] & 0xffffffff, k[1] & 0xffffffff);
32735 fprintf (file, "0x%lx%08lx\n",
32736 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
32737 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
32738 return;
32739 }
32740 else
32741 {
32742 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32743 fputs ("\t.long ", file);
32744 else
32745 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
32746 k[0] & 0xffffffff, k[1] & 0xffffffff);
32747 fprintf (file, "0x%lx,0x%lx\n",
32748 k[0] & 0xffffffff, k[1] & 0xffffffff);
32749 return;
32750 }
32751 }
32752 else if (GET_CODE (x) == CONST_DOUBLE &&
32753 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
32754 {
32755 long l;
32756
32757 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
32758 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
32759 else
32760 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
32761
32762 if (TARGET_64BIT)
32763 {
32764 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32765 fputs (DOUBLE_INT_ASM_OP, file);
32766 else
32767 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32768 if (WORDS_BIG_ENDIAN)
32769 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
32770 else
32771 fprintf (file, "0x%lx\n", l & 0xffffffff);
32772 return;
32773 }
32774 else
32775 {
32776 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32777 fputs ("\t.long ", file);
32778 else
32779 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
32780 fprintf (file, "0x%lx\n", l & 0xffffffff);
32781 return;
32782 }
32783 }
32784 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
32785 {
32786 unsigned HOST_WIDE_INT low;
32787 HOST_WIDE_INT high;
32788
32789 low = INTVAL (x) & 0xffffffff;
32790 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
32791
32792 /* TOC entries are always Pmode-sized, so on a big-endian target
32793 smaller integer constants in the TOC need to be padded.
32794 (This is still a win over putting the constants in
32795 a separate constant pool, because then we'd have
32796 to have both a TOC entry _and_ the actual constant.)
32797
32798 For a 32-bit target, CONST_INT values are loaded and shifted
32799 entirely within `low' and can be stored in one TOC entry. */
32800
32801 /* It would be easy to make this work, but it doesn't now. */
32802 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
32803
32804 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
32805 {
32806 low |= high << 32;
32807 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
32808 high = (HOST_WIDE_INT) low >> 32;
32809 low &= 0xffffffff;
32810 }
32811
32812 if (TARGET_64BIT)
32813 {
32814 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32815 fputs (DOUBLE_INT_ASM_OP, file);
32816 else
32817 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32818 (long) high & 0xffffffff, (long) low & 0xffffffff);
32819 fprintf (file, "0x%lx%08lx\n",
32820 (long) high & 0xffffffff, (long) low & 0xffffffff);
32821 return;
32822 }
32823 else
32824 {
32825 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
32826 {
32827 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32828 fputs ("\t.long ", file);
32829 else
32830 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
32831 (long) high & 0xffffffff, (long) low & 0xffffffff);
32832 fprintf (file, "0x%lx,0x%lx\n",
32833 (long) high & 0xffffffff, (long) low & 0xffffffff);
32834 }
32835 else
32836 {
32837 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32838 fputs ("\t.long ", file);
32839 else
32840 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
32841 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
32842 }
32843 return;
32844 }
32845 }
32846
32847 if (GET_CODE (x) == CONST)
32848 {
32849 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
32850 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
32851
32852 base = XEXP (XEXP (x, 0), 0);
32853 offset = INTVAL (XEXP (XEXP (x, 0), 1));
32854 }
32855
32856 switch (GET_CODE (base))
32857 {
32858 case SYMBOL_REF:
32859 name = XSTR (base, 0);
32860 break;
32861
32862 case LABEL_REF:
32863 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
32864 CODE_LABEL_NUMBER (XEXP (base, 0)));
32865 break;
32866
32867 case CODE_LABEL:
32868 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
32869 break;
32870
32871 default:
32872 gcc_unreachable ();
32873 }
32874
32875 if (TARGET_ELF || TARGET_MINIMAL_TOC)
32876 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
32877 else
32878 {
32879 fputs ("\t.tc ", file);
32880 RS6000_OUTPUT_BASENAME (file, name);
32881
32882 if (offset < 0)
32883 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
32884 else if (offset)
32885 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
32886
32887 /* Mark large TOC symbols on AIX with [TE] so they are mapped
32888 after other TOC symbols, reducing overflow of small TOC access
32889 to [TC] symbols. */
32890 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
32891 ? "[TE]," : "[TC],", file);
32892 }
32893
32894 /* Currently C++ TOC references to vtables can be emitted before it
32895 is decided whether the vtable is public or private. If this is
32896 the case, then the linker will eventually complain that there is
32897 a TOC reference to an unknown section. Thus, for vtables only,
32898 we emit the TOC reference to reference the symbol and not the
32899 section. */
32900 if (VTABLE_NAME_P (name))
32901 {
32902 RS6000_OUTPUT_BASENAME (file, name);
32903 if (offset < 0)
32904 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
32905 else if (offset > 0)
32906 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
32907 }
32908 else
32909 output_addr_const (file, x);
32910
32911 #if HAVE_AS_TLS
32912 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
32913 {
32914 switch (SYMBOL_REF_TLS_MODEL (base))
32915 {
32916 case 0:
32917 break;
32918 case TLS_MODEL_LOCAL_EXEC:
32919 fputs ("@le", file);
32920 break;
32921 case TLS_MODEL_INITIAL_EXEC:
32922 fputs ("@ie", file);
32923 break;
32924 /* Use global-dynamic for local-dynamic. */
32925 case TLS_MODEL_GLOBAL_DYNAMIC:
32926 case TLS_MODEL_LOCAL_DYNAMIC:
32927 putc ('\n', file);
32928 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
32929 fputs ("\t.tc .", file);
32930 RS6000_OUTPUT_BASENAME (file, name);
32931 fputs ("[TC],", file);
32932 output_addr_const (file, x);
32933 fputs ("@m", file);
32934 break;
32935 default:
32936 gcc_unreachable ();
32937 }
32938 }
32939 #endif
32940
32941 putc ('\n', file);
32942 }
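/* As a concrete (illustrative) example, on 64-bit ELF the DFmode
   constant 1.0 comes out as

       .LC0:
	       .quad 0x3ff0000000000000

   while classic AIX assembly instead gets a named entry such as

       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   (label and hash digits depend on the constant).  */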
32943 \f
32944 /* Output an assembler pseudo-op to write an ASCII string of N characters
32945 starting at P to FILE.
32946
32947 On the RS/6000, we have to do this using the .byte operation and
32948 write out special characters outside the quoted string.
32949 Also, the assembler is broken; very long strings are truncated,
32950 so we must artificially break them up early. */
32951
32952 void
32953 output_ascii (FILE *file, const char *p, int n)
32954 {
32955 char c;
32956 int i, count_string;
32957 const char *for_string = "\t.byte \"";
32958 const char *for_decimal = "\t.byte ";
32959 const char *to_close = NULL;
32960
32961 count_string = 0;
32962 for (i = 0; i < n; i++)
32963 {
32964 c = *p++;
32965 if (c >= ' ' && c < 0177)
32966 {
32967 if (for_string)
32968 fputs (for_string, file);
32969 putc (c, file);
32970
32971 /* Write two quotes to get one. */
32972 if (c == '"')
32973 {
32974 putc (c, file);
32975 ++count_string;
32976 }
32977
32978 for_string = NULL;
32979 for_decimal = "\"\n\t.byte ";
32980 to_close = "\"\n";
32981 ++count_string;
32982
32983 if (count_string >= 512)
32984 {
32985 fputs (to_close, file);
32986
32987 for_string = "\t.byte \"";
32988 for_decimal = "\t.byte ";
32989 to_close = NULL;
32990 count_string = 0;
32991 }
32992 }
32993 else
32994 {
32995 if (for_decimal)
32996 fputs (for_decimal, file);
32997 fprintf (file, "%d", c);
32998
32999 for_string = "\n\t.byte \"";
33000 for_decimal = ", ";
33001 to_close = "\n";
33002 count_string = 0;
33003 }
33004 }
33005
33006 /* Now close the string if we have written one. Then end the line. */
33007 if (to_close)
33008 fputs (to_close, file);
33009 }
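/* For example, output_ascii (file, "Hi\n", 3) emits

       .byte "Hi"
       .byte 10

   keeping quoted printable runs and numeric escapes on separate
   directives, and closing a quoted run before every escape.  */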
33010 \f
33011 /* Generate a unique section name for FILENAME for a section type
33012 represented by SECTION_DESC. Output goes into BUF.
33013
33014 SECTION_DESC can be any string, as long as it is different for each
33015 possible section type.
33016
33017 We name the section in the same manner as xlc. The name begins with an
33018 underscore followed by the filename (after stripping any leading directory
33019 names) with the last period replaced by the string SECTION_DESC. If
33020 FILENAME does not contain a period, SECTION_DESC is appended to the end of
33021 the name. */
33022
33023 void
33024 rs6000_gen_section_name (char **buf, const char *filename,
33025 const char *section_desc)
33026 {
33027 const char *q, *after_last_slash, *last_period = 0;
33028 char *p;
33029 int len;
33030
33031 after_last_slash = filename;
33032 for (q = filename; *q; q++)
33033 {
33034 if (*q == '/')
33035 after_last_slash = q + 1;
33036 else if (*q == '.')
33037 last_period = q;
33038 }
33039
33040 len = strlen (after_last_slash) + strlen (section_desc) + 2;
33041 *buf = (char *) xmalloc (len);
33042
33043 p = *buf;
33044 *p++ = '_';
33045
33046 for (q = after_last_slash; *q; q++)
33047 {
33048 if (q == last_period)
33049 {
33050 strcpy (p, section_desc);
33051 p += strlen (section_desc);
33052 break;
33053 }
33054
33055 else if (ISALNUM (*q))
33056 *p++ = *q;
33057 }
33058
33059 if (last_period == 0)
33060 strcpy (p, section_desc);
33061 else
33062 *p = '\0';
33063 }
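/* For instance, with FILENAME "src/main.c" and SECTION_DESC "ro_",
   the generated name is "_mainro_"; with FILENAME "main" (no
   period) the descriptor is appended, giving "_mainro_" as well.  */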
33064 \f
33065 /* Emit profile function. */
33066
33067 void
33068 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
33069 {
33070 /* Non-standard profiling for kernels, which just saves LR then calls
33071 _mcount without worrying about arg saves. The idea is to change
33072 the function prologue as little as possible as it isn't easy to
33073 account for arg save/restore code added just for _mcount. */
33074 if (TARGET_PROFILE_KERNEL)
33075 return;
33076
33077 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33078 {
33079 #ifndef NO_PROFILE_COUNTERS
33080 # define NO_PROFILE_COUNTERS 0
33081 #endif
33082 if (NO_PROFILE_COUNTERS)
33083 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33084 LCT_NORMAL, VOIDmode);
33085 else
33086 {
33087 char buf[30];
33088 const char *label_name;
33089 rtx fun;
33090
33091 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33092 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
33093 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
33094
33095 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
33096 LCT_NORMAL, VOIDmode, fun, Pmode);
33097 }
33098 }
33099 else if (DEFAULT_ABI == ABI_DARWIN)
33100 {
33101 const char *mcount_name = RS6000_MCOUNT;
33102 int caller_addr_regno = LR_REGNO;
33103
33104 /* Be conservative and always set this, at least for now. */
33105 crtl->uses_pic_offset_table = 1;
33106
33107 #if TARGET_MACHO
33108 /* For PIC code, set up a stub and collect the caller's address
33109 from r0, which is where the prologue puts it. */
33110 if (MACHOPIC_INDIRECT
33111 && crtl->uses_pic_offset_table)
33112 caller_addr_regno = 0;
33113 #endif
33114 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
33115 LCT_NORMAL, VOIDmode,
33116 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
33117 }
33118 }
33119
33120 /* Write function profiler code. */
33121
33122 void
33123 output_function_profiler (FILE *file, int labelno)
33124 {
33125 char buf[100];
33126
33127 switch (DEFAULT_ABI)
33128 {
33129 default:
33130 gcc_unreachable ();
33131
33132 case ABI_V4:
33133 if (!TARGET_32BIT)
33134 {
33135 warning (0, "no profiling of 64-bit code for this ABI");
33136 return;
33137 }
33138 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
33139 fprintf (file, "\tmflr %s\n", reg_names[0]);
33140 if (NO_PROFILE_COUNTERS)
33141 {
33142 asm_fprintf (file, "\tstw %s,4(%s)\n",
33143 reg_names[0], reg_names[1]);
33144 }
33145 else if (TARGET_SECURE_PLT && flag_pic)
33146 {
33147 if (TARGET_LINK_STACK)
33148 {
33149 char name[32];
33150 get_ppc476_thunk_name (name);
33151 asm_fprintf (file, "\tbl %s\n", name);
33152 }
33153 else
33154 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
33155 asm_fprintf (file, "\tstw %s,4(%s)\n",
33156 reg_names[0], reg_names[1]);
33157 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33158 asm_fprintf (file, "\taddis %s,%s,",
33159 reg_names[12], reg_names[12]);
33160 assemble_name (file, buf);
33161 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
33162 assemble_name (file, buf);
33163 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
33164 }
33165 else if (flag_pic == 1)
33166 {
33167 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
33168 asm_fprintf (file, "\tstw %s,4(%s)\n",
33169 reg_names[0], reg_names[1]);
33170 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
33171 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
33172 assemble_name (file, buf);
33173 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
33174 }
33175 else if (flag_pic > 1)
33176 {
33177 asm_fprintf (file, "\tstw %s,4(%s)\n",
33178 reg_names[0], reg_names[1]);
33179 /* Now, we need to get the address of the label. */
33180 if (TARGET_LINK_STACK)
33181 {
33182 char name[32];
33183 get_ppc476_thunk_name (name);
33184 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
33185 assemble_name (file, buf);
33186 fputs ("-.\n1:", file);
33187 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33188 asm_fprintf (file, "\taddi %s,%s,4\n",
33189 reg_names[11], reg_names[11]);
33190 }
33191 else
33192 {
33193 fputs ("\tbcl 20,31,1f\n\t.long ", file);
33194 assemble_name (file, buf);
33195 fputs ("-.\n1:", file);
33196 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
33197 }
33198 asm_fprintf (file, "\tlwz %s,0(%s)\n",
33199 reg_names[0], reg_names[11]);
33200 asm_fprintf (file, "\tadd %s,%s,%s\n",
33201 reg_names[0], reg_names[0], reg_names[11]);
33202 }
33203 else
33204 {
33205 asm_fprintf (file, "\tlis %s,", reg_names[12]);
33206 assemble_name (file, buf);
33207 fputs ("@ha\n", file);
33208 asm_fprintf (file, "\tstw %s,4(%s)\n",
33209 reg_names[0], reg_names[1]);
33210 asm_fprintf (file, "\tla %s,", reg_names[0]);
33211 assemble_name (file, buf);
33212 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
33213 }
33214
33215 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
33216 fprintf (file, "\tbl %s%s\n",
33217 RS6000_MCOUNT, flag_pic ? "@plt" : "");
33218 break;
33219
33220 case ABI_AIX:
33221 case ABI_ELFv2:
33222 case ABI_DARWIN:
33223 /* Don't do anything, done in output_profile_hook (). */
33224 break;
33225 }
33226 }
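/* Sketch of the emitted code for the simplest ABI_V4 case above
   (no PIC, profile counters enabled); label number hypothetical:

       mflr 0
       lis 12,.LP0@ha
       stw 0,4(1)
       la 0,.LP0@l(12)
       bl _mcount

   The saved LR slot at 4(r1) and the counter label address in r0
   match what the SVR4 _mcount expects.  */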
33227
33228 \f
33229
33230 /* The following variable value is the last issued insn. */
33231
33232 static rtx_insn *last_scheduled_insn;
33233
33234 /* The following variable helps to balance issuing of load and
33235 store instructions */
33236
33237 static int load_store_pendulum;
33238
33239 /* The following variable helps pair divide insns during scheduling. */
33240 static int divide_cnt;
33241 /* The following variable helps pair and alternate vector and vector load
33242 insns during scheduling. */
33243 static int vec_pairing;
33244
33245
33246 /* Power4 load update and store update instructions are cracked into a
33247 load or store and an integer insn which are executed in the same cycle.
33248 Branches have their own dispatch slot which does not count against the
33249 GCC issue rate, but it changes the program flow so there are no other
33250 instructions to issue in this cycle. */
33251
33252 static int
33253 rs6000_variable_issue_1 (rtx_insn *insn, int more)
33254 {
33255 last_scheduled_insn = insn;
33256 if (GET_CODE (PATTERN (insn)) == USE
33257 || GET_CODE (PATTERN (insn)) == CLOBBER)
33258 {
33259 cached_can_issue_more = more;
33260 return cached_can_issue_more;
33261 }
33262
33263 if (insn_terminates_group_p (insn, current_group))
33264 {
33265 cached_can_issue_more = 0;
33266 return cached_can_issue_more;
33267 }
33268
33269 /* If the insn has no reservation but we still reach here, leave MORE unchanged. */
33270 if (recog_memoized (insn) < 0)
33271 return more;
33272
33273 if (rs6000_sched_groups)
33274 {
33275 if (is_microcoded_insn (insn))
33276 cached_can_issue_more = 0;
33277 else if (is_cracked_insn (insn))
33278 cached_can_issue_more = more > 2 ? more - 2 : 0;
33279 else
33280 cached_can_issue_more = more - 1;
33281
33282 return cached_can_issue_more;
33283 }
33284
33285 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
33286 return 0;
33287
33288 cached_can_issue_more = more - 1;
33289 return cached_can_issue_more;
33290 }
33291
33292 static int
33293 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
33294 {
33295 int r = rs6000_variable_issue_1 (insn, more);
33296 if (verbose)
33297 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
33298 return r;
33299 }
33300
33301 /* Adjust the cost of a scheduling dependency. Return the new cost of
33302 a dependency of kind DEP_TYPE between INSN and DEP_INSN. COST is the current cost. */
33303
33304 static int
33305 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
33306 unsigned int)
33307 {
33308 enum attr_type attr_type;
33309
33310 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
33311 return cost;
33312
33313 switch (dep_type)
33314 {
33315 case REG_DEP_TRUE:
33316 {
33317 /* Data dependency; DEP_INSN writes a register that INSN reads
33318 some cycles later. */
33319
33320 /* Separate a load from a narrower, dependent store. */
33321 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
33322 && GET_CODE (PATTERN (insn)) == SET
33323 && GET_CODE (PATTERN (dep_insn)) == SET
33324 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
33325 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
33326 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
33327 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
33328 return cost + 14;
33329
33330 attr_type = get_attr_type (insn);
33331
33332 switch (attr_type)
33333 {
33334 case TYPE_JMPREG:
33335 /* Tell the first scheduling pass about the latency between
33336 a mtctr and bctr (and mtlr and br/blr). The first
33337 scheduling pass will not know about this latency since
33338 the mtctr instruction, which has the latency associated
33339 to it, will be generated by reload. */
33340 return 4;
33341 case TYPE_BRANCH:
33342 /* Leave some extra cycles between a compare and its
33343 dependent branch, to inhibit expensive mispredicts. */
33344 if ((rs6000_cpu_attr == CPU_PPC603
33345 || rs6000_cpu_attr == CPU_PPC604
33346 || rs6000_cpu_attr == CPU_PPC604E
33347 || rs6000_cpu_attr == CPU_PPC620
33348 || rs6000_cpu_attr == CPU_PPC630
33349 || rs6000_cpu_attr == CPU_PPC750
33350 || rs6000_cpu_attr == CPU_PPC7400
33351 || rs6000_cpu_attr == CPU_PPC7450
33352 || rs6000_cpu_attr == CPU_PPCE5500
33353 || rs6000_cpu_attr == CPU_PPCE6500
33354 || rs6000_cpu_attr == CPU_POWER4
33355 || rs6000_cpu_attr == CPU_POWER5
33356 || rs6000_cpu_attr == CPU_POWER7
33357 || rs6000_cpu_attr == CPU_POWER8
33358 || rs6000_cpu_attr == CPU_POWER9
33359 || rs6000_cpu_attr == CPU_CELL)
33360 && recog_memoized (dep_insn)
33361 && (INSN_CODE (dep_insn) >= 0))
33362
33363 switch (get_attr_type (dep_insn))
33364 {
33365 case TYPE_CMP:
33366 case TYPE_FPCOMPARE:
33367 case TYPE_CR_LOGICAL:
33368 case TYPE_DELAYED_CR:
33369 return cost + 2;
33370 case TYPE_EXTS:
33371 case TYPE_MUL:
33372 if (get_attr_dot (dep_insn) == DOT_YES)
33373 return cost + 2;
33374 else
33375 break;
33376 case TYPE_SHIFT:
33377 if (get_attr_dot (dep_insn) == DOT_YES
33378 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
33379 return cost + 2;
33380 else
33381 break;
33382 default:
33383 break;
33384 }
33385 break;
33386
33387 case TYPE_STORE:
33388 case TYPE_FPSTORE:
33389 if ((rs6000_cpu == PROCESSOR_POWER6)
33390 && recog_memoized (dep_insn)
33391 && (INSN_CODE (dep_insn) >= 0))
33392 {
33393
33394 if (GET_CODE (PATTERN (insn)) != SET)
33395 /* If this happens, we have to extend this to schedule
33396 optimally. Return default for now. */
33397 return cost;
33398
33399 /* Adjust the cost for the case where the value written
33400 by a fixed point operation is used as the address
33401 gen value on a store. */
33402 switch (get_attr_type (dep_insn))
33403 {
33404 case TYPE_LOAD:
33405 case TYPE_CNTLZ:
33406 {
33407 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33408 return get_attr_sign_extend (dep_insn)
33409 == SIGN_EXTEND_YES ? 6 : 4;
33410 break;
33411 }
33412 case TYPE_SHIFT:
33413 {
33414 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33415 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33416 6 : 3;
33417 break;
33418 }
33419 case TYPE_INTEGER:
33420 case TYPE_ADD:
33421 case TYPE_LOGICAL:
33422 case TYPE_EXTS:
33423 case TYPE_INSERT:
33424 {
33425 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33426 return 3;
33427 break;
33428 }
33429 case TYPE_STORE:
33430 case TYPE_FPLOAD:
33431 case TYPE_FPSTORE:
33432 {
33433 if (get_attr_update (dep_insn) == UPDATE_YES
33434 && ! rs6000_store_data_bypass_p (dep_insn, insn))
33435 return 3;
33436 break;
33437 }
33438 case TYPE_MUL:
33439 {
33440 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33441 return 17;
33442 break;
33443 }
33444 case TYPE_DIV:
33445 {
33446 if (! rs6000_store_data_bypass_p (dep_insn, insn))
33447 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33448 break;
33449 }
33450 default:
33451 break;
33452 }
33453 }
33454 break;
33455
33456 case TYPE_LOAD:
33457 if ((rs6000_cpu == PROCESSOR_POWER6)
33458 && recog_memoized (dep_insn)
33459 && (INSN_CODE (dep_insn) >= 0))
33460 {
33461
33462 /* Adjust the cost for the case where the value written
33463 by a fixed point instruction is used within the address
33464 gen portion of a subsequent load(u)(x). */
33465 switch (get_attr_type (dep_insn))
33466 {
33467 case TYPE_LOAD:
33468 case TYPE_CNTLZ:
33469 {
33470 if (set_to_load_agen (dep_insn, insn))
33471 return get_attr_sign_extend (dep_insn)
33472 == SIGN_EXTEND_YES ? 6 : 4;
33473 break;
33474 }
33475 case TYPE_SHIFT:
33476 {
33477 if (set_to_load_agen (dep_insn, insn))
33478 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
33479 6 : 3;
33480 break;
33481 }
33482 case TYPE_INTEGER:
33483 case TYPE_ADD:
33484 case TYPE_LOGICAL:
33485 case TYPE_EXTS:
33486 case TYPE_INSERT:
33487 {
33488 if (set_to_load_agen (dep_insn, insn))
33489 return 3;
33490 break;
33491 }
33492 case TYPE_STORE:
33493 case TYPE_FPLOAD:
33494 case TYPE_FPSTORE:
33495 {
33496 if (get_attr_update (dep_insn) == UPDATE_YES
33497 && set_to_load_agen (dep_insn, insn))
33498 return 3;
33499 break;
33500 }
33501 case TYPE_MUL:
33502 {
33503 if (set_to_load_agen (dep_insn, insn))
33504 return 17;
33505 break;
33506 }
33507 case TYPE_DIV:
33508 {
33509 if (set_to_load_agen (dep_insn, insn))
33510 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
33511 break;
33512 }
33513 default:
33514 break;
33515 }
33516 }
33517 break;
33518
33519 case TYPE_FPLOAD:
33520 if ((rs6000_cpu == PROCESSOR_POWER6)
33521 && get_attr_update (insn) == UPDATE_NO
33522 && recog_memoized (dep_insn)
33523 && (INSN_CODE (dep_insn) >= 0)
33524 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
33525 return 2;
33526
33527 default:
33528 break;
33529 }
33530
33531 /* Fall out to return default cost. */
33532 }
33533 break;
33534
33535 case REG_DEP_OUTPUT:
33536 /* Output dependency; DEP_INSN writes a register that INSN writes some
33537 cycles later. */
33538 if ((rs6000_cpu == PROCESSOR_POWER6)
33539 && recog_memoized (dep_insn)
33540 && (INSN_CODE (dep_insn) >= 0))
33541 {
33542 attr_type = get_attr_type (insn);
33543
33544 switch (attr_type)
33545 {
33546 case TYPE_FP:
33547 case TYPE_FPSIMPLE:
33548 if (get_attr_type (dep_insn) == TYPE_FP
33549 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
33550 return 1;
33551 break;
33552 case TYPE_FPLOAD:
33553 if (get_attr_update (insn) == UPDATE_NO
33554 && get_attr_type (dep_insn) == TYPE_MFFGPR)
33555 return 2;
33556 break;
33557 default:
33558 break;
33559 }
33560 }
33561 /* Fall through, no cost for output dependency. */
33562 /* FALLTHRU */
33563
33564 case REG_DEP_ANTI:
33565 /* Anti dependency; DEP_INSN reads a register that INSN writes some
33566 cycles later. */
33567 return 0;
33568
33569 default:
33570 gcc_unreachable ();
33571 }
33572
33573 return cost;
33574 }
33575
33576 /* Debug version of rs6000_adjust_cost. */
33577
33578 static int
33579 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
33580 int cost, unsigned int dw)
33581 {
33582 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
33583
33584 if (ret != cost)
33585 {
33586 const char *dep;
33587
33588 switch (dep_type)
33589 {
33590 default: dep = "unknown depencency"; break;
33591 case REG_DEP_TRUE: dep = "data dependency"; break;
33592 case REG_DEP_OUTPUT: dep = "output dependency"; break;
33593 case REG_DEP_ANTI: dep = "anti depencency"; break;
33594 }
33595
33596 fprintf (stderr,
33597 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
33598 "%s, insn:\n", ret, cost, dep);
33599
33600 debug_rtx (insn);
33601 }
33602
33603 return ret;
33604 }
33605
33606 /* Return true if INSN is microcoded.
33607 Return false otherwise. */
33608
33609 static bool
33610 is_microcoded_insn (rtx_insn *insn)
33611 {
33612 if (!insn || !NONDEBUG_INSN_P (insn)
33613 || GET_CODE (PATTERN (insn)) == USE
33614 || GET_CODE (PATTERN (insn)) == CLOBBER)
33615 return false;
33616
33617 if (rs6000_cpu_attr == CPU_CELL)
33618 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
33619
33620 if (rs6000_sched_groups
33621 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33622 {
33623 enum attr_type type = get_attr_type (insn);
33624 if ((type == TYPE_LOAD
33625 && get_attr_update (insn) == UPDATE_YES
33626 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
33627 || ((type == TYPE_LOAD || type == TYPE_STORE)
33628 && get_attr_update (insn) == UPDATE_YES
33629 && get_attr_indexed (insn) == INDEXED_YES)
33630 || type == TYPE_MFCR)
33631 return true;
33632 }
33633
33634 return false;
33635 }
33636
33637 /* The function returns true if INSN is cracked into 2 instructions
33638 by the processor (and therefore occupies 2 issue slots). */
33639
33640 static bool
33641 is_cracked_insn (rtx_insn *insn)
33642 {
33643 if (!insn || !NONDEBUG_INSN_P (insn)
33644 || GET_CODE (PATTERN (insn)) == USE
33645 || GET_CODE (PATTERN (insn)) == CLOBBER)
33646 return false;
33647
33648 if (rs6000_sched_groups
33649 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
33650 {
33651 enum attr_type type = get_attr_type (insn);
33652 if ((type == TYPE_LOAD
33653 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
33654 && get_attr_update (insn) == UPDATE_NO)
33655 || (type == TYPE_LOAD
33656 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
33657 && get_attr_update (insn) == UPDATE_YES
33658 && get_attr_indexed (insn) == INDEXED_NO)
33659 || (type == TYPE_STORE
33660 && get_attr_update (insn) == UPDATE_YES
33661 && get_attr_indexed (insn) == INDEXED_NO)
33662 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
33663 && get_attr_update (insn) == UPDATE_YES)
33664 || type == TYPE_DELAYED_CR
33665 || (type == TYPE_EXTS
33666 && get_attr_dot (insn) == DOT_YES)
33667 || (type == TYPE_SHIFT
33668 && get_attr_dot (insn) == DOT_YES
33669 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
33670 || (type == TYPE_MUL
33671 && get_attr_dot (insn) == DOT_YES)
33672 || type == TYPE_DIV
33673 || (type == TYPE_INSERT
33674 && get_attr_size (insn) == SIZE_32))
33675 return true;
33676 }
33677
33678 return false;
33679 }
33680
33681 /* The function returns true if INSN can be issued only from
33682 the branch slot. */
33683
33684 static bool
33685 is_branch_slot_insn (rtx_insn *insn)
33686 {
33687 if (!insn || !NONDEBUG_INSN_P (insn)
33688 || GET_CODE (PATTERN (insn)) == USE
33689 || GET_CODE (PATTERN (insn)) == CLOBBER)
33690 return false;
33691
33692 if (rs6000_sched_groups)
33693 {
33694 enum attr_type type = get_attr_type (insn);
33695 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
33696 return true;
33697 return false;
33698 }
33699
33700 return false;
33701 }
33702
33703 /* Return true if OUT_INSN sets a value that is
33704 used in the address generation computation of IN_INSN. */
33705 static bool
33706 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
33707 {
33708 rtx out_set, in_set;
33709
33710 /* For performance reasons, only handle the simple case where
33711 both insns are a single_set. */
33712 out_set = single_set (out_insn);
33713 if (out_set)
33714 {
33715 in_set = single_set (in_insn);
33716 if (in_set)
33717 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
33718 }
33719
33720 return false;
33721 }
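/* E.g. for the (hypothetical) insn pair

       (set (reg:DI 9) (plus:DI (reg:DI 3) (const_int 16)))
       (set (reg:DI 0) (mem:DI (reg:DI 9)))

   the addi result feeds the load's address generation, so the
   function returns true and the Power6 bypass costs above apply.  */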
33722
33723 /* Try to determine base/offset/size parts of the given MEM.
33724 Return true if successful, false if any of the values couldn't
33725 be determined.
33726
33727 This function only looks for REG or REG+CONST address forms.
33728 REG+REG address form will return false. */
33729
33730 static bool
33731 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
33732 HOST_WIDE_INT *size)
33733 {
33734 rtx addr_rtx;
33735 if (MEM_SIZE_KNOWN_P (mem))
33736 *size = MEM_SIZE (mem);
33737 else
33738 return false;
33739
33740 addr_rtx = (XEXP (mem, 0));
33741 if (GET_CODE (addr_rtx) == PRE_MODIFY)
33742 addr_rtx = XEXP (addr_rtx, 1);
33743
33744 *offset = 0;
33745 while (GET_CODE (addr_rtx) == PLUS
33746 && CONST_INT_P (XEXP (addr_rtx, 1)))
33747 {
33748 *offset += INTVAL (XEXP (addr_rtx, 1));
33749 addr_rtx = XEXP (addr_rtx, 0);
33750 }
33751 if (!REG_P (addr_rtx))
33752 return false;
33753
33754 *base = addr_rtx;
33755 return true;
33756 }
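/* For a MEM such as (mem:SI (plus:SI (reg:SI 1) (const_int 16)))
   with a known 4-byte size, this yields *BASE = r1, *OFFSET = 16,
   *SIZE = 4; a reg+reg (indexed) address makes it return false.  */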
33757
33758 /* Return true if the target storage location of
33759 MEM1 is adjacent to the target storage location of MEM2. */
33760 
33761
33762 static bool
33763 adjacent_mem_locations (rtx mem1, rtx mem2)
33764 {
33765 rtx reg1, reg2;
33766 HOST_WIDE_INT off1, size1, off2, size2;
33767
33768 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33769 && get_memref_parts (mem2, &reg2, &off2, &size2))
33770 return ((REGNO (reg1) == REGNO (reg2))
33771 && ((off1 + size1 == off2)
33772 || (off2 + size2 == off1)));
33773
33774 return false;
33775 }
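/* So two 4-byte accesses at 16(r9) and 20(r9) are adjacent, while
   16(r9) and 24(r9) are not; different base regs always fail, even
   if the two locations happen to coincide at run time.  */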
33776
33777 /* This function returns true if it can be determined that the two MEM
33778 locations overlap by at least 1 byte based on base reg/offset/size. */
33779
33780 static bool
33781 mem_locations_overlap (rtx mem1, rtx mem2)
33782 {
33783 rtx reg1, reg2;
33784 HOST_WIDE_INT off1, size1, off2, size2;
33785
33786 if (get_memref_parts (mem1, &reg1, &off1, &size1)
33787 && get_memref_parts (mem2, &reg2, &off2, &size2))
33788 return ((REGNO (reg1) == REGNO (reg2))
33789 && (((off1 <= off2) && (off1 + size1 > off2))
33790 || ((off2 <= off1) && (off2 + size2 > off1))));
33791
33792 return false;
33793 }
33794
33795 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
33796 Increase the priority to execute INSN earlier, reduce the
33797 priority to execute INSN later. This hook only needs to do
33798 something when the scheduling priorities of insns need
33799 adjusting. */
33800
33801 static int
33802 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
33803 {
33804 rtx load_mem, str_mem;
33805 /* On machines (like the 750) which have asymmetric integer units,
33806 where one integer unit can do multiply and divides and the other
33807 can't, reduce the priority of multiply/divide so it is scheduled
33808 before other integer operations. */
33809
33810 #if 0
33811 if (! INSN_P (insn))
33812 return priority;
33813
33814 if (GET_CODE (PATTERN (insn)) == USE)
33815 return priority;
33816
33817 switch (rs6000_cpu_attr) {
33818 case CPU_PPC750:
33819 switch (get_attr_type (insn))
33820 {
33821 default:
33822 break;
33823
33824 case TYPE_MUL:
33825 case TYPE_DIV:
33826 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
33827 priority, priority);
33828 if (priority >= 0 && priority < 0x01000000)
33829 priority >>= 3;
33830 break;
33831 }
33832 }
33833 #endif
33834
33835 if (insn_must_be_first_in_group (insn)
33836 && reload_completed
33837 && current_sched_info->sched_max_insns_priority
33838 && rs6000_sched_restricted_insns_priority)
33839 {
33840
33841 /* Prioritize insns that can be dispatched only in the first
33842 dispatch slot. */
33843 if (rs6000_sched_restricted_insns_priority == 1)
33844 /* Attach highest priority to insn. This means that in
33845 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
33846 precede 'priority' (critical path) considerations. */
33847 return current_sched_info->sched_max_insns_priority;
33848 else if (rs6000_sched_restricted_insns_priority == 2)
33849 /* Increase priority of insn by a minimal amount. This means that in
33850 haifa-sched.c:ready_sort(), only 'priority' (critical path)
33851 considerations precede dispatch-slot restriction considerations. */
33852 return (priority + 1);
33853 }
33854
33855 if (rs6000_cpu == PROCESSOR_POWER6
33856 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
33857 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
33858 /* Attach highest priority to insn if the scheduler has just issued two
33859 stores and this instruction is a load, or two loads and this instruction
33860 is a store. Power6 wants loads and stores scheduled alternately
33861 when possible. */
33862 return current_sched_info->sched_max_insns_priority;
33863
33864 return priority;
33865 }
33866
33867 /* Return true if the instruction is nonpipelined on the Cell. */
33868 static bool
33869 is_nonpipeline_insn (rtx_insn *insn)
33870 {
33871 enum attr_type type;
33872 if (!insn || !NONDEBUG_INSN_P (insn)
33873 || GET_CODE (PATTERN (insn)) == USE
33874 || GET_CODE (PATTERN (insn)) == CLOBBER)
33875 return false;
33876
33877 type = get_attr_type (insn);
33878 if (type == TYPE_MUL
33879 || type == TYPE_DIV
33880 || type == TYPE_SDIV
33881 || type == TYPE_DDIV
33882 || type == TYPE_SSQRT
33883 || type == TYPE_DSQRT
33884 || type == TYPE_MFCR
33885 || type == TYPE_MFCRF
33886 || type == TYPE_MFJMPR)
33887 {
33888 return true;
33889 }
33890 return false;
33891 }
33892
33893
33894 /* Return how many instructions the machine can issue per cycle. */
33895
33896 static int
33897 rs6000_issue_rate (void)
33898 {
33899 /* Unless scheduling for register pressure, use issue rate of 1 for
33900 first scheduling pass to decrease degradation. */
33901 if (!reload_completed && !flag_sched_pressure)
33902 return 1;
33903
33904 switch (rs6000_cpu_attr) {
33905 case CPU_RS64A:
33906 case CPU_PPC601: /* ? */
33907 case CPU_PPC7450:
33908 return 3;
33909 case CPU_PPC440:
33910 case CPU_PPC603:
33911 case CPU_PPC750:
33912 case CPU_PPC7400:
33913 case CPU_PPC8540:
33914 case CPU_PPC8548:
33915 case CPU_CELL:
33916 case CPU_PPCE300C2:
33917 case CPU_PPCE300C3:
33918 case CPU_PPCE500MC:
33919 case CPU_PPCE500MC64:
33920 case CPU_PPCE5500:
33921 case CPU_PPCE6500:
33922 case CPU_TITAN:
33923 return 2;
33924 case CPU_PPC476:
33925 case CPU_PPC604:
33926 case CPU_PPC604E:
33927 case CPU_PPC620:
33928 case CPU_PPC630:
33929 return 4;
33930 case CPU_POWER4:
33931 case CPU_POWER5:
33932 case CPU_POWER6:
33933 case CPU_POWER7:
33934 return 5;
33935 case CPU_POWER8:
33936 return 7;
33937 case CPU_POWER9:
33938 return 6;
33939 default:
33940 return 1;
33941 }
33942 }
33943
33944 /* Return how many instructions to look ahead for better insn
33945 scheduling. */
33946
33947 static int
33948 rs6000_use_sched_lookahead (void)
33949 {
33950 switch (rs6000_cpu_attr)
33951 {
33952 case CPU_PPC8540:
33953 case CPU_PPC8548:
33954 return 4;
33955
33956 case CPU_CELL:
33957 return (reload_completed ? 8 : 0);
33958
33959 default:
33960 return 0;
33961 }
33962 }
33963
33964 /* We are choosing an insn from the ready queue. Return zero if INSN can be
33965 chosen. */
33966 static int
33967 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
33968 {
33969 if (ready_index == 0)
33970 return 0;
33971
33972 if (rs6000_cpu_attr != CPU_CELL)
33973 return 0;
33974
33975 gcc_assert (insn != NULL_RTX && INSN_P (insn));
33976
33977 if (!reload_completed
33978 || is_nonpipeline_insn (insn)
33979 || is_microcoded_insn (insn))
33980 return 1;
33981
33982 return 0;
33983 }
33984
33985 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
33986 and return true. */
33987
33988 static bool
33989 find_mem_ref (rtx pat, rtx *mem_ref)
33990 {
33991 const char * fmt;
33992 int i, j;
33993
33994 /* stack_tie does not produce any real memory traffic. */
33995 if (tie_operand (pat, VOIDmode))
33996 return false;
33997
33998 if (GET_CODE (pat) == MEM)
33999 {
34000 *mem_ref = pat;
34001 return true;
34002 }
34003
34004 /* Recursively process the pattern. */
34005 fmt = GET_RTX_FORMAT (GET_CODE (pat));
34006
34007 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
34008 {
34009 if (fmt[i] == 'e')
34010 {
34011 if (find_mem_ref (XEXP (pat, i), mem_ref))
34012 return true;
34013 }
34014 else if (fmt[i] == 'E')
34015 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
34016 {
34017 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
34018 return true;
34019 }
34020 }
34021
34022 return false;
34023 }
34024
34025 /* Determine if PAT is a PATTERN of a load insn. */
34026
34027 static bool
34028 is_load_insn1 (rtx pat, rtx *load_mem)
34029 {
34030 if (!pat)
34031 return false;
34032
34033 if (GET_CODE (pat) == SET)
34034 return find_mem_ref (SET_SRC (pat), load_mem);
34035
34036 if (GET_CODE (pat) == PARALLEL)
34037 {
34038 int i;
34039
34040 for (i = 0; i < XVECLEN (pat, 0); i++)
34041 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
34042 return true;
34043 }
34044
34045 return false;
34046 }
34047
34048 /* Determine if INSN loads from memory. */
34049
34050 static bool
34051 is_load_insn (rtx insn, rtx *load_mem)
34052 {
34053 if (!insn || !INSN_P (insn))
34054 return false;
34055
34056 if (CALL_P (insn))
34057 return false;
34058
34059 return is_load_insn1 (PATTERN (insn), load_mem);
34060 }
34061
34062 /* Determine if PAT is a PATTERN of a store insn. */
34063
34064 static bool
34065 is_store_insn1 (rtx pat, rtx *str_mem)
34066 {
34067 if (!pat)
34068 return false;
34069
34070 if (GET_CODE (pat) == SET)
34071 return find_mem_ref (SET_DEST (pat), str_mem);
34072
34073 if (GET_CODE (pat) == PARALLEL)
34074 {
34075 int i;
34076
34077 for (i = 0; i < XVECLEN (pat, 0); i++)
34078 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
34079 return true;
34080 }
34081
34082 return false;
34083 }
34084
34085 /* Determine if INSN stores to memory. */
34086
34087 static bool
34088 is_store_insn (rtx insn, rtx *str_mem)
34089 {
34090 if (!insn || !INSN_P (insn))
34091 return false;
34092
34093 return is_store_insn1 (PATTERN (insn), str_mem);
34094 }
34095
34096 /* Return whether TYPE is a Power9 pairable vector instruction type. */
34097
34098 static bool
34099 is_power9_pairable_vec_type (enum attr_type type)
34100 {
34101 switch (type)
34102 {
34103 case TYPE_VECSIMPLE:
34104 case TYPE_VECCOMPLEX:
34105 case TYPE_VECDIV:
34106 case TYPE_VECCMP:
34107 case TYPE_VECPERM:
34108 case TYPE_VECFLOAT:
34109 case TYPE_VECFDIV:
34110 case TYPE_VECDOUBLE:
34111 return true;
34112 default:
34113 break;
34114 }
34115 return false;
34116 }
34117
34118 /* Returns whether the dependence between INSN and NEXT is considered
34119 costly by the given target. */
34120
34121 static bool
34122 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
34123 {
34124 rtx insn;
34125 rtx next;
34126 rtx load_mem, str_mem;
34127
34128 /* If the flag is not enabled - no dependence is considered costly;
34129 allow all dependent insns in the same group.
34130 This is the most aggressive option. */
34131 if (rs6000_sched_costly_dep == no_dep_costly)
34132 return false;
34133
34134 /* If the flag is set to 1 - a dependence is always considered costly;
34135 do not allow dependent instructions in the same group.
34136 This is the most conservative option. */
34137 if (rs6000_sched_costly_dep == all_deps_costly)
34138 return true;
34139
34140 insn = DEP_PRO (dep);
34141 next = DEP_CON (dep);
34142
34143 if (rs6000_sched_costly_dep == store_to_load_dep_costly
34144 && is_load_insn (next, &load_mem)
34145 && is_store_insn (insn, &str_mem))
34146 /* Prevent load after store in the same group. */
34147 return true;
34148
34149 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
34150 && is_load_insn (next, &load_mem)
34151 && is_store_insn (insn, &str_mem)
34152 && DEP_TYPE (dep) == REG_DEP_TRUE
34153 && mem_locations_overlap(str_mem, load_mem))
34154 /* Prevent load after store in the same group if it is a true
34155 dependence. */
34156 return true;
34157
34158 /* The flag is set to X; dependences with latency >= X are considered costly,
34159 and will not be scheduled in the same group. */
34160 if (rs6000_sched_costly_dep <= max_dep_latency
34161 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
34162 return true;
34163
34164 return false;
34165 }
34166
34167 /* Return the next insn after INSN that is found before TAIL is reached,
34168 skipping any "non-active" insns - insns that will not actually occupy
34169 an issue slot. Return NULL_RTX if such an insn is not found. */
34170
34171 static rtx_insn *
34172 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
34173 {
34174 if (insn == NULL_RTX || insn == tail)
34175 return NULL;
34176
34177 while (1)
34178 {
34179 insn = NEXT_INSN (insn);
34180 if (insn == NULL_RTX || insn == tail)
34181 return NULL;
34182
34183 if (CALL_P (insn)
34184 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
34185 || (NONJUMP_INSN_P (insn)
34186 && GET_CODE (PATTERN (insn)) != USE
34187 && GET_CODE (PATTERN (insn)) != CLOBBER
34188 && INSN_CODE (insn) != CODE_FOR_stack_tie))
34189 break;
34190 }
34191 return insn;
34192 }
34193
34194 /* Do Power9 specific sched_reorder2 reordering of ready list. */
34195
34196 static int
34197 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
34198 {
34199 int pos;
34200 int i;
34201 rtx_insn *tmp;
34202 enum attr_type type, type2;
34203
34204 type = get_attr_type (last_scheduled_insn);
34205
34206 /* Try to issue fixed point divides back-to-back in pairs so they will be
34207 routed to separate execution units and execute in parallel. */
34208 if (type == TYPE_DIV && divide_cnt == 0)
34209 {
34210 /* First divide has been scheduled. */
34211 divide_cnt = 1;
34212
34213 /* Scan the ready list looking for another divide, if found move it
34214 to the end of the list so it is chosen next. */
34215 pos = lastpos;
34216 while (pos >= 0)
34217 {
34218 if (recog_memoized (ready[pos]) >= 0
34219 && get_attr_type (ready[pos]) == TYPE_DIV)
34220 {
34221 tmp = ready[pos];
34222 for (i = pos; i < lastpos; i++)
34223 ready[i] = ready[i + 1];
34224 ready[lastpos] = tmp;
34225 break;
34226 }
34227 pos--;
34228 }
34229 }
34230 else
34231 {
34232 /* Last insn was the 2nd divide or not a divide, reset the counter. */
34233 divide_cnt = 0;
34234
34235 /* The best dispatch throughput for vector and vector load insns can be
34236 achieved by interleaving a vector and vector load such that they'll
34237 dispatch to the same superslice. If this pairing cannot be achieved
34238 then it is best to pair vector insns together and vector load insns
34239 together.
34240
34241 To aid in this pairing, vec_pairing maintains the current state with
34242 the following values:
34243
34244 0 : Initial state, no vecload/vector pairing has been started.
34245
34246 1 : A vecload or vector insn has been issued and a candidate for
34247 pairing has been found and moved to the end of the ready
34248 list. */
34249 if (type == TYPE_VECLOAD)
34250 {
34251 /* Issued a vecload. */
34252 if (vec_pairing == 0)
34253 {
34254 int vecload_pos = -1;
34255 /* We issued a single vecload, look for a vector insn to pair it
34256 with. If one isn't found, try to pair another vecload. */
34257 pos = lastpos;
34258 while (pos >= 0)
34259 {
34260 if (recog_memoized (ready[pos]) >= 0)
34261 {
34262 type2 = get_attr_type (ready[pos]);
34263 if (is_power9_pairable_vec_type (type2))
34264 {
34265 /* Found a vector insn to pair with, move it to the
34266 end of the ready list so it is scheduled next. */
34267 tmp = ready[pos];
34268 for (i = pos; i < lastpos; i++)
34269 ready[i] = ready[i + 1];
34270 ready[lastpos] = tmp;
34271 vec_pairing = 1;
34272 return cached_can_issue_more;
34273 }
34274 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
34275 /* Remember position of first vecload seen. */
34276 vecload_pos = pos;
34277 }
34278 pos--;
34279 }
34280 if (vecload_pos >= 0)
34281 {
34282 /* Didn't find a vector to pair with but did find a vecload,
34283 move it to the end of the ready list. */
34284 tmp = ready[vecload_pos];
34285 for (i = vecload_pos; i < lastpos; i++)
34286 ready[i] = ready[i + 1];
34287 ready[lastpos] = tmp;
34288 vec_pairing = 1;
34289 return cached_can_issue_more;
34290 }
34291 }
34292 }
34293 else if (is_power9_pairable_vec_type (type))
34294 {
34295 /* Issued a vector operation. */
34296 if (vec_pairing == 0)
34297 {
34298 int vec_pos = -1;
34299 /* We issued a single vector insn, look for a vecload to pair it
34300 with. If one isn't found, try to pair another vector. */
34301 pos = lastpos;
34302 while (pos >= 0)
34303 {
34304 if (recog_memoized (ready[pos]) >= 0)
34305 {
34306 type2 = get_attr_type (ready[pos]);
34307 if (type2 == TYPE_VECLOAD)
34308 {
34309 /* Found a vecload insn to pair with, move it to the
34310 end of the ready list so it is scheduled next. */
34311 tmp = ready[pos];
34312 for (i = pos; i < lastpos; i++)
34313 ready[i] = ready[i + 1];
34314 ready[lastpos] = tmp;
34315 vec_pairing = 1;
34316 return cached_can_issue_more;
34317 }
34318 else if (is_power9_pairable_vec_type (type2)
34319 && vec_pos == -1)
34320 /* Remember position of first vector insn seen. */
34321 vec_pos = pos;
34322 }
34323 pos--;
34324 }
34325 if (vec_pos >= 0)
34326 {
34327 /* Didn't find a vecload to pair with but did find a vector
34328 insn, move it to the end of the ready list. */
34329 tmp = ready[vec_pos];
34330 for (i = vec_pos; i < lastpos; i++)
34331 ready[i] = ready[i + 1];
34332 ready[lastpos] = tmp;
34333 vec_pairing = 1;
34334 return cached_can_issue_more;
34335 }
34336 }
34337 }
34338
34339 /* We've either finished a vec/vecload pair, couldn't find an insn to
34340 continue the current pair, or the last insn had nothing to do with
34341 with pairing. In any case, reset the state. */
34342 vec_pairing = 0;
34343 }
34344
34345 return cached_can_issue_more;
34346 }
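/* Rotation example (hypothetical ready list, highest priority last):
   after one divide issues, ready = { A, DIV2, B } becomes
   { A, B, DIV2 }, so DIV2 is chosen next and the two divides
   dispatch back-to-back to separate execution units.  */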
34347
34348 /* We are about to begin issuing insns for this clock cycle. */
34349
34350 static int
34351 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
34352 rtx_insn **ready ATTRIBUTE_UNUSED,
34353 int *pn_ready ATTRIBUTE_UNUSED,
34354 int clock_var ATTRIBUTE_UNUSED)
34355 {
34356 int n_ready = *pn_ready;
34357
34358 if (sched_verbose)
34359 fprintf (dump, "// rs6000_sched_reorder :\n");
34360
34361 /* Reorder the ready list if the last ready insn (the next one
34362 to issue) is a nonpipeline insn. */
34363 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
34364 {
34365 if (is_nonpipeline_insn (ready[n_ready - 1])
34366 && (recog_memoized (ready[n_ready - 2]) > 0))
34367 /* Simply swap first two insns. */
34368 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
34369 }
34370
34371 if (rs6000_cpu == PROCESSOR_POWER6)
34372 load_store_pendulum = 0;
34373
34374 return rs6000_issue_rate ();
34375 }
34376
34377 /* Like rs6000_sched_reorder, but called after issuing each insn. */
34378
34379 static int
34380 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
34381 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
34382 {
34383 if (sched_verbose)
34384 fprintf (dump, "// rs6000_sched_reorder2 :\n");
34385
34386 /* For Power6, we need to handle some special cases to try and keep the
34387 store queue from overflowing and triggering expensive flushes.
34388
34389 This code monitors how load and store instructions are being issued
34390 and skews the ready list one way or the other to increase the likelihood
34391 that a desired instruction is issued at the proper time.
34392
34393 A couple of things are done. First, we maintain a "load_store_pendulum"
34394 to track the current state of load/store issue.
34395
34396 - If the pendulum is at zero, then no loads or stores have been
34397 issued in the current cycle so we do nothing.
34398
34399 - If the pendulum is 1, then a single load has been issued in this
34400 cycle and we attempt to locate another load in the ready list to
34401 issue with it.
34402
34403 - If the pendulum is -2, then two stores have already been
34404 issued in this cycle, so we increase the priority of the first load
34405 in the ready list to increase its likelihood of being chosen first
34406 in the next cycle.
34407
34408 - If the pendulum is -1, then a single store has been issued in this
34409 cycle and we attempt to locate another store in the ready list to
34410 issue with it, preferring a store to an adjacent memory location to
34411 facilitate store pairing in the store queue.
34412
34413 - If the pendulum is 2, then two loads have already been
34414 issued in this cycle, so we increase the priority of the first store
34415 in the ready list to increase its likelihood of being chosen first
34416 in the next cycle.
34417
34418 - If the pendulum < -2 or > 2, then do nothing.
34419
34420 Note: This code covers the most common scenarios. There exist
34421 non-load/store instructions which make use of the LSU and which
34422 would need to be accounted for to strictly model the behavior
34423 of the machine. Those instructions are currently unaccounted
34424 for to help minimize the compile-time overhead of this code.
34425 */
34426 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
34427 {
34428 int pos;
34429 int i;
34430 rtx_insn *tmp;
34431 rtx load_mem, str_mem;
34432
34433 if (is_store_insn (last_scheduled_insn, &str_mem))
34434 /* Issuing a store, swing the load_store_pendulum to the left */
34435 load_store_pendulum--;
34436 else if (is_load_insn (last_scheduled_insn, &load_mem))
34437 /* Issuing a load, swing the load_store_pendulum to the right */
34438 load_store_pendulum++;
34439 else
34440 return cached_can_issue_more;
34441
34442 /* If the pendulum is balanced, or there is only one instruction on
34443 the ready list, then all is well, so return. */
34444 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
34445 return cached_can_issue_more;
34446
34447 if (load_store_pendulum == 1)
34448 {
34449 /* A load has been issued in this cycle. Scan the ready list
34450 for another load to issue with it */
34451 pos = *pn_ready - 1;
34452
34453 while (pos >= 0)
34454 {
34455 if (is_load_insn (ready[pos], &load_mem))
34456 {
34457 /* Found a load. Move it to the head of the ready list,
34458 and adjust its priority so that it is more likely to
34459 stay there */
34460 tmp = ready[pos];
34461 for (i = pos; i < *pn_ready - 1; i++)
34462 ready[i] = ready[i + 1];
34463 ready[*pn_ready - 1] = tmp;
34464
34465 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34466 INSN_PRIORITY (tmp)++;
34467 break;
34468 }
34469 pos--;
34470 }
34471 }
34472 else if (load_store_pendulum == -2)
34473 {
34474 /* Two stores have been issued in this cycle. Increase the
34475 priority of the first load in the ready list to favor it for
34476 issuing in the next cycle. */
34477 pos = *pn_ready - 1;
34478
34479 while (pos >= 0)
34480 {
34481 if (is_load_insn (ready[pos], &load_mem)
34482 && !sel_sched_p ()
34483 && INSN_PRIORITY_KNOWN (ready[pos]))
34484 {
34485 INSN_PRIORITY (ready[pos])++;
34486
34487 /* Adjust the pendulum to account for the fact that a load
34488 was found and increased in priority. This is to prevent
34489 increasing the priority of multiple loads */
34490 load_store_pendulum--;
34491
34492 break;
34493 }
34494 pos--;
34495 }
34496 }
34497 else if (load_store_pendulum == -1)
34498 {
34499 /* A store has been issued in this cycle. Scan the ready list for
34500 another store to issue with it, preferring a store to an adjacent
34501 memory location */
34502 int first_store_pos = -1;
34503
34504 pos = *pn_ready - 1;
34505
34506 while (pos >= 0)
34507 {
34508 if (is_store_insn (ready[pos], &str_mem))
34509 {
34510 rtx str_mem2;
34511 /* Maintain the index of the first store found on the
34512 list */
34513 if (first_store_pos == -1)
34514 first_store_pos = pos;
34515
34516 if (is_store_insn (last_scheduled_insn, &str_mem2)
34517 && adjacent_mem_locations (str_mem, str_mem2))
34518 {
34519 /* Found an adjacent store. Move it to the head of the
34520 ready list, and adjust its priority so that it is
34521 more likely to stay there */
34522 tmp = ready[pos];
34523 for (i = pos; i < *pn_ready - 1; i++)
34524 ready[i] = ready[i + 1];
34525 ready[*pn_ready - 1] = tmp;
34526
34527 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34528 INSN_PRIORITY (tmp)++;
34529
34530 first_store_pos = -1;
34531
34532 break;
34533 }
34534 }
34535 pos--;
34536 }
34537
34538 if (first_store_pos >= 0)
34539 {
34540 /* An adjacent store wasn't found, but a non-adjacent store was,
34541 so move the non-adjacent store to the front of the ready
34542 list, and adjust its priority so that it is more likely to
34543 stay there. */
34544 tmp = ready[first_store_pos];
34545 for (i = first_store_pos; i < *pn_ready - 1; i++)
34546 ready[i] = ready[i + 1];
34547 ready[*pn_ready - 1] = tmp;
34548 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
34549 INSN_PRIORITY (tmp)++;
34550 }
34551 }
34552 else if (load_store_pendulum == 2)
34553 {
34554 /* Two loads have been issued in this cycle. Increase the priority
34555 of the first store in the ready list to favor it for issuing in
34556 the next cycle. */
34557 pos = *pn_ready - 1;
34558
34559 while (pos >= 0)
34560 {
34561 if (is_store_insn (ready[pos], &str_mem)
34562 && !sel_sched_p ()
34563 && INSN_PRIORITY_KNOWN (ready[pos]))
34564 {
34565 INSN_PRIORITY (ready[pos])++;
34566
34567 /* Adjust the pendulum to account for the fact that a store
34568 was found and increased in priority. This is to prevent
34569 increasing the priority of multiple stores */
34570 load_store_pendulum++;
34571
34572 break;
34573 }
34574 pos--;
34575 }
34576 }
34577 }
34578
34579 /* Do Power9 dependent reordering if necessary. */
34580 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
34581 && recog_memoized (last_scheduled_insn) >= 0)
34582 return power9_sched_reorder2 (ready, *pn_ready - 1);
34583
34584 return cached_can_issue_more;
34585 }
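
/* A worked example of the pendulum above: suppose two loads issue
   back to back in one cycle.  The pendulum swings 0 -> 1 (scan the
   ready list for a second load to pair with the first), then 1 -> 2
   (raise the priority of the first ready store and step the pendulum
   to 3 so only one store gets bumped).  A standalone sketch of the
   basic swing, with ISSUED_LOAD standing in for the
   is_load_insn/is_store_insn tests on the last scheduled insn:

     static int
     swing_pendulum (int pendulum, bool issued_load)
     {
       return issued_load ? pendulum + 1 : pendulum - 1;
     }

   Once the pendulum leaves [-2, 2], the ready list is left alone for
   the remainder of the cycle.  */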
34586
34587 /* Return whether the presence of INSN causes a dispatch group termination
34588 of group WHICH_GROUP.
34589
34590 If WHICH_GROUP == current_group, this function will return true if INSN
34591 causes the termination of the current group (i.e., the dispatch group to
34592 which INSN belongs). This means that INSN will be the last insn in the
34593 group it belongs to.
34594
34595 If WHICH_GROUP == previous_group, this function will return true if INSN
34596 causes the termination of the previous group (i.e., the dispatch group that
34597 precedes the group to which INSN belongs). This means that INSN will be
34598 the first insn in the group it belongs to. */
34599
34600 static bool
34601 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
34602 {
34603 bool first, last;
34604
34605 if (! insn)
34606 return false;
34607
34608 first = insn_must_be_first_in_group (insn);
34609 last = insn_must_be_last_in_group (insn);
34610
34611 if (first && last)
34612 return true;
34613
34614 if (which_group == current_group)
34615 return last;
34616 else if (which_group == previous_group)
34617 return first;
34618
34619 return false;
34620 }
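
/* For example, on POWER6 a TYPE_SYNC insn must be both first and last
   in its group, so for it this function returns true for either value
   of WHICH_GROUP:

     WHICH_GROUP       test that applies               result for sync
     previous_group    insn_must_be_first_in_group     true
     current_group     insn_must_be_last_in_group      true

   An insn with neither property never terminates a group.  */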
34621
34622
34623 static bool
34624 insn_must_be_first_in_group (rtx_insn *insn)
34625 {
34626 enum attr_type type;
34627
34628 if (!insn
34629 || NOTE_P (insn)
34630 || DEBUG_INSN_P (insn)
34631 || GET_CODE (PATTERN (insn)) == USE
34632 || GET_CODE (PATTERN (insn)) == CLOBBER)
34633 return false;
34634
34635 switch (rs6000_cpu)
34636 {
34637 case PROCESSOR_POWER5:
34638 if (is_cracked_insn (insn))
34639 return true;
34640 /* FALLTHRU */
34641 case PROCESSOR_POWER4:
34642 if (is_microcoded_insn (insn))
34643 return true;
34644
34645 if (!rs6000_sched_groups)
34646 return false;
34647
34648 type = get_attr_type (insn);
34649
34650 switch (type)
34651 {
34652 case TYPE_MFCR:
34653 case TYPE_MFCRF:
34654 case TYPE_MTCR:
34655 case TYPE_DELAYED_CR:
34656 case TYPE_CR_LOGICAL:
34657 case TYPE_MTJMPR:
34658 case TYPE_MFJMPR:
34659 case TYPE_DIV:
34660 case TYPE_LOAD_L:
34661 case TYPE_STORE_C:
34662 case TYPE_ISYNC:
34663 case TYPE_SYNC:
34664 return true;
34665 default:
34666 break;
34667 }
34668 break;
34669 case PROCESSOR_POWER6:
34670 type = get_attr_type (insn);
34671
34672 switch (type)
34673 {
34674 case TYPE_EXTS:
34675 case TYPE_CNTLZ:
34676 case TYPE_TRAP:
34677 case TYPE_MUL:
34678 case TYPE_INSERT:
34679 case TYPE_FPCOMPARE:
34680 case TYPE_MFCR:
34681 case TYPE_MTCR:
34682 case TYPE_MFJMPR:
34683 case TYPE_MTJMPR:
34684 case TYPE_ISYNC:
34685 case TYPE_SYNC:
34686 case TYPE_LOAD_L:
34687 case TYPE_STORE_C:
34688 return true;
34689 case TYPE_SHIFT:
34690 if (get_attr_dot (insn) == DOT_NO
34691 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34692 return true;
34693 else
34694 break;
34695 case TYPE_DIV:
34696 if (get_attr_size (insn) == SIZE_32)
34697 return true;
34698 else
34699 break;
34700 case TYPE_LOAD:
34701 case TYPE_STORE:
34702 case TYPE_FPLOAD:
34703 case TYPE_FPSTORE:
34704 if (get_attr_update (insn) == UPDATE_YES)
34705 return true;
34706 else
34707 break;
34708 default:
34709 break;
34710 }
34711 break;
34712 case PROCESSOR_POWER7:
34713 type = get_attr_type (insn);
34714
34715 switch (type)
34716 {
34717 case TYPE_CR_LOGICAL:
34718 case TYPE_MFCR:
34719 case TYPE_MFCRF:
34720 case TYPE_MTCR:
34721 case TYPE_DIV:
34722 case TYPE_ISYNC:
34723 case TYPE_LOAD_L:
34724 case TYPE_STORE_C:
34725 case TYPE_MFJMPR:
34726 case TYPE_MTJMPR:
34727 return true;
34728 case TYPE_MUL:
34729 case TYPE_SHIFT:
34730 case TYPE_EXTS:
34731 if (get_attr_dot (insn) == DOT_YES)
34732 return true;
34733 else
34734 break;
34735 case TYPE_LOAD:
34736 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34737 || get_attr_update (insn) == UPDATE_YES)
34738 return true;
34739 else
34740 break;
34741 case TYPE_STORE:
34742 case TYPE_FPLOAD:
34743 case TYPE_FPSTORE:
34744 if (get_attr_update (insn) == UPDATE_YES)
34745 return true;
34746 else
34747 break;
34748 default:
34749 break;
34750 }
34751 break;
34752 case PROCESSOR_POWER8:
34753 type = get_attr_type (insn);
34754
34755 switch (type)
34756 {
34757 case TYPE_CR_LOGICAL:
34758 case TYPE_DELAYED_CR:
34759 case TYPE_MFCR:
34760 case TYPE_MFCRF:
34761 case TYPE_MTCR:
34762 case TYPE_SYNC:
34763 case TYPE_ISYNC:
34764 case TYPE_LOAD_L:
34765 case TYPE_STORE_C:
34766 case TYPE_VECSTORE:
34767 case TYPE_MFJMPR:
34768 case TYPE_MTJMPR:
34769 return true;
34770 case TYPE_SHIFT:
34771 case TYPE_EXTS:
34772 case TYPE_MUL:
34773 if (get_attr_dot (insn) == DOT_YES)
34774 return true;
34775 else
34776 break;
34777 case TYPE_LOAD:
34778 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34779 || get_attr_update (insn) == UPDATE_YES)
34780 return true;
34781 else
34782 break;
34783 case TYPE_STORE:
34784 if (get_attr_update (insn) == UPDATE_YES
34785 && get_attr_indexed (insn) == INDEXED_YES)
34786 return true;
34787 else
34788 break;
34789 default:
34790 break;
34791 }
34792 break;
34793 default:
34794 break;
34795 }
34796
34797 return false;
34798 }
34799
34800 static bool
34801 insn_must_be_last_in_group (rtx_insn *insn)
34802 {
34803 enum attr_type type;
34804
34805 if (!insn
34806 || NOTE_P (insn)
34807 || DEBUG_INSN_P (insn)
34808 || GET_CODE (PATTERN (insn)) == USE
34809 || GET_CODE (PATTERN (insn)) == CLOBBER)
34810 return false;
34811
34812 switch (rs6000_cpu) {
34813 case PROCESSOR_POWER4:
34814 case PROCESSOR_POWER5:
34815 if (is_microcoded_insn (insn))
34816 return true;
34817
34818 if (is_branch_slot_insn (insn))
34819 return true;
34820
34821 break;
34822 case PROCESSOR_POWER6:
34823 type = get_attr_type (insn);
34824
34825 switch (type)
34826 {
34827 case TYPE_EXTS:
34828 case TYPE_CNTLZ:
34829 case TYPE_TRAP:
34830 case TYPE_MUL:
34831 case TYPE_FPCOMPARE:
34832 case TYPE_MFCR:
34833 case TYPE_MTCR:
34834 case TYPE_MFJMPR:
34835 case TYPE_MTJMPR:
34836 case TYPE_ISYNC:
34837 case TYPE_SYNC:
34838 case TYPE_LOAD_L:
34839 case TYPE_STORE_C:
34840 return true;
34841 case TYPE_SHIFT:
34842 if (get_attr_dot (insn) == DOT_NO
34843 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
34844 return true;
34845 else
34846 break;
34847 case TYPE_DIV:
34848 if (get_attr_size (insn) == SIZE_32)
34849 return true;
34850 else
34851 break;
34852 default:
34853 break;
34854 }
34855 break;
34856 case PROCESSOR_POWER7:
34857 type = get_attr_type (insn);
34858
34859 switch (type)
34860 {
34861 case TYPE_ISYNC:
34862 case TYPE_SYNC:
34863 case TYPE_LOAD_L:
34864 case TYPE_STORE_C:
34865 return true;
34866 case TYPE_LOAD:
34867 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34868 && get_attr_update (insn) == UPDATE_YES)
34869 return true;
34870 else
34871 break;
34872 case TYPE_STORE:
34873 if (get_attr_update (insn) == UPDATE_YES
34874 && get_attr_indexed (insn) == INDEXED_YES)
34875 return true;
34876 else
34877 break;
34878 default:
34879 break;
34880 }
34881 break;
34882 case PROCESSOR_POWER8:
34883 type = get_attr_type (insn);
34884
34885 switch (type)
34886 {
34887 case TYPE_MFCR:
34888 case TYPE_MTCR:
34889 case TYPE_ISYNC:
34890 case TYPE_SYNC:
34891 case TYPE_LOAD_L:
34892 case TYPE_STORE_C:
34893 return true;
34894 case TYPE_LOAD:
34895 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
34896 && get_attr_update (insn) == UPDATE_YES)
34897 return true;
34898 else
34899 break;
34900 case TYPE_STORE:
34901 if (get_attr_update (insn) == UPDATE_YES
34902 && get_attr_indexed (insn) == INDEXED_YES)
34903 return true;
34904 else
34905 break;
34906 default:
34907 break;
34908 }
34909 break;
34910 default:
34911 break;
34912 }
34913
34914 return false;
34915 }
34916
34917 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
34918 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
34919
34920 static bool
34921 is_costly_group (rtx *group_insns, rtx next_insn)
34922 {
34923 int i;
34924 int issue_rate = rs6000_issue_rate ();
34925
34926 for (i = 0; i < issue_rate; i++)
34927 {
34928 sd_iterator_def sd_it;
34929 dep_t dep;
34930 rtx insn = group_insns[i];
34931
34932 if (!insn)
34933 continue;
34934
34935 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
34936 {
34937 rtx next = DEP_CON (dep);
34938
34939 if (next == next_insn
34940 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
34941 return true;
34942 }
34943 }
34944
34945 return false;
34946 }
34947
34948 /* Utility of the function redefine_groups.
34949 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
34950 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
34951 to keep it "far" (in a separate group) from GROUP_INSNS, following
34952 one of the following schemes, depending on the value of the flag
34953 -minsert-sched-nops = X:
34954 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
34955 in order to force NEXT_INSN into a separate group.
34956 (2) X < sched_finish_regroup_exact: insert exactly X nops.
34957 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
34958 insertion (has a group just ended, how many vacant issue slots remain in the
34959 last group, and how many dispatch groups were encountered so far). */
34960
34961 static int
34962 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
34963 rtx_insn *next_insn, bool *group_end, int can_issue_more,
34964 int *group_count)
34965 {
34966 rtx nop;
34967 bool force;
34968 int issue_rate = rs6000_issue_rate ();
34969 bool end = *group_end;
34970 int i;
34971
34972 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
34973 return can_issue_more;
34974
34975 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
34976 return can_issue_more;
34977
34978 force = is_costly_group (group_insns, next_insn);
34979 if (!force)
34980 return can_issue_more;
34981
34982 if (sched_verbose > 6)
34983 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
34984 *group_count, can_issue_more);
34985
34986 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
34987 {
34988 if (*group_end)
34989 can_issue_more = 0;
34990
34991 /* Since only a branch can be issued in the last issue_slot, it is
34992 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
34993 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
34994 in this case the last nop will start a new group and the branch
34995 will be forced to the new group. */
34996 if (can_issue_more && !is_branch_slot_insn (next_insn))
34997 can_issue_more--;
34998
34999 /* Do we have a special group ending nop? */
35000 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
35001 || rs6000_cpu_attr == CPU_POWER8)
35002 {
35003 nop = gen_group_ending_nop ();
35004 emit_insn_before (nop, next_insn);
35005 can_issue_more = 0;
35006 }
35007 else
35008 while (can_issue_more > 0)
35009 {
35010 nop = gen_nop ();
35011 emit_insn_before (nop, next_insn);
35012 can_issue_more--;
35013 }
35014
35015 *group_end = true;
35016 return 0;
35017 }
35018
35019 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
35020 {
35021 int n_nops = rs6000_sched_insert_nops;
35022
35023 /* Nops can't be issued from the branch slot, so the effective
35024 issue_rate for nops is 'issue_rate - 1'. */
35025 if (can_issue_more == 0)
35026 can_issue_more = issue_rate;
35027 can_issue_more--;
35028 if (can_issue_more == 0)
35029 {
35030 can_issue_more = issue_rate - 1;
35031 (*group_count)++;
35032 end = true;
35033 for (i = 0; i < issue_rate; i++)
35034 {
35035 group_insns[i] = 0;
35036 }
35037 }
35038
35039 while (n_nops > 0)
35040 {
35041 nop = gen_nop ();
35042 emit_insn_before (nop, next_insn);
35043 if (can_issue_more == issue_rate - 1) /* new group begins */
35044 end = false;
35045 can_issue_more--;
35046 if (can_issue_more == 0)
35047 {
35048 can_issue_more = issue_rate - 1;
35049 (*group_count)++;
35050 end = true;
35051 for (i = 0; i < issue_rate; i++)
35052 {
35053 group_insns[i] = 0;
35054 }
35055 }
35056 n_nops--;
35057 }
35058
35059 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
35060 can_issue_more++;
35061
35062 /* Is next_insn going to start a new group? */
35063 *group_end
35064 = (end
35065 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35066 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35067 || (can_issue_more < issue_rate &&
35068 insn_terminates_group_p (next_insn, previous_group)));
35069 if (*group_end && end)
35070 (*group_count)--;
35071
35072 if (sched_verbose > 6)
35073 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
35074 *group_count, can_issue_more);
35075 return can_issue_more;
35076 }
35077
35078 return can_issue_more;
35079 }
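
/* A worked example of the exact scheme above, assuming a hypothetical
   issue_rate of 4: if two insns already occupy the current group,
   can_issue_more is 2 when a costly NEXT_INSN is seen.  If NEXT_INSN
   is not a branch, a single nop ('can_issue_more - 1') suffices: only
   the branch slot stays vacant, and since a non-branch cannot issue
   there, NEXT_INSN is forced into a new group.  If NEXT_INSN is a
   branch, both remaining slots are filled with nops and the branch
   then starts the new group itself.  */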
35080
35081 /* This function tries to synch the dispatch groups that the compiler "sees"
35082 with the dispatch groups that the processor dispatcher is expected to
35083 form in practice. It tries to achieve this synchronization by forcing the
35084 estimated processor grouping on the compiler (as opposed to the function
35085 'pad_groups' which tries to force the scheduler's grouping on the processor).
35086
35087 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
35088 examines the (estimated) dispatch groups that will be formed by the processor
35089 dispatcher. It marks these group boundaries to reflect the estimated
35090 processor grouping, overriding the grouping that the scheduler had marked.
35091 Depending on the value of the flag '-minsert-sched-nops' this function can
35092 force certain insns into separate groups or force a certain distance between
35093 them by inserting nops, for example, if there exists a "costly dependence"
35094 between the insns.
35095
35096 The function estimates the group boundaries that the processor will form as
35097 follows: It keeps track of how many vacant issue slots are available after
35098 each insn. A subsequent insn will start a new group if one of the following
35099 4 cases applies:
35100 - no more vacant issue slots remain in the current dispatch group.
35101 - only the last issue slot, which is the branch slot, is vacant, but the next
35102 insn is not a branch.
35103 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
35104 which means that a cracked insn (which occupies two issue slots) can't be
35105 issued in this group.
35106 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
35107 start a new group. */
35108
35109 static int
35110 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35111 rtx_insn *tail)
35112 {
35113 rtx_insn *insn, *next_insn;
35114 int issue_rate;
35115 int can_issue_more;
35116 int slot, i;
35117 bool group_end;
35118 int group_count = 0;
35119 rtx *group_insns;
35120
35121 /* Initialize. */
35122 issue_rate = rs6000_issue_rate ();
35123 group_insns = XALLOCAVEC (rtx, issue_rate);
35124 for (i = 0; i < issue_rate; i++)
35125 {
35126 group_insns[i] = 0;
35127 }
35128 can_issue_more = issue_rate;
35129 slot = 0;
35130 insn = get_next_active_insn (prev_head_insn, tail);
35131 group_end = false;
35132
35133 while (insn != NULL_RTX)
35134 {
35135 slot = (issue_rate - can_issue_more);
35136 group_insns[slot] = insn;
35137 can_issue_more =
35138 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35139 if (insn_terminates_group_p (insn, current_group))
35140 can_issue_more = 0;
35141
35142 next_insn = get_next_active_insn (insn, tail);
35143 if (next_insn == NULL_RTX)
35144 return group_count + 1;
35145
35146 /* Is next_insn going to start a new group? */
35147 group_end
35148 = (can_issue_more == 0
35149 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
35150 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
35151 || (can_issue_more < issue_rate &&
35152 insn_terminates_group_p (next_insn, previous_group)));
35153
35154 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
35155 next_insn, &group_end, can_issue_more,
35156 &group_count);
35157
35158 if (group_end)
35159 {
35160 group_count++;
35161 can_issue_more = 0;
35162 for (i = 0; i < issue_rate; i++)
35163 {
35164 group_insns[i] = 0;
35165 }
35166 }
35167
35168 if (GET_MODE (next_insn) == TImode && can_issue_more)
35169 PUT_MODE (next_insn, VOIDmode);
35170 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
35171 PUT_MODE (next_insn, TImode);
35172
35173 insn = next_insn;
35174 if (can_issue_more == 0)
35175 can_issue_more = issue_rate;
35176 } /* while */
35177
35178 return group_count;
35179 }
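
/* A worked example of the boundary estimate, assuming a hypothetical
   issue_rate of 4: after three single-slot insns, can_issue_more is 1,
   so a non-branch fourth insn starts a new group (the second case in
   the comment above); after two single-slot insns, can_issue_more is
   2, so a cracked insn cannot fit once only the branch slot and one
   other slot remain, and it too starts a new group (the third case).
   The TImode/VOIDmode marking at the bottom of the loop records these
   boundaries on the insn stream itself: TImode on an insn means it
   starts a new dispatch group, which is what pad_groups tests.  */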
35180
35181 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
35182 dispatch group boundaries that the scheduler had marked. Pad with nops
35183 any dispatch groups which have vacant issue slots, in order to force the
35184 scheduler's grouping on the processor dispatcher. The function
35185 returns the number of dispatch groups found. */
35186
35187 static int
35188 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
35189 rtx_insn *tail)
35190 {
35191 rtx_insn *insn, *next_insn;
35192 rtx nop;
35193 int issue_rate;
35194 int can_issue_more;
35195 int group_end;
35196 int group_count = 0;
35197
35198 /* Initialize issue_rate. */
35199 issue_rate = rs6000_issue_rate ();
35200 can_issue_more = issue_rate;
35201
35202 insn = get_next_active_insn (prev_head_insn, tail);
35203 next_insn = get_next_active_insn (insn, tail);
35204
35205 while (insn != NULL_RTX)
35206 {
35207 can_issue_more =
35208 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
35209
35210 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
35211
35212 if (next_insn == NULL_RTX)
35213 break;
35214
35215 if (group_end)
35216 {
35217 /* If the scheduler had marked group termination at this location
35218 (between insn and next_insn), and neither insn nor next_insn will
35219 force group termination, pad the group with nops to force group
35220 termination. */
35221 if (can_issue_more
35222 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
35223 && !insn_terminates_group_p (insn, current_group)
35224 && !insn_terminates_group_p (next_insn, previous_group))
35225 {
35226 if (!is_branch_slot_insn (next_insn))
35227 can_issue_more--;
35228
35229 while (can_issue_more)
35230 {
35231 nop = gen_nop ();
35232 emit_insn_before (nop, next_insn);
35233 can_issue_more--;
35234 }
35235 }
35236
35237 can_issue_more = issue_rate;
35238 group_count++;
35239 }
35240
35241 insn = next_insn;
35242 next_insn = get_next_active_insn (insn, tail);
35243 }
35244
35245 return group_count;
35246 }
35247
35248 /* We're beginning a new block. Initialize data structures as necessary. */
35249
35250 static void
35251 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
35252 int sched_verbose ATTRIBUTE_UNUSED,
35253 int max_ready ATTRIBUTE_UNUSED)
35254 {
35255 last_scheduled_insn = NULL;
35256 load_store_pendulum = 0;
35257 divide_cnt = 0;
35258 vec_pairing = 0;
35259 }
35260
35261 /* The following function is called at the end of scheduling BB.
35262 After reload, it inserts nops to enforce insn group bundling. */
35263
35264 static void
35265 rs6000_sched_finish (FILE *dump, int sched_verbose)
35266 {
35267 int n_groups;
35268
35269 if (sched_verbose)
35270 fprintf (dump, "=== Finishing schedule.\n");
35271
35272 if (reload_completed && rs6000_sched_groups)
35273 {
35274 /* Do not run sched_finish hook when selective scheduling enabled. */
35275 if (sel_sched_p ())
35276 return;
35277
35278 if (rs6000_sched_insert_nops == sched_finish_none)
35279 return;
35280
35281 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
35282 n_groups = pad_groups (dump, sched_verbose,
35283 current_sched_info->prev_head,
35284 current_sched_info->next_tail);
35285 else
35286 n_groups = redefine_groups (dump, sched_verbose,
35287 current_sched_info->prev_head,
35288 current_sched_info->next_tail);
35289
35290 if (sched_verbose >= 6)
35291 {
35292 fprintf (dump, "ngroups = %d\n", n_groups);
35293 print_rtl (dump, current_sched_info->prev_head);
35294 fprintf (dump, "Done finish_sched\n");
35295 }
35296 }
35297 }
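
/* Which of the two passes runs is selected by -minsert-sched-nops,
   stored in rs6000_sched_insert_nops:

     sched_finish_none           do nothing
     sched_finish_pad_groups     pad_groups: pad the scheduler's own
                                 groups out with nops
     sched_finish_regroup_exact  redefine_groups, inserting exactly as
                                 many nops as needed at costly
                                 dependences
     a plain number N            redefine_groups, inserting exactly N
                                 nops at each costly boundary

   In every case this runs only after reload, only when
   rs6000_sched_groups is set, and never under selective scheduling.  */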
35298
35299 struct rs6000_sched_context
35300 {
35301 short cached_can_issue_more;
35302 rtx_insn *last_scheduled_insn;
35303 int load_store_pendulum;
35304 int divide_cnt;
35305 int vec_pairing;
35306 };
35307
35308 typedef struct rs6000_sched_context rs6000_sched_context_def;
35309 typedef rs6000_sched_context_def *rs6000_sched_context_t;
35310
35311 /* Allocate store for new scheduling context. */
35312 static void *
35313 rs6000_alloc_sched_context (void)
35314 {
35315 return xmalloc (sizeof (rs6000_sched_context_def));
35316 }
35317
35318 /* If CLEAN_P is true, initialize _SC with clean data;
35319 otherwise, initialize it from the global context. */
35320 static void
35321 rs6000_init_sched_context (void *_sc, bool clean_p)
35322 {
35323 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35324
35325 if (clean_p)
35326 {
35327 sc->cached_can_issue_more = 0;
35328 sc->last_scheduled_insn = NULL;
35329 sc->load_store_pendulum = 0;
35330 sc->divide_cnt = 0;
35331 sc->vec_pairing = 0;
35332 }
35333 else
35334 {
35335 sc->cached_can_issue_more = cached_can_issue_more;
35336 sc->last_scheduled_insn = last_scheduled_insn;
35337 sc->load_store_pendulum = load_store_pendulum;
35338 sc->divide_cnt = divide_cnt;
35339 sc->vec_pairing = vec_pairing;
35340 }
35341 }
35342
35343 /* Sets the global scheduling context to the one pointed to by _SC. */
35344 static void
35345 rs6000_set_sched_context (void *_sc)
35346 {
35347 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
35348
35349 gcc_assert (sc != NULL);
35350
35351 cached_can_issue_more = sc->cached_can_issue_more;
35352 last_scheduled_insn = sc->last_scheduled_insn;
35353 load_store_pendulum = sc->load_store_pendulum;
35354 divide_cnt = sc->divide_cnt;
35355 vec_pairing = sc->vec_pairing;
35356 }
35357
35358 /* Free _SC. */
35359 static void
35360 rs6000_free_sched_context (void *_sc)
35361 {
35362 gcc_assert (_sc != NULL);
35363
35364 free (_sc);
35365 }
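
/* These four hooks give the selective scheduler a save/restore
   protocol for the global scheduling state above.  A sketch of the
   expected lifecycle (the scheduler, not this file, drives the
   calls):

     void *sc = rs6000_alloc_sched_context ();
     rs6000_init_sched_context (sc, true);    (start from clean state)
     rs6000_set_sched_context (sc);           (make it the global state)
     ...
     rs6000_init_sched_context (sc, false);   (snapshot current globals)
     rs6000_free_sched_context (sc);
   */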
35366
35367 static bool
35368 rs6000_sched_can_speculate_insn (rtx_insn *insn)
35369 {
35370 switch (get_attr_type (insn))
35371 {
35372 case TYPE_DIV:
35373 case TYPE_SDIV:
35374 case TYPE_DDIV:
35375 case TYPE_VECDIV:
35376 case TYPE_SSQRT:
35377 case TYPE_DSQRT:
35378 return false;
35379
35380 default:
35381 return true;
35382 }
35383 }
35384 \f
35385 /* Length in units of the trampoline for entering a nested function. */
35386
35387 int
35388 rs6000_trampoline_size (void)
35389 {
35390 int ret = 0;
35391
35392 switch (DEFAULT_ABI)
35393 {
35394 default:
35395 gcc_unreachable ();
35396
35397 case ABI_AIX:
35398 ret = (TARGET_32BIT) ? 12 : 24;
35399 break;
35400
35401 case ABI_ELFv2:
35402 gcc_assert (!TARGET_32BIT);
35403 ret = 32;
35404 break;
35405
35406 case ABI_DARWIN:
35407 case ABI_V4:
35408 ret = (TARGET_32BIT) ? 40 : 48;
35409 break;
35410 }
35411
35412 return ret;
35413 }
35414
35415 /* Emit RTL insns to initialize the variable parts of a trampoline.
35416 FNADDR is an RTX for the address of the function's pure code.
35417 CXT is an RTX for the static chain value for the function. */
35418
35419 static void
35420 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
35421 {
35422 int regsize = (TARGET_32BIT) ? 4 : 8;
35423 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
35424 rtx ctx_reg = force_reg (Pmode, cxt);
35425 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
35426
35427 switch (DEFAULT_ABI)
35428 {
35429 default:
35430 gcc_unreachable ();
35431
35432 /* Under AIX, just build the 3 word function descriptor */
35433 case ABI_AIX:
35434 {
35435 rtx fnmem, fn_reg, toc_reg;
35436
35437 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
35438 error ("cannot take the address of a nested function if you use "
35439 "the %<-mno-pointers-to-nested-functions%> option");
35440
35441 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
35442 fn_reg = gen_reg_rtx (Pmode);
35443 toc_reg = gen_reg_rtx (Pmode);
35444
35445 /* Macro to shorten the code expansions below. */
35446 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
35447
35448 m_tramp = replace_equiv_address (m_tramp, addr);
35449
35450 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
35451 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
35452 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
35453 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
35454 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
35455
35456 # undef MEM_PLUS
35457 }
35458 break;
35459
35460 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
35461 case ABI_ELFv2:
35462 case ABI_DARWIN:
35463 case ABI_V4:
35464 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
35465 LCT_NORMAL, VOIDmode,
35466 addr, Pmode,
35467 GEN_INT (rs6000_trampoline_size ()), SImode,
35468 fnaddr, Pmode,
35469 ctx_reg, Pmode);
35470 break;
35471 }
35472 }
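
/* The resulting AIX trampoline is itself a three-word function
   descriptor (regsize is 4 or 8 bytes):

     offset 0            code address, copied from FNADDR's descriptor
     offset regsize      TOC pointer, copied likewise
     offset 2*regsize    static chain value (CXT)

   so a call through the trampoline behaves like a call to FNADDR
   except that the descriptor's environment word supplies the static
   chain.  */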
35473
35474 \f
35475 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
35476 identifier as an argument, so the front end shouldn't look it up. */
35477
35478 static bool
35479 rs6000_attribute_takes_identifier_p (const_tree attr_id)
35480 {
35481 return is_attribute_p ("altivec", attr_id);
35482 }
35483
35484 /* Handle the "altivec" attribute. The attribute may have
35485 arguments as follows:
35486
35487 __attribute__((altivec(vector__)))
35488 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
35489 __attribute__((altivec(bool__))) (always followed by 'unsigned')
35490
35491 and may appear more than once (e.g., 'vector bool char') in a
35492 given declaration. */
35493
35494 static tree
35495 rs6000_handle_altivec_attribute (tree *node,
35496 tree name ATTRIBUTE_UNUSED,
35497 tree args,
35498 int flags ATTRIBUTE_UNUSED,
35499 bool *no_add_attrs)
35500 {
35501 tree type = *node, result = NULL_TREE;
35502 machine_mode mode;
35503 int unsigned_p;
35504 char altivec_type
35505 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
35506 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
35507 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
35508 : '?');
35509
35510 while (POINTER_TYPE_P (type)
35511 || TREE_CODE (type) == FUNCTION_TYPE
35512 || TREE_CODE (type) == METHOD_TYPE
35513 || TREE_CODE (type) == ARRAY_TYPE)
35514 type = TREE_TYPE (type);
35515
35516 mode = TYPE_MODE (type);
35517
35518 /* Check for invalid AltiVec type qualifiers. */
35519 if (type == long_double_type_node)
35520 error ("use of %<long double%> in AltiVec types is invalid");
35521 else if (type == boolean_type_node)
35522 error ("use of boolean types in AltiVec types is invalid");
35523 else if (TREE_CODE (type) == COMPLEX_TYPE)
35524 error ("use of %<complex%> in AltiVec types is invalid");
35525 else if (DECIMAL_FLOAT_MODE_P (mode))
35526 error ("use of decimal floating point types in AltiVec types is invalid");
35527 else if (!TARGET_VSX)
35528 {
35529 if (type == long_unsigned_type_node || type == long_integer_type_node)
35530 {
35531 if (TARGET_64BIT)
35532 error ("use of %<long%> in AltiVec types is invalid for "
35533 "64-bit code without -mvsx");
35534 else if (rs6000_warn_altivec_long)
35535 warning (0, "use of %<long%> in AltiVec types is deprecated; "
35536 "use %<int%>");
35537 }
35538 else if (type == long_long_unsigned_type_node
35539 || type == long_long_integer_type_node)
35540 error ("use of %<long long%> in AltiVec types is invalid without "
35541 "-mvsx");
35542 else if (type == double_type_node)
35543 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
35544 }
35545
35546 switch (altivec_type)
35547 {
35548 case 'v':
35549 unsigned_p = TYPE_UNSIGNED (type);
35550 switch (mode)
35551 {
35552 case E_TImode:
35553 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
35554 break;
35555 case E_DImode:
35556 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
35557 break;
35558 case E_SImode:
35559 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
35560 break;
35561 case E_HImode:
35562 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
35563 break;
35564 case E_QImode:
35565 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
35566 break;
35567 case E_SFmode: result = V4SF_type_node; break;
35568 case E_DFmode: result = V2DF_type_node; break;
35569 /* If the user says 'vector int bool', we may be handed the 'bool'
35570 attribute _before_ the 'vector' attribute, and so select the
35571 proper type in the 'b' case below. */
35572 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
35573 case E_V2DImode: case E_V2DFmode:
35574 result = type;
35575 default: break;
35576 }
35577 break;
35578 case 'b':
35579 switch (mode)
35580 {
35581 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
35582 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
35583 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
35584 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
35585 default: break;
35586 }
35587 break;
35588 case 'p':
35589 switch (mode)
35590 {
35591 case E_V8HImode: result = pixel_V8HI_type_node;
35592 default: break;
35593 }
35594 default: break;
35595 }
35596
35597 /* Propagate qualifiers attached to the element type
35598 onto the vector type. */
35599 if (result && result != type && TYPE_QUALS (type))
35600 result = build_qualified_type (result, TYPE_QUALS (type));
35601
35602 *no_add_attrs = true; /* No need to hang on to the attribute. */
35603
35604 if (result)
35605 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
35606
35607 return NULL_TREE;
35608 }
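
/* For example, the AltiVec 'vector' and 'bool' keywords are
   implemented in terms of this attribute, so declarations such as
   (the typedef names here are purely illustrative)

     typedef __attribute__ ((altivec (vector__))) int my_v4si;
     typedef __attribute__ ((altivec (bool__))) unsigned int my_vbi;

   arrive here with ALTIVEC_TYPE 'v' and 'b' respectively; the first
   maps SImode to V4SI_type_node, the second to bool_V4SI_type_node.  */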
35609
35610 /* AltiVec defines four built-in scalar types that serve as vector
35611 elements; we must teach the compiler how to mangle them. */
35612
35613 static const char *
35614 rs6000_mangle_type (const_tree type)
35615 {
35616 type = TYPE_MAIN_VARIANT (type);
35617
35618 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
35619 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
35620 return NULL;
35621
35622 if (type == bool_char_type_node) return "U6__boolc";
35623 if (type == bool_short_type_node) return "U6__bools";
35624 if (type == pixel_type_node) return "u7__pixel";
35625 if (type == bool_int_type_node) return "U6__booli";
35626 if (type == bool_long_type_node) return "U6__booll";
35627
35628 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
35629 "g" for IBM extended double, no matter whether it is long double (using
35630 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
35631 if (TARGET_FLOAT128_TYPE)
35632 {
35633 if (type == ieee128_float_type_node)
35634 return "U10__float128";
35635
35636 if (type == ibm128_float_type_node)
35637 return "g";
35638
35639 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
35640 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
35641 }
35642
35643 /* Mangle IBM extended float long double as `g' (__float128) on
35644 powerpc*-linux where long-double-64 previously was the default. */
35645 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
35646 && TARGET_ELF
35647 && TARGET_LONG_DOUBLE_128
35648 && !TARGET_IEEEQUAD)
35649 return "g";
35650
35651 /* For all other types, use normal C++ mangling. */
35652 return NULL;
35653 }
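
/* For example, with the strings above, a declaration such as

     void f (__pixel);    (an illustrative prototype)

   mangles as _Z1fu7__pixel: "u7__pixel" is the vendor-extended type
   encoding ('u' <length> <name>) of the Itanium C++ ABI.  */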
35654
35655 /* Handle a "longcall" or "shortcall" attribute; arguments as in
35656 struct attribute_spec.handler. */
35657
35658 static tree
35659 rs6000_handle_longcall_attribute (tree *node, tree name,
35660 tree args ATTRIBUTE_UNUSED,
35661 int flags ATTRIBUTE_UNUSED,
35662 bool *no_add_attrs)
35663 {
35664 if (TREE_CODE (*node) != FUNCTION_TYPE
35665 && TREE_CODE (*node) != FIELD_DECL
35666 && TREE_CODE (*node) != TYPE_DECL)
35667 {
35668 warning (OPT_Wattributes, "%qE attribute only applies to functions",
35669 name);
35670 *no_add_attrs = true;
35671 }
35672
35673 return NULL_TREE;
35674 }
35675
35676 /* Set longcall attributes on all functions declared when
35677 rs6000_default_long_calls is true. */
35678 static void
35679 rs6000_set_default_type_attributes (tree type)
35680 {
35681 if (rs6000_default_long_calls
35682 && (TREE_CODE (type) == FUNCTION_TYPE
35683 || TREE_CODE (type) == METHOD_TYPE))
35684 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
35685 NULL_TREE,
35686 TYPE_ATTRIBUTES (type));
35687
35688 #if TARGET_MACHO
35689 darwin_set_default_type_attributes (type);
35690 #endif
35691 }
35692
35693 /* Return a reference suitable for calling a function with the
35694 longcall attribute. */
35695
35696 rtx
35697 rs6000_longcall_ref (rtx call_ref)
35698 {
35699 const char *call_name;
35700 tree node;
35701
35702 if (GET_CODE (call_ref) != SYMBOL_REF)
35703 return call_ref;
35704
35705 /* System V adds '.' to the internal name, so skip any leading dots. */
35706 call_name = XSTR (call_ref, 0);
35707 if (*call_name == '.')
35708 {
35709 while (*call_name == '.')
35710 call_name++;
35711
35712 node = get_identifier (call_name);
35713 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
35714 }
35715
35716 return force_reg (Pmode, call_ref);
35717 }
35718 \f
35719 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
35720 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
35721 #endif
35722
35723 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35724 struct attribute_spec.handler. */
35725 static tree
35726 rs6000_handle_struct_attribute (tree *node, tree name,
35727 tree args ATTRIBUTE_UNUSED,
35728 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
35729 {
35730 tree *type = NULL;
35731 if (DECL_P (*node))
35732 {
35733 if (TREE_CODE (*node) == TYPE_DECL)
35734 type = &TREE_TYPE (*node);
35735 }
35736 else
35737 type = node;
35738
35739 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
35740 || TREE_CODE (*type) == UNION_TYPE)))
35741 {
35742 warning (OPT_Wattributes, "%qE attribute ignored", name);
35743 *no_add_attrs = true;
35744 }
35745
35746 else if ((is_attribute_p ("ms_struct", name)
35747 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
35748 || ((is_attribute_p ("gcc_struct", name)
35749 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
35750 {
35751 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
35752 name);
35753 *no_add_attrs = true;
35754 }
35755
35756 return NULL_TREE;
35757 }
35758
35759 static bool
35760 rs6000_ms_bitfield_layout_p (const_tree record_type)
35761 {
35762 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
35763 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35764 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
35765 }
35766 \f
35767 #ifdef USING_ELFOS_H
35768
35769 /* A get_unnamed_section callback, used for switching to toc_section. */
35770
35771 static void
35772 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
35773 {
35774 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35775 && TARGET_MINIMAL_TOC)
35776 {
35777 if (!toc_initialized)
35778 {
35779 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35780 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35781 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
35782 fprintf (asm_out_file, "\t.tc ");
35783 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
35784 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35785 fprintf (asm_out_file, "\n");
35786
35787 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35788 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35789 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35790 fprintf (asm_out_file, " = .+32768\n");
35791 toc_initialized = 1;
35792 }
35793 else
35794 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35795 }
35796 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
35797 {
35798 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
35799 if (!toc_initialized)
35800 {
35801 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35802 toc_initialized = 1;
35803 }
35804 }
35805 else
35806 {
35807 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
35808 if (!toc_initialized)
35809 {
35810 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
35811 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
35812 fprintf (asm_out_file, " = .+32768\n");
35813 toc_initialized = 1;
35814 }
35815 }
35816 }
35817
35818 /* Implement TARGET_ASM_INIT_SECTIONS. */
35819
35820 static void
35821 rs6000_elf_asm_init_sections (void)
35822 {
35823 toc_section
35824 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
35825
35826 sdata2_section
35827 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
35828 SDATA2_SECTION_ASM_OP);
35829 }
35830
35831 /* Implement TARGET_SELECT_RTX_SECTION. */
35832
35833 static section *
35834 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
35835 unsigned HOST_WIDE_INT align)
35836 {
35837 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
35838 return toc_section;
35839 else
35840 return default_elf_select_rtx_section (mode, x, align);
35841 }
35842 \f
35843 /* For a SYMBOL_REF, set generic flags and then perform some
35844 target-specific processing.
35845
35846 When the AIX ABI is requested on a non-AIX system, replace the
35847 function name with the real name (with a leading .) rather than the
35848 function descriptor name. This saves a lot of overriding code to
35849 read the prefixes. */
35850
35851 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
35852 static void
35853 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
35854 {
35855 default_encode_section_info (decl, rtl, first);
35856
35857 if (first
35858 && TREE_CODE (decl) == FUNCTION_DECL
35859 && !TARGET_AIX
35860 && DEFAULT_ABI == ABI_AIX)
35861 {
35862 rtx sym_ref = XEXP (rtl, 0);
35863 size_t len = strlen (XSTR (sym_ref, 0));
35864 char *str = XALLOCAVEC (char, len + 2);
35865 str[0] = '.';
35866 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
35867 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
35868 }
35869 }
35870
35871 static inline bool
35872 compare_section_name (const char *section, const char *templ)
35873 {
35874 int len;
35875
35876 len = strlen (templ);
35877 return (strncmp (section, templ, len) == 0
35878 && (section[len] == 0 || section[len] == '.'));
35879 }
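
/* compare_section_name matches TEMPL exactly or as a dot-separated
   prefix of SECTION, e.g.:

     compare_section_name (".sdata", ".sdata")         -> true
     compare_section_name (".sdata.foo", ".sdata")     -> true
     compare_section_name (".sdata2", ".sdata")        -> false

   which is why ".sdata2" and ".sbss2" must be listed separately
   below.  */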
35880
35881 bool
35882 rs6000_elf_in_small_data_p (const_tree decl)
35883 {
35884 if (rs6000_sdata == SDATA_NONE)
35885 return false;
35886
35887 /* We want to merge strings, so we never consider them small data. */
35888 if (TREE_CODE (decl) == STRING_CST)
35889 return false;
35890
35891 /* Functions are never in the small data area. */
35892 if (TREE_CODE (decl) == FUNCTION_DECL)
35893 return false;
35894
35895 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
35896 {
35897 const char *section = DECL_SECTION_NAME (decl);
35898 if (compare_section_name (section, ".sdata")
35899 || compare_section_name (section, ".sdata2")
35900 || compare_section_name (section, ".gnu.linkonce.s")
35901 || compare_section_name (section, ".sbss")
35902 || compare_section_name (section, ".sbss2")
35903 || compare_section_name (section, ".gnu.linkonce.sb")
35904 || strcmp (section, ".PPC.EMB.sdata0") == 0
35905 || strcmp (section, ".PPC.EMB.sbss0") == 0)
35906 return true;
35907 }
35908 else
35909 {
35910 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
35911
35912 if (size > 0
35913 && size <= g_switch_value
35914 /* If it's not public, and we're not going to reference it there,
35915 there's no need to put it in the small data section. */
35916 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
35917 return true;
35918 }
35919
35920 return false;
35921 }
35922
35923 #endif /* USING_ELFOS_H */
35924 \f
35925 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
35926
35927 static bool
35928 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
35929 {
35930 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
35931 }
35932
35933 /* Do not place thread-local symbol refs in object blocks. */
35934
35935 static bool
35936 rs6000_use_blocks_for_decl_p (const_tree decl)
35937 {
35938 return !DECL_THREAD_LOCAL_P (decl);
35939 }
35940 \f
35941 /* Return a REG that occurs in ADDR with coefficient 1.
35942 ADDR can be effectively incremented by incrementing REG.
35943
35944 r0 is special and we must not select it as an address
35945 register by this routine since our caller will try to
35946 increment the returned register via an "la" instruction. */
35947
35948 rtx
35949 find_addr_reg (rtx addr)
35950 {
35951 while (GET_CODE (addr) == PLUS)
35952 {
35953 if (GET_CODE (XEXP (addr, 0)) == REG
35954 && REGNO (XEXP (addr, 0)) != 0)
35955 addr = XEXP (addr, 0);
35956 else if (GET_CODE (XEXP (addr, 1)) == REG
35957 && REGNO (XEXP (addr, 1)) != 0)
35958 addr = XEXP (addr, 1);
35959 else if (CONSTANT_P (XEXP (addr, 0)))
35960 addr = XEXP (addr, 1);
35961 else if (CONSTANT_P (XEXP (addr, 1)))
35962 addr = XEXP (addr, 0);
35963 else
35964 gcc_unreachable ();
35965 }
35966 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
35967 return addr;
35968 }
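
/* For example, for ADDR == (plus (reg 9) (const_int 8)) this returns
   (reg 9); for (plus (plus (reg 9) (reg 10)) (const_int 8)) it
   descends the non-constant arm and again returns (reg 9).  */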
35969
35970 void
35971 rs6000_fatal_bad_address (rtx op)
35972 {
35973 fatal_insn ("bad address", op);
35974 }
35975
35976 #if TARGET_MACHO
35977
35978 typedef struct branch_island_d {
35979 tree function_name;
35980 tree label_name;
35981 int line_number;
35982 } branch_island;
35983
35984
35985 static vec<branch_island, va_gc> *branch_islands;
35986
35987 /* Remember to generate a branch island for far calls to the given
35988 function. */
35989
35990 static void
35991 add_compiler_branch_island (tree label_name, tree function_name,
35992 int line_number)
35993 {
35994 branch_island bi = {function_name, label_name, line_number};
35995 vec_safe_push (branch_islands, bi);
35996 }
35997
35998 /* Generate far-jump branch islands for everything recorded in
35999 branch_islands. Invoked immediately after the last instruction of
36000 the epilogue has been emitted; the branch islands must be appended
36001 to, and contiguous with, the function body. Mach-O stubs are
36002 generated in machopic_output_stub(). */
36003
36004 static void
36005 macho_branch_islands (void)
36006 {
36007 char tmp_buf[512];
36008
36009 while (!vec_safe_is_empty (branch_islands))
36010 {
36011 branch_island *bi = &branch_islands->last ();
36012 const char *label = IDENTIFIER_POINTER (bi->label_name);
36013 const char *name = IDENTIFIER_POINTER (bi->function_name);
36014 char name_buf[512];
36015 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
36016 if (name[0] == '*' || name[0] == '&')
36017 strcpy (name_buf, name+1);
36018 else
36019 {
36020 name_buf[0] = '_';
36021 strcpy (name_buf+1, name);
36022 }
36023 strcpy (tmp_buf, "\n");
36024 strcat (tmp_buf, label);
36025 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36026 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36027 dbxout_stabd (N_SLINE, bi->line_number);
36028 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36029 if (flag_pic)
36030 {
36031 if (TARGET_LINK_STACK)
36032 {
36033 char name[32];
36034 get_ppc476_thunk_name (name);
36035 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
36036 strcat (tmp_buf, name);
36037 strcat (tmp_buf, "\n");
36038 strcat (tmp_buf, label);
36039 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36040 }
36041 else
36042 {
36043 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
36044 strcat (tmp_buf, label);
36045 strcat (tmp_buf, "_pic\n");
36046 strcat (tmp_buf, label);
36047 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
36048 }
36049
36050 strcat (tmp_buf, "\taddis r11,r11,ha16(");
36051 strcat (tmp_buf, name_buf);
36052 strcat (tmp_buf, " - ");
36053 strcat (tmp_buf, label);
36054 strcat (tmp_buf, "_pic)\n");
36055
36056 strcat (tmp_buf, "\tmtlr r0\n");
36057
36058 strcat (tmp_buf, "\taddi r12,r11,lo16(");
36059 strcat (tmp_buf, name_buf);
36060 strcat (tmp_buf, " - ");
36061 strcat (tmp_buf, label);
36062 strcat (tmp_buf, "_pic)\n");
36063
36064 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
36065 }
36066 else
36067 {
36068 strcat (tmp_buf, ":\nlis r12,hi16(");
36069 strcat (tmp_buf, name_buf);
36070 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
36071 strcat (tmp_buf, name_buf);
36072 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
36073 }
36074 output_asm_insn (tmp_buf, 0);
36075 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
36076 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
36077 dbxout_stabd (N_SLINE, bi->line_number);
36078 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
36079 branch_islands->pop ();
36080 }
36081 }
36082
36083 /* NO_PREVIOUS_DEF checks whether the function name is already
36084 recorded in the branch island list. */
36085
36086 static int
36087 no_previous_def (tree function_name)
36088 {
36089 branch_island *bi;
36090 unsigned ix;
36091
36092 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36093 if (function_name == bi->function_name)
36094 return 0;
36095 return 1;
36096 }
36097
36098 /* GET_PREV_LABEL gets the label name from the previous definition of
36099 the function. */
36100
36101 static tree
36102 get_prev_label (tree function_name)
36103 {
36104 branch_island *bi;
36105 unsigned ix;
36106
36107 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
36108 if (function_name == bi->function_name)
36109 return bi->label_name;
36110 return NULL_TREE;
36111 }
36112
36113 /* INSN is either a function call or a millicode call. It may have an
36114 unconditional jump in its delay slot.
36115
36116 CALL_DEST is the routine we are calling. */
36117
36118 char *
36119 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
36120 int cookie_operand_number)
36121 {
36122 static char buf[256];
36123 if (darwin_emit_branch_islands
36124 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
36125 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
36126 {
36127 tree labelname;
36128 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
36129
36130 if (no_previous_def (funname))
36131 {
36132 rtx label_rtx = gen_label_rtx ();
36133 char *label_buf, temp_buf[256];
36134 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
36135 CODE_LABEL_NUMBER (label_rtx));
36136 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
36137 labelname = get_identifier (label_buf);
36138 add_compiler_branch_island (labelname, funname, insn_line (insn));
36139 }
36140 else
36141 labelname = get_prev_label (funname);
36142
36143 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
36144 instruction will reach 'foo', otherwise link as 'bl L42'".
36145 "L42" should be a 'branch island', that will do a far jump to
36146 'foo'. Branch islands are generated in
36147 macho_branch_islands(). */
36148 sprintf (buf, "jbsr %%z%d,%.246s",
36149 dest_operand_number, IDENTIFIER_POINTER (labelname));
36150 }
36151 else
36152 sprintf (buf, "bl %%z%d", dest_operand_number);
36153 return buf;
36154 }
36155
36156 /* Generate PIC and indirect symbol stubs. */
36157
36158 void
36159 machopic_output_stub (FILE *file, const char *symb, const char *stub)
36160 {
36161 unsigned int length;
36162 char *symbol_name, *lazy_ptr_name;
36163 char *local_label_0;
36164 static int label = 0;
36165
36166 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
36167 symb = (*targetm.strip_name_encoding) (symb);
36168
36169
36170 length = strlen (symb);
36171 symbol_name = XALLOCAVEC (char, length + 32);
36172 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
36173
36174 lazy_ptr_name = XALLOCAVEC (char, length + 32);
36175 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
36176
36177 if (flag_pic == 2)
36178 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
36179 else
36180 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
36181
36182 if (flag_pic == 2)
36183 {
36184 fprintf (file, "\t.align 5\n");
36185
36186 fprintf (file, "%s:\n", stub);
36187 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36188
36189 label++;
36190 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
36191 sprintf (local_label_0, "\"L%011d$spb\"", label);
36192
36193 fprintf (file, "\tmflr r0\n");
36194 if (TARGET_LINK_STACK)
36195 {
36196 char name[32];
36197 get_ppc476_thunk_name (name);
36198 fprintf (file, "\tbl %s\n", name);
36199 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36200 }
36201 else
36202 {
36203 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
36204 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
36205 }
36206 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
36207 lazy_ptr_name, local_label_0);
36208 fprintf (file, "\tmtlr r0\n");
36209 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
36210 (TARGET_64BIT ? "ldu" : "lwzu"),
36211 lazy_ptr_name, local_label_0);
36212 fprintf (file, "\tmtctr r12\n");
36213 fprintf (file, "\tbctr\n");
36214 }
36215 else
36216 {
36217 fprintf (file, "\t.align 4\n");
36218
36219 fprintf (file, "%s:\n", stub);
36220 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36221
36222 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
36223 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
36224 (TARGET_64BIT ? "ldu" : "lwzu"),
36225 lazy_ptr_name);
36226 fprintf (file, "\tmtctr r12\n");
36227 fprintf (file, "\tbctr\n");
36228 }
36229
36230 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
36231 fprintf (file, "%s:\n", lazy_ptr_name);
36232 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
36233 fprintf (file, "%sdyld_stub_binding_helper\n",
36234 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
36235 }
36236
36237 /* Legitimize PIC addresses. If the address is already
36238 position-independent, we return ORIG. Newly generated
36239 position-independent addresses go into a reg. This is REG if
36240 nonzero, otherwise we allocate register(s) as necessary. */
36241
36242 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
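
/* True iff the CONST_INT X fits in a signed 16-bit displacement,
   i.e. -32768 <= INTVAL (X) <= 32767.  */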
36243
36244 rtx
36245 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
36246 rtx reg)
36247 {
36248 rtx base, offset;
36249
36250 if (reg == NULL && ! reload_in_progress && ! reload_completed)
36251 reg = gen_reg_rtx (Pmode);
36252
36253 if (GET_CODE (orig) == CONST)
36254 {
36255 rtx reg_temp;
36256
36257 if (GET_CODE (XEXP (orig, 0)) == PLUS
36258 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
36259 return orig;
36260
36261 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
36262
36263 /* Use a different reg for the intermediate value, as
36264 it will be marked UNCHANGING. */
36265 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
36266 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
36267 Pmode, reg_temp);
36268 offset =
36269 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
36270 Pmode, reg);
36271
36272 if (GET_CODE (offset) == CONST_INT)
36273 {
36274 if (SMALL_INT (offset))
36275 return plus_constant (Pmode, base, INTVAL (offset));
36276 else if (! reload_in_progress && ! reload_completed)
36277 offset = force_reg (Pmode, offset);
36278 else
36279 {
36280 rtx mem = force_const_mem (Pmode, orig);
36281 return machopic_legitimize_pic_address (mem, Pmode, reg);
36282 }
36283 }
36284 return gen_rtx_PLUS (Pmode, base, offset);
36285 }
36286
36287 /* Fall back on generic machopic code. */
36288 return machopic_legitimize_pic_address (orig, mode, reg);
36289 }
36290
36291 /* Output a .machine directive for the Darwin assembler, and call
36292 the generic start_file routine. */
36293
36294 static void
36295 rs6000_darwin_file_start (void)
36296 {
36297 static const struct
36298 {
36299 const char *arg;
36300 const char *name;
36301 HOST_WIDE_INT if_set;
36302 } mapping[] = {
36303 { "ppc64", "ppc64", MASK_64BIT },
36304 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
36305 { "power4", "ppc970", 0 },
36306 { "G5", "ppc970", 0 },
36307 { "7450", "ppc7450", 0 },
36308 { "7400", "ppc7400", MASK_ALTIVEC },
36309 { "G4", "ppc7400", 0 },
36310 { "750", "ppc750", 0 },
36311 { "740", "ppc750", 0 },
36312 { "G3", "ppc750", 0 },
36313 { "604e", "ppc604e", 0 },
36314 { "604", "ppc604", 0 },
36315 { "603e", "ppc603", 0 },
36316 { "603", "ppc603", 0 },
36317 { "601", "ppc601", 0 },
36318 { NULL, "ppc", 0 } };
36319 const char *cpu_id = "";
36320 size_t i;
36321
36322 rs6000_file_start ();
36323 darwin_file_start ();
36324
36325 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
36326
36327 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
36328 cpu_id = rs6000_default_cpu;
36329
36330 if (global_options_set.x_rs6000_cpu_index)
36331 cpu_id = processor_target_table[rs6000_cpu_index].name;
36332
36333 /* Look through the mapping array. Pick the first name that either
36334 matches the argument, has a bit set in IF_SET that is also set
36335 in the target flags, or has a NULL name. */
36336
36337 i = 0;
36338 while (mapping[i].arg != NULL
36339 && strcmp (mapping[i].arg, cpu_id) != 0
36340 && (mapping[i].if_set & rs6000_isa_flags) == 0)
36341 i++;
36342
36343 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
36344 }
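/* For example, with -mcpu=G4 the scan above stops at the { "G4",
   "ppc7400" } entry and we emit ".machine ppc7400"; a 64-bit
   configuration is caught earlier by the MASK_64BIT bit of the
   "ppc64" entry; and if nothing matches at all, the terminating
   { NULL, "ppc" } entry yields a plain ".machine ppc".  */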
36345
36346 #endif /* TARGET_MACHO */
36347
36348 #if TARGET_ELF
36349 static int
36350 rs6000_elf_reloc_rw_mask (void)
36351 {
36352 if (flag_pic)
36353 return 3;
36354 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
36355 return 2;
36356 else
36357 return 0;
36358 }
36359
36360 /* Record an element in the table of global constructors. SYMBOL is
36361 a SYMBOL_REF of the function to be called; PRIORITY is a number
36362 between 0 and MAX_INIT_PRIORITY.
36363
36364 This differs from default_named_section_asm_out_constructor in
36365 that we have special handling for -mrelocatable. */
36366
36367 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
36368 static void
36369 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
36370 {
36371 const char *section = ".ctors";
36372 char buf[18];
36373
36374 if (priority != DEFAULT_INIT_PRIORITY)
36375 {
36376 sprintf (buf, ".ctors.%.5u",
36377 /* Invert the numbering so the linker puts us in the proper
36378 order; constructors are run from right to left, and the
36379 linker sorts in increasing order. */
36380 MAX_INIT_PRIORITY - priority);
36381 section = buf;
36382 }
36383
36384 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36385 assemble_align (POINTER_SIZE);
36386
36387 if (DEFAULT_ABI == ABI_V4
36388 && (TARGET_RELOCATABLE || flag_pic > 1))
36389 {
36390 fputs ("\t.long (", asm_out_file);
36391 output_addr_const (asm_out_file, symbol);
36392 fputs (")@fixup\n", asm_out_file);
36393 }
36394 else
36395 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36396 }
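/* A sketch of the numbering, assuming the usual MAX_INIT_PRIORITY of
   65535: a constructor with priority 1000 lands in ".ctors.64535",
   while one with the default priority goes into plain ".ctors".
   The linker sorts the numbered sections in increasing order and the
   runtime walks .ctors from the end, so the inverted numbers make
   lower-valued priorities run first, as required.  */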
36397
36398 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
36399 static void
36400 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
36401 {
36402 const char *section = ".dtors";
36403 char buf[18];
36404
36405 if (priority != DEFAULT_INIT_PRIORITY)
36406 {
36407 sprintf (buf, ".dtors.%.5u",
36408 /* Invert the numbering so the linker puts us in the proper
36409 order; constructors are run from right to left, and the
36410 linker sorts in increasing order. */
36411 MAX_INIT_PRIORITY - priority);
36412 section = buf;
36413 }
36414
36415 switch_to_section (get_section (section, SECTION_WRITE, NULL));
36416 assemble_align (POINTER_SIZE);
36417
36418 if (DEFAULT_ABI == ABI_V4
36419 && (TARGET_RELOCATABLE || flag_pic > 1))
36420 {
36421 fputs ("\t.long (", asm_out_file);
36422 output_addr_const (asm_out_file, symbol);
36423 fputs (")@fixup\n", asm_out_file);
36424 }
36425 else
36426 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
36427 }
36428
36429 void
36430 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
36431 {
36432 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
36433 {
36434 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
36435 ASM_OUTPUT_LABEL (file, name);
36436 fputs (DOUBLE_INT_ASM_OP, file);
36437 rs6000_output_function_entry (file, name);
36438 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
36439 if (DOT_SYMBOLS)
36440 {
36441 fputs ("\t.size\t", file);
36442 assemble_name (file, name);
36443 fputs (",24\n\t.type\t.", file);
36444 assemble_name (file, name);
36445 fputs (",@function\n", file);
36446 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
36447 {
36448 fputs ("\t.globl\t.", file);
36449 assemble_name (file, name);
36450 putc ('\n', file);
36451 }
36452 }
36453 else
36454 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36455 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36456 rs6000_output_function_entry (file, name);
36457 fputs (":\n", file);
36458 return;
36459 }
36460
36461 if (DEFAULT_ABI == ABI_V4
36462 && (TARGET_RELOCATABLE || flag_pic > 1)
36463 && !TARGET_SECURE_PLT
36464 && (!constant_pool_empty_p () || crtl->profile)
36465 && uses_TOC ())
36466 {
36467 char buf[256];
36468
36469 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36470
36471 fprintf (file, "\t.long ");
36472 assemble_name (file, toc_label_name);
36473 need_toc_init = 1;
36474 putc ('-', file);
36475 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36476 assemble_name (file, buf);
36477 putc ('\n', file);
36478 }
36479
36480 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
36481 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
36482
36483 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
36484 {
36485 char buf[256];
36486
36487 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
36488
36489 fprintf (file, "\t.quad .TOC.-");
36490 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
36491 assemble_name (file, buf);
36492 putc ('\n', file);
36493 }
36494
36495 if (DEFAULT_ABI == ABI_AIX)
36496 {
36497 const char *desc_name, *orig_name;
36498
36499 orig_name = (*targetm.strip_name_encoding) (name);
36500 desc_name = orig_name;
36501 while (*desc_name == '.')
36502 desc_name++;
36503
36504 if (TREE_PUBLIC (decl))
36505 fprintf (file, "\t.globl %s\n", desc_name);
36506
36507 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
36508 fprintf (file, "%s:\n", desc_name);
36509 fprintf (file, "\t.long %s\n", orig_name);
36510 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
36511 fputs ("\t.long 0\n", file);
36512 fprintf (file, "\t.previous\n");
36513 }
36514 ASM_OUTPUT_LABEL (file, name);
36515 }
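/* Roughly, for a public 64-bit ELFv1 function "foo" with dot-symbols
   enabled (and assuming DOUBLE_INT_ASM_OP is "\t.quad\t"), the .opd
   branch above emits:

	.section	".opd","aw"
	.align 3
   foo:
	.quad	.foo,.TOC.@tocbase,0
	.previous
	.size	foo,24
	.type	.foo,@function
	.globl	.foo
   .foo:

   i.e. "foo" names the 24-byte function descriptor in .opd and ".foo"
   labels the actual code.  */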
36516
36517 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
36518 static void
36519 rs6000_elf_file_end (void)
36520 {
36521 #ifdef HAVE_AS_GNU_ATTRIBUTE
36522 /* ??? The value emitted depends on options active at file end.
36523 Assume anyone using #pragma or attributes that might change
36524 options knows what they are doing. */
36525 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
36526 && rs6000_passes_float)
36527 {
36528 int fp;
36529
36530 if (TARGET_DF_FPR | TARGET_DF_SPE)
36531 fp = 1;
36532 else if (TARGET_SF_FPR | TARGET_SF_SPE)
36533 fp = 3;
36534 else
36535 fp = 2;
36536 if (rs6000_passes_long_double)
36537 {
36538 if (!TARGET_LONG_DOUBLE_128)
36539 fp |= 2 * 4;
36540 else if (TARGET_IEEEQUAD)
36541 fp |= 3 * 4;
36542 else
36543 fp |= 1 * 4;
36544 }
36545 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
36546 }
36547 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
36548 {
36549 if (rs6000_passes_vector)
36550 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
36551 (TARGET_ALTIVEC_ABI ? 2
36552 : TARGET_SPE_ABI ? 3
36553 : 1));
36554 if (rs6000_returns_struct)
36555 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
36556 aix_struct_return ? 2 : 1);
36557 }
36558 #endif
36559 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
36560 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
36561 file_end_indicate_exec_stack ();
36562 #endif
36563
36564 if (flag_split_stack)
36565 file_end_indicate_split_stack ();
36566
36567 if (cpu_builtin_p)
36568 {
36569 /* We have expanded a CPU builtin, so we need to emit a reference to
36570 the special symbol that LIBC uses to declare it supports the
36571 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
36572 switch_to_section (data_section);
36573 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
36574 fprintf (asm_out_file, "\t%s %s\n",
36575 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
36576 }
36577 }
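/* Decoding the .gnu_attribute 4 value computed above: the low two
   bits describe scalar floats (1 = double precision in hardware,
   2 = soft float, 3 = single precision only) and bits 2-3 describe
   long double (1 = 128-bit IBM extended, 2 = 64-bit, 3 = 128-bit
   IEEE).  For example, a hard-float configuration using IBM 128-bit
   long double emits ".gnu_attribute 4, 5".  */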
36578 #endif
36579
36580 #if TARGET_XCOFF
36581
36582 #ifndef HAVE_XCOFF_DWARF_EXTRAS
36583 #define HAVE_XCOFF_DWARF_EXTRAS 0
36584 #endif
36585
36586 static enum unwind_info_type
36587 rs6000_xcoff_debug_unwind_info (void)
36588 {
36589 return UI_NONE;
36590 }
36591
36592 static void
36593 rs6000_xcoff_asm_output_anchor (rtx symbol)
36594 {
36595 char buffer[100];
36596
36597 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
36598 SYMBOL_REF_BLOCK_OFFSET (symbol));
36599 fprintf (asm_out_file, "%s", SET_ASM_OP);
36600 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
36601 fprintf (asm_out_file, ",");
36602 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
36603 fprintf (asm_out_file, "\n");
36604 }
36605
36606 static void
36607 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
36608 {
36609 fputs (GLOBAL_ASM_OP, stream);
36610 RS6000_OUTPUT_BASENAME (stream, name);
36611 putc ('\n', stream);
36612 }
36613
36614 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
36615 points to the section name string variable. */
36616
36617 static void
36618 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
36619 {
36620 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
36621 *(const char *const *) directive,
36622 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36623 }
36624
36625 /* Likewise for read-write sections. */
36626
36627 static void
36628 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
36629 {
36630 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
36631 *(const char *const *) directive,
36632 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36633 }
36634
36635 static void
36636 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
36637 {
36638 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
36639 *(const char *const *) directive,
36640 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
36641 }
36642
36643 /* A get_unnamed_section callback, used for switching to toc_section. */
36644
36645 static void
36646 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
36647 {
36648 if (TARGET_MINIMAL_TOC)
36649 {
36650 /* toc_section is always selected at least once from
36651 rs6000_xcoff_file_start, so this is guaranteed to
36652 always be defined once and only once in each file. */
36653 if (!toc_initialized)
36654 {
36655 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
36656 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
36657 toc_initialized = 1;
36658 }
36659 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
36660 (TARGET_32BIT ? "" : ",3"));
36661 }
36662 else
36663 fputs ("\t.toc\n", asm_out_file);
36664 }
36665
36666 /* Implement TARGET_ASM_INIT_SECTIONS. */
36667
36668 static void
36669 rs6000_xcoff_asm_init_sections (void)
36670 {
36671 read_only_data_section
36672 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36673 &xcoff_read_only_section_name);
36674
36675 private_data_section
36676 = get_unnamed_section (SECTION_WRITE,
36677 rs6000_xcoff_output_readwrite_section_asm_op,
36678 &xcoff_private_data_section_name);
36679
36680 tls_data_section
36681 = get_unnamed_section (SECTION_TLS,
36682 rs6000_xcoff_output_tls_section_asm_op,
36683 &xcoff_tls_data_section_name);
36684
36685 tls_private_data_section
36686 = get_unnamed_section (SECTION_TLS,
36687 rs6000_xcoff_output_tls_section_asm_op,
36688 &xcoff_private_data_section_name);
36689
36690 read_only_private_data_section
36691 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
36692 &xcoff_private_data_section_name);
36693
36694 toc_section
36695 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
36696
36697 readonly_data_section = read_only_data_section;
36698 }
36699
36700 static int
36701 rs6000_xcoff_reloc_rw_mask (void)
36702 {
36703 return 3;
36704 }
36705
36706 static void
36707 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
36708 tree decl ATTRIBUTE_UNUSED)
36709 {
36710 int smclass;
36711 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
36712
36713 if (flags & SECTION_EXCLUDE)
36714 smclass = 4;
36715 else if (flags & SECTION_DEBUG)
36716 {
36717 fprintf (asm_out_file, "\t.dwsect %s\n", name);
36718 return;
36719 }
36720 else if (flags & SECTION_CODE)
36721 smclass = 0;
36722 else if (flags & SECTION_TLS)
36723 smclass = 3;
36724 else if (flags & SECTION_WRITE)
36725 smclass = 2;
36726 else
36727 smclass = 1;
36728
36729 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
36730 (flags & SECTION_CODE) ? "." : "",
36731 name, suffix[smclass], flags & SECTION_ENTSIZE);
36732 }
36733
36734 #define IN_NAMED_SECTION(DECL) \
36735 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
36736 && DECL_SECTION_NAME (DECL) != NULL)
36737
36738 static section *
36739 rs6000_xcoff_select_section (tree decl, int reloc,
36740 unsigned HOST_WIDE_INT align)
36741 {
36742 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
36743 named section. */
36744 if (align > BIGGEST_ALIGNMENT)
36745 {
36746 resolve_unique_section (decl, reloc, true);
36747 if (IN_NAMED_SECTION (decl))
36748 return get_named_section (decl, NULL, reloc);
36749 }
36750
36751 if (decl_readonly_section (decl, reloc))
36752 {
36753 if (TREE_PUBLIC (decl))
36754 return read_only_data_section;
36755 else
36756 return read_only_private_data_section;
36757 }
36758 else
36759 {
36760 #if HAVE_AS_TLS
36761 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
36762 {
36763 if (TREE_PUBLIC (decl))
36764 return tls_data_section;
36765 else if (bss_initializer_p (decl))
36766 {
36767 /* Convert to COMMON to emit in BSS. */
36768 DECL_COMMON (decl) = 1;
36769 return tls_comm_section;
36770 }
36771 else
36772 return tls_private_data_section;
36773 }
36774 else
36775 #endif
36776 if (TREE_PUBLIC (decl))
36777 return data_section;
36778 else
36779 return private_data_section;
36780 }
36781 }
36782
36783 static void
36784 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
36785 {
36786 const char *name;
36787
36788 /* Use select_section for private data and uninitialized data with
36789 alignment <= BIGGEST_ALIGNMENT. */
36790 if (!TREE_PUBLIC (decl)
36791 || DECL_COMMON (decl)
36792 || (DECL_INITIAL (decl) == NULL_TREE
36793 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
36794 || DECL_INITIAL (decl) == error_mark_node
36795 || (flag_zero_initialized_in_bss
36796 && initializer_zerop (DECL_INITIAL (decl))))
36797 return;
36798
36799 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36800 name = (*targetm.strip_name_encoding) (name);
36801 set_decl_section_name (decl, name);
36802 }
36803
36804 /* Select section for constant in constant pool.
36805
36806 On RS/6000, all constants are in the private read-only data area.
36807 However, if this is being placed in the TOC it must be output as a
36808 toc entry. */
36809
36810 static section *
36811 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
36812 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
36813 {
36814 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
36815 return toc_section;
36816 else
36817 return read_only_private_data_section;
36818 }
36819
36820 /* Remove any trailing [DS] or the like from the symbol name. */
36821
36822 static const char *
36823 rs6000_xcoff_strip_name_encoding (const char *name)
36824 {
36825 size_t len;
36826 if (*name == '*')
36827 name++;
36828 len = strlen (name);
36829 if (name[len - 1] == ']')
36830 return ggc_alloc_string (name, len - 4);
36831 else
36832 return name;
36833 }
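/* For example, "foo[DS]" has length 7, so we return its first
   7 - 4 = 3 characters, "foo"; a leading '*' as in "*foo" is skipped
   first.  Note that the len - 4 arithmetic assumes the trailing
   bracketed mapping class is always two characters wide.  */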
36834
36835 /* Section attributes. AIX is always PIC. */
36836
36837 static unsigned int
36838 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
36839 {
36840 unsigned int align;
36841 unsigned int flags = default_section_type_flags (decl, name, reloc);
36842
36843 /* Align to at least UNIT size. */
36844 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
36845 align = MIN_UNITS_PER_WORD;
36846 else
36847 /* Increase alignment of large objects if not already stricter. */
36848 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
36849 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
36850 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
36851
36852 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
36853 }
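/* The alignment travels in the low SECTION_ENTSIZE bits as a log2.
   For example, on a 32-bit target a code section gets
   align = MIN_UNITS_PER_WORD = 4, so exact_log2 (4) = 2 is encoded
   here, and rs6000_xcoff_asm_named_section above prints that 2 back
   out as the csect alignment power.  */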
36854
36855 /* Output at beginning of assembler file.
36856
36857 Initialize the section names for the RS/6000 at this point.
36858
36859 Specify filename, including full path, to assembler.
36860
36861 We want to go into the TOC section so at least one .toc will be emitted.
36862 Also, in order to output proper .bs/.es pairs, we need at least one static
36863 [RW] section emitted.
36864
36865 Finally, declare mcount when profiling to make the assembler happy. */
36866
36867 static void
36868 rs6000_xcoff_file_start (void)
36869 {
36870 rs6000_gen_section_name (&xcoff_bss_section_name,
36871 main_input_filename, ".bss_");
36872 rs6000_gen_section_name (&xcoff_private_data_section_name,
36873 main_input_filename, ".rw_");
36874 rs6000_gen_section_name (&xcoff_read_only_section_name,
36875 main_input_filename, ".ro_");
36876 rs6000_gen_section_name (&xcoff_tls_data_section_name,
36877 main_input_filename, ".tls_");
36878 rs6000_gen_section_name (&xcoff_tbss_section_name,
36879 main_input_filename, ".tbss_[UL]");
36880
36881 fputs ("\t.file\t", asm_out_file);
36882 output_quoted_string (asm_out_file, main_input_filename);
36883 fputc ('\n', asm_out_file);
36884 if (write_symbols != NO_DEBUG)
36885 switch_to_section (private_data_section);
36886 switch_to_section (toc_section);
36887 switch_to_section (text_section);
36888 if (profile_flag)
36889 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
36890 rs6000_file_start ();
36891 }
36892
36893 /* Output at end of assembler file.
36894 On the RS/6000, referencing data should automatically pull in text. */
36895
36896 static void
36897 rs6000_xcoff_file_end (void)
36898 {
36899 switch_to_section (text_section);
36900 fputs ("_section_.text:\n", asm_out_file);
36901 switch_to_section (data_section);
36902 fputs (TARGET_32BIT
36903 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
36904 asm_out_file);
36905 }
36906
36907 struct declare_alias_data
36908 {
36909 FILE *file;
36910 bool function_descriptor;
36911 };
36912
36913 /* Declare alias N. A helper function for symtab_node::call_for_symbol_and_aliases. */
36914
36915 static bool
36916 rs6000_declare_alias (struct symtab_node *n, void *d)
36917 {
36918 struct declare_alias_data *data = (struct declare_alias_data *)d;
36919 /* The main symbol is output specially, because the varasm machinery does part
36920 of the job for us - we do not need to emit .globl/.lglobl directives and such. */
36921 if (!n->alias || n->weakref)
36922 return false;
36923
36924 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
36925 return false;
36926
36927 /* Prevent assemble_alias from trying to use .set pseudo operation
36928 that does not behave as expected by the middle-end. */
36929 TREE_ASM_WRITTEN (n->decl) = true;
36930
36931 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
36932 char *buffer = (char *) alloca (strlen (name) + 2);
36933 char *p;
36934 int dollar_inside = 0;
36935
36936 strcpy (buffer, name);
36937 p = strchr (buffer, '$');
36938 while (p) {
36939 *p = '_';
36940 dollar_inside++;
36941 p = strchr (p + 1, '$');
36942 }
36943 if (TREE_PUBLIC (n->decl))
36944 {
36945 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
36946 {
36947 if (dollar_inside) {
36948 if (data->function_descriptor)
36949 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
36950 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
36951 }
36952 if (data->function_descriptor)
36953 {
36954 fputs ("\t.globl .", data->file);
36955 RS6000_OUTPUT_BASENAME (data->file, buffer);
36956 putc ('\n', data->file);
36957 }
36958 fputs ("\t.globl ", data->file);
36959 RS6000_OUTPUT_BASENAME (data->file, buffer);
36960 putc ('\n', data->file);
36961 }
36962 #ifdef ASM_WEAKEN_DECL
36963 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
36964 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
36965 #endif
36966 }
36967 else
36968 {
36969 if (dollar_inside)
36970 {
36971 if (data->function_descriptor)
36972 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
36973 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
36974 }
36975 if (data->function_descriptor)
36976 {
36977 fputs ("\t.lglobl .", data->file);
36978 RS6000_OUTPUT_BASENAME (data->file, buffer);
36979 putc ('\n', data->file);
36980 }
36981 fputs ("\t.lglobl ", data->file);
36982 RS6000_OUTPUT_BASENAME (data->file, buffer);
36983 putc ('\n', data->file);
36984 }
36985 if (data->function_descriptor)
36986 fputs (".", data->file);
36987 RS6000_OUTPUT_BASENAME (data->file, buffer);
36988 fputs (":\n", data->file);
36989 return false;
36990 }
36991
36992
36993 #ifdef HAVE_GAS_HIDDEN
36994 /* Helper function to calculate visibility of a DECL
36995 and return the value as a const string. */
36996
36997 static const char *
36998 rs6000_xcoff_visibility (tree decl)
36999 {
37000 static const char * const visibility_types[] = {
37001 "", ",protected", ",hidden", ",internal"
37002 };
37003
37004 enum symbol_visibility vis = DECL_VISIBILITY (decl);
37005
37006 if (TREE_CODE (decl) == FUNCTION_DECL
37007 && cgraph_node::get (decl)
37008 && cgraph_node::get (decl)->instrumentation_clone
37009 && cgraph_node::get (decl)->instrumented_version)
37010 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
37011
37012 return visibility_types[vis];
37013 }
37014 #endif
37015
37016
37017 /* This macro produces the initial definition of a function name.
37018 On the RS/6000, we need to place an extra '.' in the function name and
37019 output the function descriptor.
37020 Dollar signs are converted to underscores.
37021
37022 The csect for the function will have already been created when
37023 text_section was selected. We do have to go back to that csect, however.
37024
37025 The third and fourth parameters to the .function pseudo-op (16 and 044)
37026 are placeholders which no longer have any use.
37027
37028 Because AIX assembler's .set command has unexpected semantics, we output
37029 all aliases as alternative labels in front of the definition. */
37030
37031 void
37032 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
37033 {
37034 char *buffer = (char *) alloca (strlen (name) + 1);
37035 char *p;
37036 int dollar_inside = 0;
37037 struct declare_alias_data data = {file, false};
37038
37039 strcpy (buffer, name);
37040 p = strchr (buffer, '$');
37041 while (p) {
37042 *p = '_';
37043 dollar_inside++;
37044 p = strchr (p + 1, '$');
37045 }
37046 if (TREE_PUBLIC (decl))
37047 {
37048 if (!RS6000_WEAK || !DECL_WEAK (decl))
37049 {
37050 if (dollar_inside) {
37051 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37052 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37053 }
37054 fputs ("\t.globl .", file);
37055 RS6000_OUTPUT_BASENAME (file, buffer);
37056 #ifdef HAVE_GAS_HIDDEN
37057 fputs (rs6000_xcoff_visibility (decl), file);
37058 #endif
37059 putc ('\n', file);
37060 }
37061 }
37062 else
37063 {
37064 if (dollar_inside) {
37065 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
37066 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
37067 }
37068 fputs ("\t.lglobl .", file);
37069 RS6000_OUTPUT_BASENAME (file, buffer);
37070 putc ('\n', file);
37071 }
37072 fputs ("\t.csect ", file);
37073 RS6000_OUTPUT_BASENAME (file, buffer);
37074 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
37075 RS6000_OUTPUT_BASENAME (file, buffer);
37076 fputs (":\n", file);
37077 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37078 &data, true);
37079 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
37080 RS6000_OUTPUT_BASENAME (file, buffer);
37081 fputs (", TOC[tc0], 0\n", file);
37082 in_section = NULL;
37083 switch_to_section (function_section (decl));
37084 putc ('.', file);
37085 RS6000_OUTPUT_BASENAME (file, buffer);
37086 fputs (":\n", file);
37087 data.function_descriptor = true;
37088 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37089 &data, true);
37090 if (!DECL_IGNORED_P (decl))
37091 {
37092 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
37093 xcoffout_declare_function (file, decl, buffer);
37094 else if (write_symbols == DWARF2_DEBUG)
37095 {
37096 name = (*targetm.strip_name_encoding) (name);
37097 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
37098 }
37099 }
37100 return;
37101 }
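/* Roughly, for a public 32-bit function "foo" with no aliases, no '$'
   in its name and default visibility, the code above emits:

	.globl .foo
	.csect foo[DS]
   foo:
	.long .foo, TOC[tc0], 0
	.csect .text[PR]
   .foo:

   i.e. a function descriptor csect "foo[DS]" holding the code address
   and TOC anchor, then the code label; the ".text[PR]" line stands in
   for whatever csect function_section actually selects.  */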
37102
37103
37104 /* Output assembly language to globalize a symbol from a DECL,
37105 possibly with visibility. */
37106
37107 void
37108 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
37109 {
37110 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
37111 fputs (GLOBAL_ASM_OP, stream);
37112 RS6000_OUTPUT_BASENAME (stream, name);
37113 #ifdef HAVE_GAS_HIDDEN
37114 fputs (rs6000_xcoff_visibility (decl), stream);
37115 #endif
37116 putc ('\n', stream);
37117 }
37118
37119 /* Output assembly language to define a symbol as COMMON from a DECL,
37120 possibly with visibility. */
37121
37122 void
37123 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
37124 tree decl ATTRIBUTE_UNUSED,
37125 const char *name,
37126 unsigned HOST_WIDE_INT size,
37127 unsigned HOST_WIDE_INT align)
37128 {
37129 unsigned HOST_WIDE_INT align2 = 2;
37130
37131 if (align > 32)
37132 align2 = floor_log2 (align / BITS_PER_UNIT);
37133 else if (size > 4)
37134 align2 = 3;
37135
37136 fputs (COMMON_ASM_OP, stream);
37137 RS6000_OUTPUT_BASENAME (stream, name);
37138
37139 fprintf (stream,
37140 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
37141 size, align2);
37142
37143 #ifdef HAVE_GAS_HIDDEN
37144 fputs (rs6000_xcoff_visibility (decl), stream);
37145 #endif
37146 putc ('\n', stream);
37147 }
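/* For example, an 8-byte object with 64-bit (8-byte) alignment takes
   the align > 32 path: align2 = floor_log2 (64 / 8) = 3, giving
   something like ".comm foo,8,3" - the last operand is a log2 of
   bytes.  A 4-byte object with default alignment falls through to
   align2 == 2.  */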
37148
37149 /* This macro produces the initial definition of an object (variable) name.
37150 Because AIX assembler's .set command has unexpected semantics, we output
37151 all aliases as alternative labels in front of the definition. */
37152
37153 void
37154 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
37155 {
37156 struct declare_alias_data data = {file, false};
37157 RS6000_OUTPUT_BASENAME (file, name);
37158 fputs (":\n", file);
37159 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
37160 &data, true);
37161 }
37162
37163 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
37164
37165 void
37166 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
37167 {
37168 fputs (integer_asm_op (size, FALSE), file);
37169 assemble_name (file, label);
37170 fputs ("-$", file);
37171 }
37172
37173 /* Output a symbol offset relative to the dbase for the current object.
37174 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
37175 signed offsets.
37176
37177 __gcc_unwind_dbase is embedded in all executables/libraries through
37178 libgcc/config/rs6000/crtdbase.S. */
37179
37180 void
37181 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
37182 {
37183 fputs (integer_asm_op (size, FALSE), file);
37184 assemble_name (file, label);
37185 fputs("-__gcc_unwind_dbase", file);
37186 }
37187
37188 #ifdef HAVE_AS_TLS
37189 static void
37190 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
37191 {
37192 rtx symbol;
37193 int flags;
37194 const char *symname;
37195
37196 default_encode_section_info (decl, rtl, first);
37197
37198 /* Careful not to prod global register variables. */
37199 if (!MEM_P (rtl))
37200 return;
37201 symbol = XEXP (rtl, 0);
37202 if (GET_CODE (symbol) != SYMBOL_REF)
37203 return;
37204
37205 flags = SYMBOL_REF_FLAGS (symbol);
37206
37207 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
37208 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
37209
37210 SYMBOL_REF_FLAGS (symbol) = flags;
37211
37212 /* Append mapping class to extern decls. */
37213 symname = XSTR (symbol, 0);
37214 if (decl /* sync condition with assemble_external () */
37215 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
37216 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
37217 || TREE_CODE (decl) == FUNCTION_DECL)
37218 && symname[strlen (symname) - 1] != ']')
37219 {
37220 char *newname = (char *) alloca (strlen (symname) + 5);
37221 strcpy (newname, symname);
37222 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
37223 ? "[DS]" : "[UA]"));
37224 XSTR (symbol, 0) = ggc_strdup (newname);
37225 }
37226 }
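/* For example, a public extern function "bar" gets "[DS]" appended to
   its symbol, becoming "bar[DS]", while a public extern non-TLS
   variable becomes "bar[UA]"; symbols already ending in ']' are left
   alone.  */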
37227 #endif /* HAVE_AS_TLS */
37228 #endif /* TARGET_XCOFF */
37229
37230 void
37231 rs6000_asm_weaken_decl (FILE *stream, tree decl,
37232 const char *name, const char *val)
37233 {
37234 fputs ("\t.weak\t", stream);
37235 RS6000_OUTPUT_BASENAME (stream, name);
37236 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37237 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37238 {
37239 if (TARGET_XCOFF)
37240 fputs ("[DS]", stream);
37241 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37242 if (TARGET_XCOFF)
37243 fputs (rs6000_xcoff_visibility (decl), stream);
37244 #endif
37245 fputs ("\n\t.weak\t.", stream);
37246 RS6000_OUTPUT_BASENAME (stream, name);
37247 }
37248 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
37249 if (TARGET_XCOFF)
37250 fputs (rs6000_xcoff_visibility (decl), stream);
37251 #endif
37252 fputc ('\n', stream);
37253 if (val)
37254 {
37255 #ifdef ASM_OUTPUT_DEF
37256 ASM_OUTPUT_DEF (stream, name, val);
37257 #endif
37258 if (decl && TREE_CODE (decl) == FUNCTION_DECL
37259 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
37260 {
37261 fputs ("\t.set\t.", stream);
37262 RS6000_OUTPUT_BASENAME (stream, name);
37263 fputs (",.", stream);
37264 RS6000_OUTPUT_BASENAME (stream, val);
37265 fputc ('\n', stream);
37266 }
37267 }
37268 }
37269
37270
37271 /* Return true if INSN should not be copied. */
37272
37273 static bool
37274 rs6000_cannot_copy_insn_p (rtx_insn *insn)
37275 {
37276 return recog_memoized (insn) >= 0
37277 && get_attr_cannot_copy (insn);
37278 }
37279
37280 /* Compute a (partial) cost for rtx X. Return true if the complete
37281 cost has been computed, and false if subexpressions should be
37282 scanned. In either case, *TOTAL contains the cost result. */
37283
37284 static bool
37285 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
37286 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
37287 {
37288 int code = GET_CODE (x);
37289
37290 switch (code)
37291 {
37292 /* On the RS/6000, if it is valid in the insn, it is free. */
37293 case CONST_INT:
37294 if (((outer_code == SET
37295 || outer_code == PLUS
37296 || outer_code == MINUS)
37297 && (satisfies_constraint_I (x)
37298 || satisfies_constraint_L (x)))
37299 || (outer_code == AND
37300 && (satisfies_constraint_K (x)
37301 || (mode == SImode
37302 ? satisfies_constraint_L (x)
37303 : satisfies_constraint_J (x))))
37304 || ((outer_code == IOR || outer_code == XOR)
37305 && (satisfies_constraint_K (x)
37306 || (mode == SImode
37307 ? satisfies_constraint_L (x)
37308 : satisfies_constraint_J (x))))
37309 || outer_code == ASHIFT
37310 || outer_code == ASHIFTRT
37311 || outer_code == LSHIFTRT
37312 || outer_code == ROTATE
37313 || outer_code == ROTATERT
37314 || outer_code == ZERO_EXTRACT
37315 || (outer_code == MULT
37316 && satisfies_constraint_I (x))
37317 || ((outer_code == DIV || outer_code == UDIV
37318 || outer_code == MOD || outer_code == UMOD)
37319 && exact_log2 (INTVAL (x)) >= 0)
37320 || (outer_code == COMPARE
37321 && (satisfies_constraint_I (x)
37322 || satisfies_constraint_K (x)))
37323 || ((outer_code == EQ || outer_code == NE)
37324 && (satisfies_constraint_I (x)
37325 || satisfies_constraint_K (x)
37326 || (mode == SImode
37327 ? satisfies_constraint_L (x)
37328 : satisfies_constraint_J (x))))
37329 || (outer_code == GTU
37330 && satisfies_constraint_I (x))
37331 || (outer_code == LTU
37332 && satisfies_constraint_P (x)))
37333 {
37334 *total = 0;
37335 return true;
37336 }
37337 else if ((outer_code == PLUS
37338 && reg_or_add_cint_operand (x, VOIDmode))
37339 || (outer_code == MINUS
37340 && reg_or_sub_cint_operand (x, VOIDmode))
37341 || ((outer_code == SET
37342 || outer_code == IOR
37343 || outer_code == XOR)
37344 && (INTVAL (x)
37345 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
37346 {
37347 *total = COSTS_N_INSNS (1);
37348 return true;
37349 }
37350 /* FALLTHRU */
37351
37352 case CONST_DOUBLE:
37353 case CONST_WIDE_INT:
37354 case CONST:
37355 case HIGH:
37356 case SYMBOL_REF:
37357 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37358 return true;
37359
37360 case MEM:
37361 /* When optimizing for size, MEM should be slightly more expensive
37362 than generating the address, e.g., (plus (reg) (const)).
37363 L1 cache latency is about two instructions. */
37364 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
37365 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
37366 *total += COSTS_N_INSNS (100);
37367 return true;
37368
37369 case LABEL_REF:
37370 *total = 0;
37371 return true;
37372
37373 case PLUS:
37374 case MINUS:
37375 if (FLOAT_MODE_P (mode))
37376 *total = rs6000_cost->fp;
37377 else
37378 *total = COSTS_N_INSNS (1);
37379 return false;
37380
37381 case MULT:
37382 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37383 && satisfies_constraint_I (XEXP (x, 1)))
37384 {
37385 if (INTVAL (XEXP (x, 1)) >= -256
37386 && INTVAL (XEXP (x, 1)) <= 255)
37387 *total = rs6000_cost->mulsi_const9;
37388 else
37389 *total = rs6000_cost->mulsi_const;
37390 }
37391 else if (mode == SFmode)
37392 *total = rs6000_cost->fp;
37393 else if (FLOAT_MODE_P (mode))
37394 *total = rs6000_cost->dmul;
37395 else if (mode == DImode)
37396 *total = rs6000_cost->muldi;
37397 else
37398 *total = rs6000_cost->mulsi;
37399 return false;
37400
37401 case FMA:
37402 if (mode == SFmode)
37403 *total = rs6000_cost->fp;
37404 else
37405 *total = rs6000_cost->dmul;
37406 break;
37407
37408 case DIV:
37409 case MOD:
37410 if (FLOAT_MODE_P (mode))
37411 {
37412 *total = mode == DFmode ? rs6000_cost->ddiv
37413 : rs6000_cost->sdiv;
37414 return false;
37415 }
37416 /* FALLTHRU */
37417
37418 case UDIV:
37419 case UMOD:
37420 if (GET_CODE (XEXP (x, 1)) == CONST_INT
37421 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
37422 {
37423 if (code == DIV || code == MOD)
37424 /* Shift, addze */
37425 *total = COSTS_N_INSNS (2);
37426 else
37427 /* Shift */
37428 *total = COSTS_N_INSNS (1);
37429 }
37430 else
37431 {
37432 if (GET_MODE (XEXP (x, 1)) == DImode)
37433 *total = rs6000_cost->divdi;
37434 else
37435 *total = rs6000_cost->divsi;
37436 }
37437 /* Add in shift and subtract for MOD unless we have a mod instruction. */
37438 if (!TARGET_MODULO && (code == MOD || code == UMOD))
37439 *total += COSTS_N_INSNS (2);
37440 return false;
37441
37442 case CTZ:
37443 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
37444 return false;
37445
37446 case FFS:
37447 *total = COSTS_N_INSNS (4);
37448 return false;
37449
37450 case POPCOUNT:
37451 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
37452 return false;
37453
37454 case PARITY:
37455 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
37456 return false;
37457
37458 case NOT:
37459 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
37460 *total = 0;
37461 else
37462 *total = COSTS_N_INSNS (1);
37463 return false;
37464
37465 case AND:
37466 if (CONST_INT_P (XEXP (x, 1)))
37467 {
37468 rtx left = XEXP (x, 0);
37469 rtx_code left_code = GET_CODE (left);
37470
37471 /* rotate-and-mask: 1 insn. */
37472 if ((left_code == ROTATE
37473 || left_code == ASHIFT
37474 || left_code == LSHIFTRT)
37475 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
37476 {
37477 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
37478 if (!CONST_INT_P (XEXP (left, 1)))
37479 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
37480 *total += COSTS_N_INSNS (1);
37481 return true;
37482 }
37483
37484 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
37485 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
37486 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
37487 || (val & 0xffff) == val
37488 || (val & 0xffff0000) == val
37489 || ((val & 0xffff) == 0 && mode == SImode))
37490 {
37491 *total = rtx_cost (left, mode, AND, 0, speed);
37492 *total += COSTS_N_INSNS (1);
37493 return true;
37494 }
37495
37496 /* 2 insns. */
37497 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
37498 {
37499 *total = rtx_cost (left, mode, AND, 0, speed);
37500 *total += COSTS_N_INSNS (2);
37501 return true;
37502 }
37503 }
37504
37505 *total = COSTS_N_INSNS (1);
37506 return false;
37507
37508 case IOR:
37509 /* FIXME */
37510 *total = COSTS_N_INSNS (1);
37511 return true;
37512
37513 case CLZ:
37514 case XOR:
37515 case ZERO_EXTRACT:
37516 *total = COSTS_N_INSNS (1);
37517 return false;
37518
37519 case ASHIFT:
37520 /* The EXTSWSLI instruction combines a sign extend and a shift; don't count
37521 the sign extend and the shift separately within the insn. */
37522 if (TARGET_EXTSWSLI && mode == DImode
37523 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
37524 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
37525 {
37526 *total = 0;
37527 return false;
37528 }
37529 /* fall through */
37530
37531 case ASHIFTRT:
37532 case LSHIFTRT:
37533 case ROTATE:
37534 case ROTATERT:
37535 /* Handle mul_highpart. */
37536 if (outer_code == TRUNCATE
37537 && GET_CODE (XEXP (x, 0)) == MULT)
37538 {
37539 if (mode == DImode)
37540 *total = rs6000_cost->muldi;
37541 else
37542 *total = rs6000_cost->mulsi;
37543 return true;
37544 }
37545 else if (outer_code == AND)
37546 *total = 0;
37547 else
37548 *total = COSTS_N_INSNS (1);
37549 return false;
37550
37551 case SIGN_EXTEND:
37552 case ZERO_EXTEND:
37553 if (GET_CODE (XEXP (x, 0)) == MEM)
37554 *total = 0;
37555 else
37556 *total = COSTS_N_INSNS (1);
37557 return false;
37558
37559 case COMPARE:
37560 case NEG:
37561 case ABS:
37562 if (!FLOAT_MODE_P (mode))
37563 {
37564 *total = COSTS_N_INSNS (1);
37565 return false;
37566 }
37567 /* FALLTHRU */
37568
37569 case FLOAT:
37570 case UNSIGNED_FLOAT:
37571 case FIX:
37572 case UNSIGNED_FIX:
37573 case FLOAT_TRUNCATE:
37574 *total = rs6000_cost->fp;
37575 return false;
37576
37577 case FLOAT_EXTEND:
37578 if (mode == DFmode)
37579 *total = rs6000_cost->sfdf_convert;
37580 else
37581 *total = rs6000_cost->fp;
37582 return false;
37583
37584 case UNSPEC:
37585 switch (XINT (x, 1))
37586 {
37587 case UNSPEC_FRSP:
37588 *total = rs6000_cost->fp;
37589 return true;
37590
37591 default:
37592 break;
37593 }
37594 break;
37595
37596 case CALL:
37597 case IF_THEN_ELSE:
37598 if (!speed)
37599 {
37600 *total = COSTS_N_INSNS (1);
37601 return true;
37602 }
37603 else if (FLOAT_MODE_P (mode)
37604 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
37605 {
37606 *total = rs6000_cost->fp;
37607 return false;
37608 }
37609 break;
37610
37611 case NE:
37612 case EQ:
37613 case GTU:
37614 case LTU:
37615 /* Carry bit requires mode == Pmode.
37616 NEG or PLUS already counted so only add one. */
37617 if (mode == Pmode
37618 && (outer_code == NEG || outer_code == PLUS))
37619 {
37620 *total = COSTS_N_INSNS (1);
37621 return true;
37622 }
37623 if (outer_code == SET)
37624 {
37625 if (XEXP (x, 1) == const0_rtx)
37626 {
37627 if (TARGET_ISEL && !TARGET_MFCRF)
37628 *total = COSTS_N_INSNS (8);
37629 else
37630 *total = COSTS_N_INSNS (2);
37631 return true;
37632 }
37633 else
37634 {
37635 *total = COSTS_N_INSNS (3);
37636 return false;
37637 }
37638 }
37639 /* FALLTHRU */
37640
37641 case GT:
37642 case LT:
37643 case UNORDERED:
37644 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
37645 {
37646 if (TARGET_ISEL && !TARGET_MFCRF)
37647 *total = COSTS_N_INSNS (8);
37648 else
37649 *total = COSTS_N_INSNS (2);
37650 return true;
37651 }
37652 /* CC COMPARE. */
37653 if (outer_code == COMPARE)
37654 {
37655 *total = 0;
37656 return true;
37657 }
37658 break;
37659
37660 default:
37661 break;
37662 }
37663
37664 return false;
37665 }
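/* As a concrete example of the CONST_INT handling above: the constant
   8 under an outer PLUS satisfies constraint "I" (a signed 16-bit
   immediate), so it is costed at 0 because it folds into a single
   addi.  A general 32-bit constant under SET instead falls through to
   the second test and is charged COSTS_N_INSNS (1).  */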
37666
37667 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
37668
37669 static bool
37670 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
37671 int opno, int *total, bool speed)
37672 {
37673 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
37674
37675 fprintf (stderr,
37676 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
37677 "opno = %d, total = %d, speed = %s, x:\n",
37678 ret ? "complete" : "scan inner",
37679 GET_MODE_NAME (mode),
37680 GET_RTX_NAME (outer_code),
37681 opno,
37682 *total,
37683 speed ? "true" : "false");
37684
37685 debug_rtx (x);
37686
37687 return ret;
37688 }
37689
37690 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
37691
37692 static int
37693 rs6000_debug_address_cost (rtx x, machine_mode mode,
37694 addr_space_t as, bool speed)
37695 {
37696 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
37697
37698 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
37699 ret, speed ? "true" : "false");
37700 debug_rtx (x);
37701
37702 return ret;
37703 }
37704
37705
37706 /* A C expression returning the cost of moving data of MODE from a register of
37707 class FROM to one of class TO. */
37708
37709 static int
37710 rs6000_register_move_cost (machine_mode mode,
37711 reg_class_t from, reg_class_t to)
37712 {
37713 int ret;
37714
37715 if (TARGET_DEBUG_COST)
37716 dbg_cost_ctrl++;
37717
37718 /* Moves from/to GENERAL_REGS. */
37719 if (reg_classes_intersect_p (to, GENERAL_REGS)
37720 || reg_classes_intersect_p (from, GENERAL_REGS))
37721 {
37722 reg_class_t rclass = from;
37723
37724 if (! reg_classes_intersect_p (to, GENERAL_REGS))
37725 rclass = to;
37726
37727 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
37728 ret = (rs6000_memory_move_cost (mode, rclass, false)
37729 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
37730
37731 /* It's more expensive to move CR_REGS than CR0_REGS because of the
37732 shift. */
37733 else if (rclass == CR_REGS)
37734 ret = 4;
37735
37736 /* For those processors that have slow LR/CTR moves, make them more
37737 expensive than memory in order to bias spills to memory. */
37738 else if ((rs6000_cpu == PROCESSOR_POWER6
37739 || rs6000_cpu == PROCESSOR_POWER7
37740 || rs6000_cpu == PROCESSOR_POWER8
37741 || rs6000_cpu == PROCESSOR_POWER9)
37742 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
37743 ret = 6 * hard_regno_nregs[0][mode];
37744
37745 else
37746 /* A move will cost one instruction per GPR moved. */
37747 ret = 2 * hard_regno_nregs[0][mode];
37748 }
37749
37750 /* If we have VSX, we can easily move between FPR or Altivec registers. */
37751 else if (VECTOR_MEM_VSX_P (mode)
37752 && reg_classes_intersect_p (to, VSX_REGS)
37753 && reg_classes_intersect_p (from, VSX_REGS))
37754 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
37755
37756 /* Moving between two similar registers is just one instruction. */
37757 else if (reg_classes_intersect_p (to, from))
37758 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
37759
37760 /* Everything else has to go through GENERAL_REGS. */
37761 else
37762 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
37763 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
37764
37765 if (TARGET_DEBUG_COST)
37766 {
37767 if (dbg_cost_ctrl == 1)
37768 fprintf (stderr,
37769 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
37770 ret, GET_MODE_NAME (mode), reg_class_names[from],
37771 reg_class_names[to]);
37772 dbg_cost_ctrl--;
37773 }
37774
37775 return ret;
37776 }
37777
37778 /* A C expression returning the cost of moving data of MODE from a register to
37779 or from memory. */
37780
37781 static int
37782 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
37783 bool in ATTRIBUTE_UNUSED)
37784 {
37785 int ret;
37786
37787 if (TARGET_DEBUG_COST)
37788 dbg_cost_ctrl++;
37789
37790 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
37791 ret = 4 * hard_regno_nregs[0][mode];
37792 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
37793 || reg_classes_intersect_p (rclass, VSX_REGS)))
37794 ret = 4 * hard_regno_nregs[32][mode];
37795 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
37796 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
37797 else
37798 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
37799
37800 if (TARGET_DEBUG_COST)
37801 {
37802 if (dbg_cost_ctrl == 1)
37803 fprintf (stderr,
37804 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
37805 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
37806 dbg_cost_ctrl--;
37807 }
37808
37809 return ret;
37810 }
37811
37812 /* Returns a code for a target-specific builtin that implements
37813 the reciprocal of the function, or NULL_TREE if not available. */
37814
37815 static tree
37816 rs6000_builtin_reciprocal (tree fndecl)
37817 {
37818 switch (DECL_FUNCTION_CODE (fndecl))
37819 {
37820 case VSX_BUILTIN_XVSQRTDP:
37821 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
37822 return NULL_TREE;
37823
37824 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
37825
37826 case VSX_BUILTIN_XVSQRTSP:
37827 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
37828 return NULL_TREE;
37829
37830 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
37831
37832 default:
37833 return NULL_TREE;
37834 }
37835 }
37836
37837 /* Load up a constant. If the mode is a vector mode, splat the value across
37838 all of the vector elements. */
37839
37840 static rtx
37841 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
37842 {
37843 rtx reg;
37844
37845 if (mode == SFmode || mode == DFmode)
37846 {
37847 rtx d = const_double_from_real_value (dconst, mode);
37848 reg = force_reg (mode, d);
37849 }
37850 else if (mode == V4SFmode)
37851 {
37852 rtx d = const_double_from_real_value (dconst, SFmode);
37853 rtvec v = gen_rtvec (4, d, d, d, d);
37854 reg = gen_reg_rtx (mode);
37855 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37856 }
37857 else if (mode == V2DFmode)
37858 {
37859 rtx d = const_double_from_real_value (dconst, DFmode);
37860 rtvec v = gen_rtvec (2, d, d);
37861 reg = gen_reg_rtx (mode);
37862 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
37863 }
37864 else
37865 gcc_unreachable ();
37866
37867 return reg;
37868 }
37869
37870 /* Generate an FMA instruction. */
37871
37872 static void
37873 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
37874 {
37875 machine_mode mode = GET_MODE (target);
37876 rtx dst;
37877
37878 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
37879 gcc_assert (dst != NULL);
37880
37881 if (dst != target)
37882 emit_move_insn (target, dst);
37883 }
37884
37885 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
37886
37887 static void
37888 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
37889 {
37890 machine_mode mode = GET_MODE (dst);
37891 rtx r;
37892
37893 /* This is a tad more complicated, since the fnma_optab is for
37894 a different expression: fma(-m1, m2, a), which is the same
37895 thing except in the case of signed zeros.
37896
37897 Fortunately we know that if FMA is supported that FNMSUB is
37898 also supported in the ISA. Just expand it directly. */
37899
37900 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
37901
37902 r = gen_rtx_NEG (mode, a);
37903 r = gen_rtx_FMA (mode, m1, m2, r);
37904 r = gen_rtx_NEG (mode, r);
37905 emit_insn (gen_rtx_SET (dst, r));
37906 }
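/* The signed-zero difference mentioned above: when m1 * m2 == a
   exactly, fma (-m1, m2, a) rounds to +0.0, while the
   -fma (m1, m2, -a) form generated here yields -0.0, matching the
   FNMSUB instruction this expands to.  */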
37907
37908 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
37909 add a reg_note saying that this was a division. Support both scalar and
37910 vector divide. Assumes no trapping math and finite arguments. */
37911
37912 void
37913 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
37914 {
37915 machine_mode mode = GET_MODE (dst);
37916 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
37917 int i;
37918
37919 /* Low precision estimates guarantee 5 bits of accuracy. High
37920 precision estimates guarantee 14 bits of accuracy. SFmode
37921 requires 23 bits of accuracy. DFmode requires 52 bits of
37922 accuracy. Each pass at least doubles the accuracy, leading
37923 to the following. */
37924 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
37925 if (mode == DFmode || mode == V2DFmode)
37926 passes++;
37927
37928 enum insn_code code = optab_handler (smul_optab, mode);
37929 insn_gen_fn gen_mul = GEN_FCN (code);
37930
37931 gcc_assert (code != CODE_FOR_nothing);
37932
37933 one = rs6000_load_constant_and_splat (mode, dconst1);
37934
37935 /* x0 = 1./d estimate */
37936 x0 = gen_reg_rtx (mode);
37937 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
37938 UNSPEC_FRES)));
37939
37940 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
37941 if (passes > 1) {
37942
37943 /* e0 = 1. - d * x0 */
37944 e0 = gen_reg_rtx (mode);
37945 rs6000_emit_nmsub (e0, d, x0, one);
37946
37947 /* x1 = x0 + e0 * x0 */
37948 x1 = gen_reg_rtx (mode);
37949 rs6000_emit_madd (x1, e0, x0, x0);
37950
37951 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
37952 ++i, xprev = xnext, eprev = enext) {
37953
37954 /* enext = eprev * eprev */
37955 enext = gen_reg_rtx (mode);
37956 emit_insn (gen_mul (enext, eprev, eprev));
37957
37958 /* xnext = xprev + enext * xprev */
37959 xnext = gen_reg_rtx (mode);
37960 rs6000_emit_madd (xnext, enext, xprev, xprev);
37961 }
37962
37963 } else
37964 xprev = x0;
37965
37966 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
37967
37968 /* u = n * xprev */
37969 u = gen_reg_rtx (mode);
37970 emit_insn (gen_mul (u, n, xprev));
37971
37972 /* v = n - (d * u) */
37973 v = gen_reg_rtx (mode);
37974 rs6000_emit_nmsub (v, d, u, n);
37975
37976 /* dst = (v * xprev) + u */
37977 rs6000_emit_madd (dst, v, xprev, u);
37978
37979 if (note_p)
37980 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
37981 }
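/* The arithmetic behind the pass count: write x_i = (1 - e_i) / d.
   Then e0 = 1 - d * x0 recovers the relative error, and
   x1 = x0 + e0 * x0 = (1 - e0 * e0) / d, so each refinement squares
   the error.  A 14-bit estimate (TARGET_RECIP_PRECISION) thus gives
   about 28 good bits after the single SFmode pass and about 56 after
   the two DFmode passes, covering the required 23 and 52 bits; the
   5-bit estimate needs the 3 and 4 passes used above.  */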
37982
37983 /* Goldschmidt's Algorithm for single/double-precision floating point
37984 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
37985
37986 void
37987 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
37988 {
37989 machine_mode mode = GET_MODE (src);
37990 rtx e = gen_reg_rtx (mode);
37991 rtx g = gen_reg_rtx (mode);
37992 rtx h = gen_reg_rtx (mode);
37993
37994 /* Low precision estimates guarantee 5 bits of accuracy. High
37995 precision estimates guarantee 14 bits of accuracy. SFmode
37996 requires 23 bits of accuracy. DFmode requires 52 bits of
37997 accuracy. Each pass at least doubles the accuracy, leading
37998 to the following. */
37999 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
38000 if (mode == DFmode || mode == V2DFmode)
38001 passes++;
38002
38003 int i;
38004 rtx mhalf;
38005 enum insn_code code = optab_handler (smul_optab, mode);
38006 insn_gen_fn gen_mul = GEN_FCN (code);
38007
38008 gcc_assert (code != CODE_FOR_nothing);
38009
38010 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
38011
38012 /* e = rsqrt estimate */
38013 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
38014 UNSPEC_RSQRT)));
38015
38016 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
38017 if (!recip)
38018 {
38019 rtx zero = force_reg (mode, CONST0_RTX (mode));
38020
38021 if (mode == SFmode)
38022 {
38023 rtx target = emit_conditional_move (e, GT, src, zero, mode,
38024 e, zero, mode, 0);
38025 if (target != e)
38026 emit_move_insn (e, target);
38027 }
38028 else
38029 {
38030 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
38031 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
38032 }
38033 }
38034
38035 /* g = sqrt estimate. */
38036 emit_insn (gen_mul (g, e, src));
38037 /* h = 1/(2*sqrt) estimate. */
38038 emit_insn (gen_mul (h, e, mhalf));
38039
38040 if (recip)
38041 {
38042 if (passes == 1)
38043 {
38044 rtx t = gen_reg_rtx (mode);
38045 rs6000_emit_nmsub (t, g, h, mhalf);
38046 /* Apply correction directly to 1/rsqrt estimate. */
38047 rs6000_emit_madd (dst, e, t, e);
38048 }
38049 else
38050 {
38051 for (i = 0; i < passes; i++)
38052 {
38053 rtx t1 = gen_reg_rtx (mode);
38054 rtx g1 = gen_reg_rtx (mode);
38055 rtx h1 = gen_reg_rtx (mode);
38056
38057 rs6000_emit_nmsub (t1, g, h, mhalf);
38058 rs6000_emit_madd (g1, g, t1, g);
38059 rs6000_emit_madd (h1, h, t1, h);
38060
38061 g = g1;
38062 h = h1;
38063 }
38064 /* Multiply by 2 for 1/rsqrt. */
38065 emit_insn (gen_add3_insn (dst, h, h));
38066 }
38067 }
38068 else
38069 {
38070 rtx t = gen_reg_rtx (mode);
38071 rs6000_emit_nmsub (t, g, h, mhalf);
38072 rs6000_emit_madd (dst, g, t, g);
38073 }
38074
38075 return;
38076 }
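/* In Goldschmidt form: e approximates 1/sqrt(src), so g = e * src
   approximates sqrt(src) and h = e / 2 approximates 1/(2*sqrt(src)).
   Each iteration computes the residual t = 1/2 - g * h via
   rs6000_emit_nmsub, then g' = g + g * t and h' = h + h * t, which
   again roughly squares the relative error.  At the end g has
   converged to sqrt(src) and 2 * h (emitted as h + h) to
   1/sqrt(src).  */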
38077
38078 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
38079 (Power7) targets. DST is the target, and SRC is the argument operand. */
38080
38081 void
38082 rs6000_emit_popcount (rtx dst, rtx src)
38083 {
38084 machine_mode mode = GET_MODE (dst);
38085 rtx tmp1, tmp2;
38086
38087 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
38088 if (TARGET_POPCNTD)
38089 {
38090 if (mode == SImode)
38091 emit_insn (gen_popcntdsi2 (dst, src));
38092 else
38093 emit_insn (gen_popcntddi2 (dst, src));
38094 return;
38095 }
38096
38097 tmp1 = gen_reg_rtx (mode);
38098
38099 if (mode == SImode)
38100 {
38101 emit_insn (gen_popcntbsi2 (tmp1, src));
38102 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
38103 NULL_RTX, 0);
38104 tmp2 = force_reg (SImode, tmp2);
38105 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
38106 }
38107 else
38108 {
38109 emit_insn (gen_popcntbdi2 (tmp1, src));
38110 tmp2 = expand_mult (DImode, tmp1,
38111 GEN_INT ((HOST_WIDE_INT)
38112 0x01010101 << 32 | 0x01010101),
38113 NULL_RTX, 0);
38114 tmp2 = force_reg (DImode, tmp2);
38115 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
38116 }
38117 }
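/* A worked example of the multiply trick for SImode: popcntb leaves a
   per-byte bit count in each byte, so src = 0xf00f00ff gives
   tmp1 = 0x04040008.  Multiplying by 0x01010101 accumulates the sum
   of all four bytes into the most significant byte (here 0x10), and
   the shift right by 24 extracts it: 16, the population count of
   src.  */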
38118
38119
38120 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
38121 target, and SRC is the argument operand. */
38122
38123 void
38124 rs6000_emit_parity (rtx dst, rtx src)
38125 {
38126 machine_mode mode = GET_MODE (dst);
38127 rtx tmp;
38128
38129 tmp = gen_reg_rtx (mode);
38130
38131 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
38132 if (TARGET_CMPB)
38133 {
38134 if (mode == SImode)
38135 {
38136 emit_insn (gen_popcntbsi2 (tmp, src));
38137 emit_insn (gen_paritysi2_cmpb (dst, tmp));
38138 }
38139 else
38140 {
38141 emit_insn (gen_popcntbdi2 (tmp, src));
38142 emit_insn (gen_paritydi2_cmpb (dst, tmp));
38143 }
38144 return;
38145 }
38146
38147 if (mode == SImode)
38148 {
38149 /* Is mult+shift >= shift+xor+shift+xor? */
38150 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
38151 {
38152 rtx tmp1, tmp2, tmp3, tmp4;
38153
38154 tmp1 = gen_reg_rtx (SImode);
38155 emit_insn (gen_popcntbsi2 (tmp1, src));
38156
38157 tmp2 = gen_reg_rtx (SImode);
38158 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
38159 tmp3 = gen_reg_rtx (SImode);
38160 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
38161
38162 tmp4 = gen_reg_rtx (SImode);
38163 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
38164 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
38165 }
38166 else
38167 rs6000_emit_popcount (tmp, src);
38168 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
38169 }
38170 else
38171 {
38172 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
38173 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
38174 {
38175 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
38176
38177 tmp1 = gen_reg_rtx (DImode);
38178 emit_insn (gen_popcntbdi2 (tmp1, src));
38179
38180 tmp2 = gen_reg_rtx (DImode);
38181 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
38182 tmp3 = gen_reg_rtx (DImode);
38183 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
38184
38185 tmp4 = gen_reg_rtx (DImode);
38186 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
38187 tmp5 = gen_reg_rtx (DImode);
38188 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
38189
38190 tmp6 = gen_reg_rtx (DImode);
38191 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
38192 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
38193 }
38194 else
38195 rs6000_emit_popcount (tmp, src);
38196 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
38197 }
38198 }
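
/* A host-side sketch of the shift/xor fallback above (illustrative
   only; the loop stands in for popcntb's per-byte counts):

     unsigned si_parity (unsigned x)
     {
       unsigned t = 0;
       for (int i = 0; i < 4; i++)
         t |= (unsigned) __builtin_popcount ((x >> (8 * i)) & 0xff)
              << (8 * i);
       t ^= t >> 16;
       t ^= t >> 8;
       return t & 1;
     }

   XOR preserves bit 0 of a sum, so bit 0 of the folded low byte equals
   popcount (x) & 1.  The DImode variant simply adds one more fold, a
   shift by 32. */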
38199
38200 /* Expand an Altivec constant permutation for little endian mode.
38201 There are two issues: First, the two input operands must be
38202 swapped so that together they form a double-wide array in LE
38203 order. Second, the vperm instruction has surprising behavior
38204 in LE mode: it interprets the elements of the source vectors
38205 in BE mode ("left to right") and interprets the elements of
38206 the destination vector in LE mode ("right to left"). To
38207 correct for this, we must subtract each element of the permute
38208 control vector from 31.
38209
38210 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
38211 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
38212 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
38213 serve as the permute control vector. Then, in BE mode,
38214
38215 vperm 9,10,11,12
38216
38217 places the desired result in vr9. However, in LE mode the
38218 vector contents will be
38219
38220 vr10 = 00000003 00000002 00000001 00000000
38221 vr11 = 00000007 00000006 00000005 00000004
38222
38223 The result of the vperm using the same permute control vector is
38224
38225 vr9 = 05000000 07000000 01000000 03000000
38226
38227 That is, the leftmost 4 bytes of vr10 are interpreted as the
38228 source for the rightmost 4 bytes of vr9, and so on.
38229
38230 If we change the permute control vector to
38231
38232 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
38233
38234 and issue
38235
38236 vperm 9,11,10,12
38237
38238 we get the desired
38239
38240 vr9 = 00000006 00000004 00000002 00000000. */
38241
38242 void
38243 altivec_expand_vec_perm_const_le (rtx operands[4])
38244 {
38245 unsigned int i;
38246 rtx perm[16];
38247 rtx constv, unspec;
38248 rtx target = operands[0];
38249 rtx op0 = operands[1];
38250 rtx op1 = operands[2];
38251 rtx sel = operands[3];
38252
38253 /* Unpack and adjust the constant selector. */
38254 for (i = 0; i < 16; ++i)
38255 {
38256 rtx e = XVECEXP (sel, 0, i);
38257 unsigned int elt = 31 - (INTVAL (e) & 31);
38258 perm[i] = GEN_INT (elt);
38259 }
38260
38261 /* Expand to a permute, swapping the inputs and using the
38262 adjusted selector. */
38263 if (!REG_P (op0))
38264 op0 = force_reg (V16QImode, op0);
38265 if (!REG_P (op1))
38266 op1 = force_reg (V16QImode, op1);
38267
38268 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
38269 constv = force_reg (V16QImode, constv);
38270 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
38271 UNSPEC_VPERM);
38272 if (!REG_P (target))
38273 {
38274 rtx tmp = gen_reg_rtx (V16QImode);
38275 emit_move_insn (tmp, unspec);
38276 unspec = tmp;
38277 }
38278
38279 emit_move_insn (target, unspec);
38280 }
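
/* In short, the whole adjustment above amounts to (sketch):

     for (i = 0; i < 16; i++)
       sel_le[i] = 31 - (sel_be[i] & 31);
     vperm target, op1, op0, sel_le      with the inputs swapped

   which is exactly what the unpack loop and the UNSPEC_VPERM with
   reversed operands implement. */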
38281
38282 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
38283 permute control vector. But here it's not a constant, so we must
38284 generate a vector NAND or NOR to do the adjustment. */
38285
38286 void
38287 altivec_expand_vec_perm_le (rtx operands[4])
38288 {
38289 rtx notx, iorx, unspec;
38290 rtx target = operands[0];
38291 rtx op0 = operands[1];
38292 rtx op1 = operands[2];
38293 rtx sel = operands[3];
38294 rtx tmp = target;
38295 rtx norreg = gen_reg_rtx (V16QImode);
38296 machine_mode mode = GET_MODE (target);
38297
38298 /* Get everything in regs so the pattern matches. */
38299 if (!REG_P (op0))
38300 op0 = force_reg (mode, op0);
38301 if (!REG_P (op1))
38302 op1 = force_reg (mode, op1);
38303 if (!REG_P (sel))
38304 sel = force_reg (V16QImode, sel);
38305 if (!REG_P (target))
38306 tmp = gen_reg_rtx (mode);
38307
38308 if (TARGET_P9_VECTOR)
38309 {
38310 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
38311 UNSPEC_VPERMR);
38312 }
38313 else
38314 {
38315 /* Invert the selector with a VNAND if available, else a VNOR.
38316 The VNAND is preferred for future fusion opportunities. */
38317 notx = gen_rtx_NOT (V16QImode, sel);
38318 iorx = (TARGET_P8_VECTOR
38319 ? gen_rtx_IOR (V16QImode, notx, notx)
38320 : gen_rtx_AND (V16QImode, notx, notx));
38321 emit_insn (gen_rtx_SET (norreg, iorx));
38322
38323 /* Permute with operands reversed and adjusted selector. */
38324 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
38325 UNSPEC_VPERM);
38326 }
38327
38328 /* Copy into target, possibly by way of a register. */
38329 if (!REG_P (target))
38330 {
38331 emit_move_insn (tmp, unspec);
38332 unspec = tmp;
38333 }
38334
38335 emit_move_insn (target, unspec);
38336 }
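
/* Note that the inversion above is just De Morgan applied to a single
   operand: vnand (sel, sel) computes ~sel | ~sel = ~sel, and
   vnor (sel, sel) computes ~sel & ~sel = ~sel, so either instruction
   acts as a one-instruction vector NOT of the selector. */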
38337
38338 /* Expand an Altivec constant permutation. Return true if we match
38339 an efficient implementation; false to fall back to VPERM. */
38340
38341 bool
38342 altivec_expand_vec_perm_const (rtx operands[4])
38343 {
38344 struct altivec_perm_insn {
38345 HOST_WIDE_INT mask;
38346 enum insn_code impl;
38347 unsigned char perm[16];
38348 };
38349 static const struct altivec_perm_insn patterns[] = {
38350 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
38351 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
38352 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
38353 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
38354 { OPTION_MASK_ALTIVEC,
38355 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
38356 : CODE_FOR_altivec_vmrglb_direct),
38357 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
38358 { OPTION_MASK_ALTIVEC,
38359 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
38360 : CODE_FOR_altivec_vmrglh_direct),
38361 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
38362 { OPTION_MASK_ALTIVEC,
38363 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
38364 : CODE_FOR_altivec_vmrglw_direct),
38365 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
38366 { OPTION_MASK_ALTIVEC,
38367 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
38368 : CODE_FOR_altivec_vmrghb_direct),
38369 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
38370 { OPTION_MASK_ALTIVEC,
38371 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
38372 : CODE_FOR_altivec_vmrghh_direct),
38373 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
38374 { OPTION_MASK_ALTIVEC,
38375 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
38376 : CODE_FOR_altivec_vmrghw_direct),
38377 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
38378 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
38379 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
38380 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
38381 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
38382 };
38383
38384 unsigned int i, j, elt, which;
38385 unsigned char perm[16];
38386 rtx target, op0, op1, sel, x;
38387 bool one_vec;
38388
38389 target = operands[0];
38390 op0 = operands[1];
38391 op1 = operands[2];
38392 sel = operands[3];
38393
38394 /* Unpack the constant selector. */
38395 for (i = which = 0; i < 16; ++i)
38396 {
38397 rtx e = XVECEXP (sel, 0, i);
38398 elt = INTVAL (e) & 31;
38399 which |= (elt < 16 ? 1 : 2);
38400 perm[i] = elt;
38401 }
38402
38403 /* Simplify the constant selector based on operands. */
38404 switch (which)
38405 {
38406 default:
38407 gcc_unreachable ();
38408
38409 case 3:
38410 one_vec = false;
38411 if (!rtx_equal_p (op0, op1))
38412 break;
38413 /* FALLTHRU */
38414
38415 case 2:
38416 for (i = 0; i < 16; ++i)
38417 perm[i] &= 15;
38418 op0 = op1;
38419 one_vec = true;
38420 break;
38421
38422 case 1:
38423 op1 = op0;
38424 one_vec = true;
38425 break;
38426 }
38427
38428 /* Look for splat patterns. */
38429 if (one_vec)
38430 {
38431 elt = perm[0];
38432
38433 for (i = 0; i < 16; ++i)
38434 if (perm[i] != elt)
38435 break;
38436 if (i == 16)
38437 {
38438 if (!BYTES_BIG_ENDIAN)
38439 elt = 15 - elt;
38440 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
38441 return true;
38442 }
38443
38444 if (elt % 2 == 0)
38445 {
38446 for (i = 0; i < 16; i += 2)
38447 if (perm[i] != elt || perm[i + 1] != elt + 1)
38448 break;
38449 if (i == 16)
38450 {
38451 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
38452 x = gen_reg_rtx (V8HImode);
38453 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
38454 GEN_INT (field)));
38455 emit_move_insn (target, gen_lowpart (V16QImode, x));
38456 return true;
38457 }
38458 }
38459
38460 if (elt % 4 == 0)
38461 {
38462 for (i = 0; i < 16; i += 4)
38463 if (perm[i] != elt
38464 || perm[i + 1] != elt + 1
38465 || perm[i + 2] != elt + 2
38466 || perm[i + 3] != elt + 3)
38467 break;
38468 if (i == 16)
38469 {
38470 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
38471 x = gen_reg_rtx (V4SImode);
38472 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
38473 GEN_INT (field)));
38474 emit_move_insn (target, gen_lowpart (V16QImode, x));
38475 return true;
38476 }
38477 }
38478 }
38479
38480 /* Look for merge and pack patterns. */
38481 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
38482 {
38483 bool swapped;
38484
38485 if ((patterns[j].mask & rs6000_isa_flags) == 0)
38486 continue;
38487
38488 elt = patterns[j].perm[0];
38489 if (perm[0] == elt)
38490 swapped = false;
38491 else if (perm[0] == elt + 16)
38492 swapped = true;
38493 else
38494 continue;
38495 for (i = 1; i < 16; ++i)
38496 {
38497 elt = patterns[j].perm[i];
38498 if (swapped)
38499 elt = (elt >= 16 ? elt - 16 : elt + 16);
38500 else if (one_vec && elt >= 16)
38501 elt -= 16;
38502 if (perm[i] != elt)
38503 break;
38504 }
38505 if (i == 16)
38506 {
38507 enum insn_code icode = patterns[j].impl;
38508 machine_mode omode = insn_data[icode].operand[0].mode;
38509 machine_mode imode = insn_data[icode].operand[1].mode;
38510
38511 /* For little-endian, don't use vpkuwum and vpkuhum if the
38512 underlying vector type is not V4SI and V8HI, respectively.
38513 For example, using vpkuwum with a V8HI picks up the even
38514 halfwords (BE numbering) when the even halfwords (LE
38515 numbering) are what we need. */
38516 if (!BYTES_BIG_ENDIAN
38517 && icode == CODE_FOR_altivec_vpkuwum_direct
38518 && ((GET_CODE (op0) == REG
38519 && GET_MODE (op0) != V4SImode)
38520 || (GET_CODE (op0) == SUBREG
38521 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
38522 continue;
38523 if (!BYTES_BIG_ENDIAN
38524 && icode == CODE_FOR_altivec_vpkuhum_direct
38525 && ((GET_CODE (op0) == REG
38526 && GET_MODE (op0) != V8HImode)
38527 || (GET_CODE (op0) == SUBREG
38528 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
38529 continue;
38530
38531 /* For little-endian, the two input operands must be swapped
38532 (or swapped back) to ensure proper right-to-left numbering
38533 from 0 to 2N-1. */
38534 if (swapped ^ !BYTES_BIG_ENDIAN)
38535 std::swap (op0, op1);
38536 if (imode != V16QImode)
38537 {
38538 op0 = gen_lowpart (imode, op0);
38539 op1 = gen_lowpart (imode, op1);
38540 }
38541 if (omode == V16QImode)
38542 x = target;
38543 else
38544 x = gen_reg_rtx (omode);
38545 emit_insn (GEN_FCN (icode) (x, op0, op1));
38546 if (omode != V16QImode)
38547 emit_move_insn (target, gen_lowpart (V16QImode, x));
38548 return true;
38549 }
38550 }
38551
38552 if (!BYTES_BIG_ENDIAN)
38553 {
38554 altivec_expand_vec_perm_const_le (operands);
38555 return true;
38556 }
38557
38558 return false;
38559 }
38560
38561 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
38562 Return true if we match an efficient implementation. */
38563
38564 static bool
38565 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
38566 unsigned char perm0, unsigned char perm1)
38567 {
38568 rtx x;
38569
38570 /* If both selectors come from the same operand, fold to single op. */
38571 if ((perm0 & 2) == (perm1 & 2))
38572 {
38573 if (perm0 & 2)
38574 op0 = op1;
38575 else
38576 op1 = op0;
38577 }
38578 /* If both operands are equal, fold to simpler permutation. */
38579 if (rtx_equal_p (op0, op1))
38580 {
38581 perm0 = perm0 & 1;
38582 perm1 = (perm1 & 1) + 2;
38583 }
38584 /* If the first selector comes from the second operand, swap. */
38585 else if (perm0 & 2)
38586 {
38587 if (perm1 & 2)
38588 return false;
38589 perm0 -= 2;
38590 perm1 += 2;
38591 std::swap (op0, op1);
38592 }
38593 /* If the second selector does not come from the second operand, fail. */
38594 else if ((perm1 & 2) == 0)
38595 return false;
38596
38597 /* Success! */
38598 if (target != NULL)
38599 {
38600 machine_mode vmode, dmode;
38601 rtvec v;
38602
38603 vmode = GET_MODE (target);
38604 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
38605 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
38606 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
38607 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
38608 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
38609 emit_insn (gen_rtx_SET (target, x));
38610 }
38611 return true;
38612 }
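
/* For example, perm0 = 1 and perm1 = 2 request { op0[1], op1[0] }:
   no fold or swap applies, and the VEC_SELECT built above reads
   elements 1 and 2 of the four-element concatenation
   { op0[0], op0[1], op1[0], op1[1] }. */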
38613
38614 bool
38615 rs6000_expand_vec_perm_const (rtx operands[4])
38616 {
38617 rtx target, op0, op1, sel;
38618 unsigned char perm0, perm1;
38619
38620 target = operands[0];
38621 op0 = operands[1];
38622 op1 = operands[2];
38623 sel = operands[3];
38624
38625 /* Unpack the constant selector. */
38626 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
38627 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
38628
38629 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
38630 }
38631
38632 /* Test whether a constant permutation is supported. */
38633
38634 static bool
38635 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
38636 const unsigned char *sel)
38637 {
38638 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
38639 if (TARGET_ALTIVEC)
38640 return true;
38641
38642 /* Check for ps_merge* or evmerge* insns. */
38643 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
38644 || (TARGET_SPE && vmode == V2SImode))
38645 {
38646 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
38647 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
38648 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
38649 }
38650
38651 return false;
38652 }
38653
38654 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
38655
38656 static void
38657 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
38658 machine_mode vmode, unsigned nelt, rtx perm[])
38659 {
38660 machine_mode imode;
38661 rtx x;
38662
38663 imode = vmode;
38664 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
38665 imode = mode_for_vector
38666 (int_mode_for_mode (GET_MODE_INNER (vmode)).require (), nelt);
38667
38668 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
38669 x = expand_vec_perm (vmode, op0, op1, x, target);
38670 if (x != target)
38671 emit_move_insn (target, x);
38672 }
38673
38674 /* Expand an extract even operation. */
38675
38676 void
38677 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
38678 {
38679 machine_mode vmode = GET_MODE (target);
38680 unsigned i, nelt = GET_MODE_NUNITS (vmode);
38681 rtx perm[16];
38682
38683 for (i = 0; i < nelt; i++)
38684 perm[i] = GEN_INT (i * 2);
38685
38686 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38687 }
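
/* E.g. for V4SI (nelt = 4) this builds the selector { 0, 2, 4, 6 },
   i.e. the even elements of the eight-element concatenation of OP0
   and OP1. */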
38688
38689 /* Expand a vector interleave operation. */
38690
38691 void
38692 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
38693 {
38694 machine_mode vmode = GET_MODE (target);
38695 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
38696 rtx perm[16];
38697
38698 high = (highp ? 0 : nelt / 2);
38699 for (i = 0; i < nelt / 2; i++)
38700 {
38701 perm[i * 2] = GEN_INT (i + high);
38702 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
38703 }
38704
38705 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
38706 }
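
/* E.g. for V4SI (nelt = 4), HIGHP builds the selector { 0, 4, 1, 5 }
   and !HIGHP builds { 2, 6, 3, 7 }, interleaving the high or low
   halves of OP0 and OP1 respectively. */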
38707
38708 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
38709 void
38710 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
38711 {
38712 HOST_WIDE_INT hwi_scale (scale);
38713 REAL_VALUE_TYPE r_pow;
38714 rtvec v = rtvec_alloc (2);
38715 rtx elt;
38716 rtx scale_vec = gen_reg_rtx (V2DFmode);
38717 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
38718 elt = const_double_from_real_value (r_pow, DFmode);
38719 RTVEC_ELT (v, 0) = elt;
38720 RTVEC_ELT (v, 1) = elt;
38721 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
38722 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
38723 }
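
/* E.g. rs6000_scale_v2df (tgt, src, 3) splats 2.0**3 = 8.0 into both
   lanes of a scratch V2DF register and multiplies, so each lane of
   TGT is the corresponding lane of SRC times 8.0.  A negative SCALE
   should divide instead (real_powi with a negative exponent), e.g.
   SCALE = -1 multiplies by 0.5. */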
38724
38725 /* Return an RTX representing where to find the function value of a
38726 function returning MODE. */
38727 static rtx
38728 rs6000_complex_function_value (machine_mode mode)
38729 {
38730 unsigned int regno;
38731 rtx r1, r2;
38732 machine_mode inner = GET_MODE_INNER (mode);
38733 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
38734
38735 if (TARGET_FLOAT128_TYPE
38736 && (mode == KCmode
38737 || (mode == TCmode && TARGET_IEEEQUAD)))
38738 regno = ALTIVEC_ARG_RETURN;
38739
38740 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38741 regno = FP_ARG_RETURN;
38742
38743 else
38744 {
38745 regno = GP_ARG_RETURN;
38746
38747 /* 32-bit is OK since it'll go in r3/r4. */
38748 if (TARGET_32BIT && inner_bytes >= 4)
38749 return gen_rtx_REG (mode, regno);
38750 }
38751
38752 if (inner_bytes >= 8)
38753 return gen_rtx_REG (mode, regno);
38754
38755 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
38756 const0_rtx);
38757 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
38758 GEN_INT (inner_bytes));
38759 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
38760 }
38761
38762 /* Return an rtx describing a return value of MODE as a PARALLEL
38763 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
38764 stride REG_STRIDE. */
38765
38766 static rtx
38767 rs6000_parallel_return (machine_mode mode,
38768 int n_elts, machine_mode elt_mode,
38769 unsigned int regno, unsigned int reg_stride)
38770 {
38771 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
38772
38773 int i;
38774 for (i = 0; i < n_elts; i++)
38775 {
38776 rtx r = gen_rtx_REG (elt_mode, regno);
38777 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
38778 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
38779 regno += reg_stride;
38780 }
38781
38782 return par;
38783 }
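
/* E.g. rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1)
   describes a 64-bit value returned in r3 (bytes 0-3) and r4 (bytes
   4-7), which is how DImode is handled for -m32 -mpowerpc64 below. */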
38784
38785 /* Target hook for TARGET_FUNCTION_VALUE.
38786
38787 On the SPE, both FPs and vectors are returned in r3.
38788
38789 On RS/6000 an integer value is in r3 and a floating-point value is in
38790 fp1, unless -msoft-float. */
38791
38792 static rtx
38793 rs6000_function_value (const_tree valtype,
38794 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
38795 bool outgoing ATTRIBUTE_UNUSED)
38796 {
38797 machine_mode mode;
38798 unsigned int regno;
38799 machine_mode elt_mode;
38800 int n_elts;
38801
38802 /* Special handling for structs in darwin64. */
38803 if (TARGET_MACHO
38804 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
38805 {
38806 CUMULATIVE_ARGS valcum;
38807 rtx valret;
38808
38809 valcum.words = 0;
38810 valcum.fregno = FP_ARG_MIN_REG;
38811 valcum.vregno = ALTIVEC_ARG_MIN_REG;
38812 /* Do a trial code generation as if this were going to be passed as
38813 an argument; if any part goes in memory, we return NULL. */
38814 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
38815 if (valret)
38816 return valret;
38817 /* Otherwise fall through to standard ABI rules. */
38818 }
38819
38820 mode = TYPE_MODE (valtype);
38821
38822 /* The ELFv2 ABI returns homogeneous floating-point and vector aggregates in registers. */
38823 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
38824 {
38825 int first_reg, n_regs;
38826
38827 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
38828 {
38829 /* _Decimal128 must use even/odd register pairs. */
38830 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38831 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
38832 }
38833 else
38834 {
38835 first_reg = ALTIVEC_ARG_RETURN;
38836 n_regs = 1;
38837 }
38838
38839 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
38840 }
38841
38842 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
38843 if (TARGET_32BIT && TARGET_POWERPC64)
38844 switch (mode)
38845 {
38846 default:
38847 break;
38848 case E_DImode:
38849 case E_SCmode:
38850 case E_DCmode:
38851 case E_TCmode:
38852 int count = GET_MODE_SIZE (mode) / 4;
38853 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
38854 }
38855
38856 if ((INTEGRAL_TYPE_P (valtype)
38857 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
38858 || POINTER_TYPE_P (valtype))
38859 mode = TARGET_32BIT ? SImode : DImode;
38860
38861 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38862 /* _Decimal128 must use an even/odd register pair. */
38863 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38864 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
38865 && !FLOAT128_VECTOR_P (mode)
38866 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
38867 regno = FP_ARG_RETURN;
38868 else if (TREE_CODE (valtype) == COMPLEX_TYPE
38869 && targetm.calls.split_complex_arg)
38870 return rs6000_complex_function_value (mode);
38871 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38872 return register is used in both cases, and we won't see V2DImode/V2DFmode
38873 for pure altivec, combine the two cases. */
38874 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
38875 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
38876 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
38877 regno = ALTIVEC_ARG_RETURN;
38878 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38879 && (mode == DFmode || mode == DCmode
38880 || FLOAT128_IBM_P (mode) || mode == TCmode))
38881 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38882 else
38883 regno = GP_ARG_RETURN;
38884
38885 return gen_rtx_REG (mode, regno);
38886 }
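
/* As an example of the homogeneous-aggregate path above: under ELFv2,
   struct { double a, b; } yields elt_mode = DFmode and n_elts = 2, so
   the PARALLEL places the two doubles in consecutive FPRs starting at
   FP_ARG_RETURN (f1 and f2). */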
38887
38888 /* Define how to find the value returned by a library function
38889 assuming the value has mode MODE. */
38890 rtx
38891 rs6000_libcall_value (machine_mode mode)
38892 {
38893 unsigned int regno;
38894
38895 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
38896 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
38897 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
38898
38899 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
38900 /* _Decimal128 must use an even/odd register pair. */
38901 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
38902 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
38903 && TARGET_HARD_FLOAT && TARGET_FPRS
38904 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
38905 regno = FP_ARG_RETURN;
38906 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
38907 return register is used in both cases, and we won't see V2DImode/V2DFmode
38908 for pure altivec, combine the two cases. */
38909 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
38910 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
38911 regno = ALTIVEC_ARG_RETURN;
38912 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
38913 return rs6000_complex_function_value (mode);
38914 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
38915 && (mode == DFmode || mode == DCmode
38916 || FLOAT128_IBM_P (mode) || mode == TCmode))
38917 return spe_build_register_parallel (mode, GP_ARG_RETURN);
38918 else
38919 regno = GP_ARG_RETURN;
38920
38921 return gen_rtx_REG (mode, regno);
38922 }
38923
38924
38925 /* Return true if we use LRA instead of the reload pass. */
38926 static bool
38927 rs6000_lra_p (void)
38928 {
38929 return TARGET_LRA;
38930 }
38931
38932 /* Compute register pressure classes. We implement the target hook to avoid
38933 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
38934 lead to incorrect estimates of the number of available registers and
38935 therefore increased register pressure/spills. */
38936 static int
38937 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
38938 {
38939 int n;
38940
38941 n = 0;
38942 pressure_classes[n++] = GENERAL_REGS;
38943 if (TARGET_VSX)
38944 pressure_classes[n++] = VSX_REGS;
38945 else
38946 {
38947 if (TARGET_ALTIVEC)
38948 pressure_classes[n++] = ALTIVEC_REGS;
38949 if (TARGET_HARD_FLOAT && TARGET_FPRS)
38950 pressure_classes[n++] = FLOAT_REGS;
38951 }
38952 pressure_classes[n++] = CR_REGS;
38953 pressure_classes[n++] = SPECIAL_REGS;
38954
38955 return n;
38956 }
38957
38958 /* Given FROM and TO register numbers, say whether this elimination is allowed.
38959 Frame pointer elimination is automatically handled.
38960
38961 For the RS/6000, if frame pointer elimination is being done, we would like
38962 to convert ap into fp, not sp.
38963
38964 We need r30 if -mminimal-toc was specified, and there are constant pool
38965 references. */
38966
38967 static bool
38968 rs6000_can_eliminate (const int from, const int to)
38969 {
38970 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
38971 ? ! frame_pointer_needed
38972 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
38973 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
38974 || constant_pool_empty_p ()
38975 : true);
38976 }
38977
38978 /* Define the offset between two registers, FROM to be eliminated and its
38979 replacement TO, at the start of a routine. */
38980 HOST_WIDE_INT
38981 rs6000_initial_elimination_offset (int from, int to)
38982 {
38983 rs6000_stack_t *info = rs6000_stack_info ();
38984 HOST_WIDE_INT offset;
38985
38986 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38987 offset = info->push_p ? 0 : -info->total_size;
38988 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
38989 {
38990 offset = info->push_p ? 0 : -info->total_size;
38991 if (FRAME_GROWS_DOWNWARD)
38992 offset += info->fixed_size + info->vars_size + info->parm_size;
38993 }
38994 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
38995 offset = FRAME_GROWS_DOWNWARD
38996 ? info->fixed_size + info->vars_size + info->parm_size
38997 : 0;
38998 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
38999 offset = info->total_size;
39000 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
39001 offset = info->push_p ? info->total_size : 0;
39002 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
39003 offset = 0;
39004 else
39005 gcc_unreachable ();
39006
39007 return offset;
39008 }
39009
39010 static rtx
39011 rs6000_dwarf_register_span (rtx reg)
39012 {
39013 rtx parts[8];
39014 int i, words;
39015 unsigned regno = REGNO (reg);
39016 machine_mode mode = GET_MODE (reg);
39017
39018 if (TARGET_SPE
39019 && regno < 32
39020 && (SPE_VECTOR_MODE (GET_MODE (reg))
39021 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
39022 && mode != SFmode && mode != SDmode && mode != SCmode)))
39023 ;
39024 else
39025 return NULL_RTX;
39026
39027 regno = REGNO (reg);
39028
39029 /* The duality of the SPE register size wreaks all kinds of havoc.
39030 This is a way of distinguishing r0 in 32-bits from r0 in
39031 64-bits. */
39032 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
39033 gcc_assert (words <= 4);
39034 for (i = 0; i < words; i++, regno++)
39035 {
39036 if (BYTES_BIG_ENDIAN)
39037 {
39038 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39039 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
39040 }
39041 else
39042 {
39043 parts[2 * i] = gen_rtx_REG (SImode, regno);
39044 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
39045 }
39046 }
39047
39048 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
39049 }
39050
39051 /* Fill in sizes for SPE register high parts in table used by unwinder. */
39052
39053 static void
39054 rs6000_init_dwarf_reg_sizes_extra (tree address)
39055 {
39056 if (TARGET_SPE)
39057 {
39058 int i;
39059 machine_mode mode = TYPE_MODE (char_type_node);
39060 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39061 rtx mem = gen_rtx_MEM (BLKmode, addr);
39062 rtx value = gen_int_mode (4, mode);
39063
39064 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
39065 {
39066 int column = DWARF_REG_TO_UNWIND_COLUMN
39067 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39068 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39069
39070 emit_move_insn (adjust_address (mem, mode, offset), value);
39071 }
39072 }
39073
39074 if (TARGET_MACHO && ! TARGET_ALTIVEC)
39075 {
39076 int i;
39077 machine_mode mode = TYPE_MODE (char_type_node);
39078 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
39079 rtx mem = gen_rtx_MEM (BLKmode, addr);
39080 rtx value = gen_int_mode (16, mode);
39081
39082 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
39083 The unwinder still needs to know the size of Altivec registers. */
39084
39085 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
39086 {
39087 int column = DWARF_REG_TO_UNWIND_COLUMN
39088 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
39089 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
39090
39091 emit_move_insn (adjust_address (mem, mode, offset), value);
39092 }
39093 }
39094 }
39095
39096 /* Map internal gcc register numbers to debug format register numbers.
39097 FORMAT specifies the type of debug register number to use:
39098 0 -- debug information, except for frame-related sections
39099 1 -- DWARF .debug_frame section
39100 2 -- DWARF .eh_frame section */
39101
39102 unsigned int
39103 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
39104 {
39105 /* We never use the GCC internal number for SPE high registers.
39106 Those are mapped to the 1200..1231 range for all debug formats. */
39107 if (SPE_HIGH_REGNO_P (regno))
39108 return regno - FIRST_SPE_HIGH_REGNO + 1200;
39109
39110 /* Except for the above, we use the internal number for non-DWARF
39111 debug information, and also for .eh_frame. */
39112 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
39113 return regno;
39114
39115 /* On some platforms, we use the standard DWARF register
39116 numbering for .debug_info and .debug_frame. */
39117 #ifdef RS6000_USE_DWARF_NUMBERING
39118 if (regno <= 63)
39119 return regno;
39120 if (regno == LR_REGNO)
39121 return 108;
39122 if (regno == CTR_REGNO)
39123 return 109;
39124 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
39125 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
39126 The actual code emitted saves the whole of CR, so we map CR2_REGNO
39127 to the DWARF reg for CR. */
39128 if (format == 1 && regno == CR2_REGNO)
39129 return 64;
39130 if (CR_REGNO_P (regno))
39131 return regno - CR0_REGNO + 86;
39132 if (regno == CA_REGNO)
39133 return 101; /* XER */
39134 if (ALTIVEC_REGNO_P (regno))
39135 return regno - FIRST_ALTIVEC_REGNO + 1124;
39136 if (regno == VRSAVE_REGNO)
39137 return 356;
39138 if (regno == VSCR_REGNO)
39139 return 67;
39140 if (regno == SPE_ACC_REGNO)
39141 return 99;
39142 if (regno == SPEFSCR_REGNO)
39143 return 612;
39144 #endif
39145 return regno;
39146 }
39147
39148 /* target hook eh_return_filter_mode */
39149 static scalar_int_mode
39150 rs6000_eh_return_filter_mode (void)
39151 {
39152 return TARGET_32BIT ? SImode : word_mode;
39153 }
39154
39155 /* Target hook for scalar_mode_supported_p. */
39156 static bool
39157 rs6000_scalar_mode_supported_p (scalar_mode mode)
39158 {
39159 /* -m32 does not support TImode. This is the default, from
39160 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
39161 same ABI as for -m32. But default_scalar_mode_supported_p allows
39162 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
39163 for -mpowerpc64. */
39164 if (TARGET_32BIT && mode == TImode)
39165 return false;
39166
39167 if (DECIMAL_FLOAT_MODE_P (mode))
39168 return default_decimal_float_supported_p ();
39169 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
39170 return true;
39171 else
39172 return default_scalar_mode_supported_p (mode);
39173 }
39174
39175 /* Target hook for vector_mode_supported_p. */
39176 static bool
39177 rs6000_vector_mode_supported_p (machine_mode mode)
39178 {
39179
39180 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
39181 return true;
39182
39183 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
39184 return true;
39185
39186 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
39187 128-bit, the compiler might try to widen IEEE 128-bit to IBM
39188 double-double. */
39189 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
39190 return true;
39191
39192 else
39193 return false;
39194 }
39195
39196 /* Target hook for floatn_mode. */
39197 static opt_scalar_float_mode
39198 rs6000_floatn_mode (int n, bool extended)
39199 {
39200 if (extended)
39201 {
39202 switch (n)
39203 {
39204 case 32:
39205 return DFmode;
39206
39207 case 64:
39208 if (TARGET_FLOAT128_KEYWORD)
39209 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39210 else
39211 return opt_scalar_float_mode ();
39212
39213 case 128:
39214 return opt_scalar_float_mode ();
39215
39216 default:
39217 /* Those are the only valid _FloatNx types. */
39218 gcc_unreachable ();
39219 }
39220 }
39221 else
39222 {
39223 switch (n)
39224 {
39225 case 32:
39226 return SFmode;
39227
39228 case 64:
39229 return DFmode;
39230
39231 case 128:
39232 if (TARGET_FLOAT128_KEYWORD)
39233 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39234 else
39235 return opt_scalar_float_mode ();
39236
39237 default:
39238 return opt_scalar_float_mode ();
39239 }
39240 }
39241
39242 }
39243
39244 /* Target hook for c_mode_for_suffix. */
39245 static machine_mode
39246 rs6000_c_mode_for_suffix (char suffix)
39247 {
39248 if (TARGET_FLOAT128_TYPE)
39249 {
39250 if (suffix == 'q' || suffix == 'Q')
39251 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
39252
39253 /* At the moment, we are not defining a suffix for IBM extended double.
39254 If/when the default for -mabi=ieeelongdouble is changed, and we want
39255 to support __ibm128 constants in legacy library code, we may need to
39256 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
39257 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
39258 __float80 constants. */
39259 }
39260
39261 return VOIDmode;
39262 }
39263
39264 /* Target hook for invalid_arg_for_unprototyped_fn. */
39265 static const char *
39266 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
39267 {
39268 return (!rs6000_darwin64_abi
39269 && typelist == 0
39270 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
39271 && (funcdecl == NULL_TREE
39272 || (TREE_CODE (funcdecl) == FUNCTION_DECL
39273 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
39274 ? N_("AltiVec argument passed to unprototyped function")
39275 : NULL;
39276 }
39277
39278 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
39279 setup by using __stack_chk_fail_local hidden function instead of
39280 calling __stack_chk_fail directly. Otherwise it is better to call
39281 __stack_chk_fail directly. */
39282
39283 static tree ATTRIBUTE_UNUSED
39284 rs6000_stack_protect_fail (void)
39285 {
39286 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
39287 ? default_hidden_stack_protect_fail ()
39288 : default_external_stack_protect_fail ();
39289 }
39290
39291 void
39292 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
39293 int num_operands ATTRIBUTE_UNUSED)
39294 {
39295 if (rs6000_warn_cell_microcode)
39296 {
39297 const char *temp;
39298 int insn_code_number = recog_memoized (insn);
39299 location_t location = INSN_LOCATION (insn);
39300
39301 /* Punt on insns we cannot recognize. */
39302 if (insn_code_number < 0)
39303 return;
39304
39305 /* get_insn_template can modify recog_data, so save and restore it. */
39306 struct recog_data_d recog_data_save = recog_data;
39307 for (int i = 0; i < recog_data.n_operands; i++)
39308 recog_data.operand[i] = copy_rtx (recog_data.operand[i]);
39309 temp = get_insn_template (insn_code_number, insn);
39310 recog_data = recog_data_save;
39311
39312 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
39313 warning_at (location, OPT_mwarn_cell_microcode,
39314 "emitting microcode insn %s\t[%s] #%d",
39315 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39316 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
39317 warning_at (location, OPT_mwarn_cell_microcode,
39318 "emitting conditional microcode insn %s\t[%s] #%d",
39319 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
39320 }
39321 }
39322
39323 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
39324
39325 #if TARGET_ELF
39326 static unsigned HOST_WIDE_INT
39327 rs6000_asan_shadow_offset (void)
39328 {
39329 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
39330 }
39331 #endif
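
/* For reference, AddressSanitizer maps an application address A to
   the shadow address (A >> 3) + offset, so the values above place the
   shadow region at 1 << 41 for 64-bit and 1 << 29 for 32-bit ELF
   targets. */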
39332 \f
39333 /* Mask options that we want to support inside attribute((target)) and
39334 #pragma GCC target operations. Note that we do not include options such as
39335 64/32-bit, endianness, and hard/soft floating point that would require
39336 different calling sequences. */
39337
39338 struct rs6000_opt_mask {
39339 const char *name; /* option name */
39340 HOST_WIDE_INT mask; /* mask to set */
39341 bool invert; /* invert sense of mask */
39342 bool valid_target; /* option is a target option */
39343 };
39344
39345 static struct rs6000_opt_mask const rs6000_opt_masks[] =
39346 {
39347 { "altivec", OPTION_MASK_ALTIVEC, false, true },
39348 { "cmpb", OPTION_MASK_CMPB, false, true },
39349 { "crypto", OPTION_MASK_CRYPTO, false, true },
39350 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
39351 { "dlmzb", OPTION_MASK_DLMZB, false, true },
39352 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
39353 false, true },
39354 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
39355 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
39356 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
39357 { "fprnd", OPTION_MASK_FPRND, false, true },
39358 { "hard-dfp", OPTION_MASK_DFP, false, true },
39359 { "htm", OPTION_MASK_HTM, false, true },
39360 { "isel", OPTION_MASK_ISEL, false, true },
39361 { "mfcrf", OPTION_MASK_MFCRF, false, true },
39362 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
39363 { "modulo", OPTION_MASK_MODULO, false, true },
39364 { "mulhw", OPTION_MASK_MULHW, false, true },
39365 { "multiple", OPTION_MASK_MULTIPLE, false, true },
39366 { "popcntb", OPTION_MASK_POPCNTB, false, true },
39367 { "popcntd", OPTION_MASK_POPCNTD, false, true },
39368 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
39369 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
39370 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
39371 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
39372 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
39373 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
39374 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
39375 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
39376 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
39377 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
39378 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
39379 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
39380 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
39381 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
39382 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
39383 { "string", OPTION_MASK_STRING, false, true },
39384 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
39385 { "update", OPTION_MASK_NO_UPDATE, true , true },
39386 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
39387 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
39388 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
39389 { "vsx", OPTION_MASK_VSX, false, true },
39390 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
39391 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
39392 #ifdef OPTION_MASK_64BIT
39393 #if TARGET_AIX_OS
39394 { "aix64", OPTION_MASK_64BIT, false, false },
39395 { "aix32", OPTION_MASK_64BIT, true, false },
39396 #else
39397 { "64", OPTION_MASK_64BIT, false, false },
39398 { "32", OPTION_MASK_64BIT, true, false },
39399 #endif
39400 #endif
39401 #ifdef OPTION_MASK_EABI
39402 { "eabi", OPTION_MASK_EABI, false, false },
39403 #endif
39404 #ifdef OPTION_MASK_LITTLE_ENDIAN
39405 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
39406 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
39407 #endif
39408 #ifdef OPTION_MASK_RELOCATABLE
39409 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
39410 #endif
39411 #ifdef OPTION_MASK_STRICT_ALIGN
39412 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
39413 #endif
39414 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
39415 { "string", OPTION_MASK_STRING, false, false },
39416 };
39417
39418 /* Builtin mask mapping for printing the flags. */
39419 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
39420 {
39421 { "altivec", RS6000_BTM_ALTIVEC, false, false },
39422 { "vsx", RS6000_BTM_VSX, false, false },
39423 { "spe", RS6000_BTM_SPE, false, false },
39424 { "paired", RS6000_BTM_PAIRED, false, false },
39425 { "fre", RS6000_BTM_FRE, false, false },
39426 { "fres", RS6000_BTM_FRES, false, false },
39427 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
39428 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
39429 { "popcntd", RS6000_BTM_POPCNTD, false, false },
39430 { "cell", RS6000_BTM_CELL, false, false },
39431 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
39432 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
39433 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
39434 { "crypto", RS6000_BTM_CRYPTO, false, false },
39435 { "htm", RS6000_BTM_HTM, false, false },
39436 { "hard-dfp", RS6000_BTM_DFP, false, false },
39437 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
39438 { "long-double-128", RS6000_BTM_LDBL128, false, false },
39439 { "float128", RS6000_BTM_FLOAT128, false, false },
39440 };
39441
39442 /* Option variables that we want to support inside attribute((target)) and
39443 #pragma GCC target operations. */
39444
39445 struct rs6000_opt_var {
39446 const char *name; /* option name */
39447 size_t global_offset; /* offset of the option in global_options. */
39448 size_t target_offset; /* offset of the option in target options. */
39449 };
39450
39451 static struct rs6000_opt_var const rs6000_opt_vars[] =
39452 {
39453 { "friz",
39454 offsetof (struct gcc_options, x_TARGET_FRIZ),
39455 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
39456 { "avoid-indexed-addresses",
39457 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
39458 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
39459 { "paired",
39460 offsetof (struct gcc_options, x_rs6000_paired_float),
39461 offsetof (struct cl_target_option, x_rs6000_paired_float), },
39462 { "longcall",
39463 offsetof (struct gcc_options, x_rs6000_default_long_calls),
39464 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
39465 { "optimize-swaps",
39466 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
39467 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
39468 { "allow-movmisalign",
39469 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
39470 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
39471 { "allow-df-permute",
39472 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
39473 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
39474 { "sched-groups",
39475 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
39476 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
39477 { "always-hint",
39478 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
39479 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
39480 { "align-branch-targets",
39481 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
39482 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
39483 { "vectorize-builtins",
39484 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
39485 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
39486 { "tls-markers",
39487 offsetof (struct gcc_options, x_tls_markers),
39488 offsetof (struct cl_target_option, x_tls_markers), },
39489 { "sched-prolog",
39490 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39491 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39492 { "sched-epilog",
39493 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
39494 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
39495 { "gen-cell-microcode",
39496 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
39497 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
39498 { "warn-cell-microcode",
39499 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
39500 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
39501 };
39502
39503 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
39504 parsing. Return true if there were no errors. */
39505
39506 static bool
39507 rs6000_inner_target_options (tree args, bool attr_p)
39508 {
39509 bool ret = true;
39510
39511 if (args == NULL_TREE)
39512 ;
39513
39514 else if (TREE_CODE (args) == STRING_CST)
39515 {
39516 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39517 char *q;
39518
39519 while ((q = strtok (p, ",")) != NULL)
39520 {
39521 bool error_p = false;
39522 bool not_valid_p = false;
39523 const char *cpu_opt = NULL;
39524
39525 p = NULL;
39526 if (strncmp (q, "cpu=", 4) == 0)
39527 {
39528 int cpu_index = rs6000_cpu_name_lookup (q+4);
39529 if (cpu_index >= 0)
39530 rs6000_cpu_index = cpu_index;
39531 else
39532 {
39533 error_p = true;
39534 cpu_opt = q+4;
39535 }
39536 }
39537 else if (strncmp (q, "tune=", 5) == 0)
39538 {
39539 int tune_index = rs6000_cpu_name_lookup (q+5);
39540 if (tune_index >= 0)
39541 rs6000_tune_index = tune_index;
39542 else
39543 {
39544 error_p = true;
39545 cpu_opt = q+5;
39546 }
39547 }
39548 else
39549 {
39550 size_t i;
39551 bool invert = false;
39552 char *r = q;
39553
39554 error_p = true;
39555 if (strncmp (r, "no-", 3) == 0)
39556 {
39557 invert = true;
39558 r += 3;
39559 }
39560
39561 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
39562 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
39563 {
39564 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
39565
39566 if (!rs6000_opt_masks[i].valid_target)
39567 not_valid_p = true;
39568 else
39569 {
39570 error_p = false;
39571 rs6000_isa_flags_explicit |= mask;
39572
39573 /* VSX needs altivec, so -mvsx automagically sets
39574 altivec and disables -mavoid-indexed-addresses. */
39575 if (!invert)
39576 {
39577 if (mask == OPTION_MASK_VSX)
39578 {
39579 mask |= OPTION_MASK_ALTIVEC;
39580 TARGET_AVOID_XFORM = 0;
39581 }
39582 }
39583
39584 if (rs6000_opt_masks[i].invert)
39585 invert = !invert;
39586
39587 if (invert)
39588 rs6000_isa_flags &= ~mask;
39589 else
39590 rs6000_isa_flags |= mask;
39591 }
39592 break;
39593 }
39594
39595 if (error_p && !not_valid_p)
39596 {
39597 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
39598 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
39599 {
39600 size_t j = rs6000_opt_vars[i].global_offset;
39601 *((int *) ((char *)&global_options + j)) = !invert;
39602 error_p = false;
39603 not_valid_p = false;
39604 break;
39605 }
39606 }
39607 }
39608
39609 if (error_p)
39610 {
39611 const char *eprefix, *esuffix;
39612
39613 ret = false;
39614 if (attr_p)
39615 {
39616 eprefix = "__attribute__((__target__(";
39617 esuffix = ")))";
39618 }
39619 else
39620 {
39621 eprefix = "#pragma GCC target ";
39622 esuffix = "";
39623 }
39624
39625 if (cpu_opt)
39626 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
39627 q, esuffix);
39628 else if (not_valid_p)
39629 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
39630 else
39631 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
39632 }
39633 }
39634 }
39635
39636 else if (TREE_CODE (args) == TREE_LIST)
39637 {
39638 do
39639 {
39640 tree value = TREE_VALUE (args);
39641 if (value)
39642 {
39643 bool ret2 = rs6000_inner_target_options (value, attr_p);
39644 if (!ret2)
39645 ret = false;
39646 }
39647 args = TREE_CHAIN (args);
39648 }
39649 while (args != NULL_TREE);
39650 }
39651
39652 else
39653 {
39654 error ("attribute %<target%> argument not a string");
39655 return false;
39656 }
39657
39658 return ret;
39659 }
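
/* For reference, the strings accepted above look like this (a sketch;
   the exact set of valid names depends on the configuration):

     #pragma GCC target ("cpu=power8,vsx")
     void f (void) __attribute__ ((__target__ ("no-vsx,popcntd")));

   i.e. a comma-separated list of "cpu=", "tune=", and option names
   from rs6000_opt_masks / rs6000_opt_vars, each optionally prefixed
   with "no-" to invert it. */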
39660
39661 /* Print out the target options as a list for -mdebug=target. */
39662
39663 static void
39664 rs6000_debug_target_options (tree args, const char *prefix)
39665 {
39666 if (args == NULL_TREE)
39667 fprintf (stderr, "%s<NULL>", prefix);
39668
39669 else if (TREE_CODE (args) == STRING_CST)
39670 {
39671 char *p = ASTRDUP (TREE_STRING_POINTER (args));
39672 char *q;
39673
39674 while ((q = strtok (p, ",")) != NULL)
39675 {
39676 p = NULL;
39677 fprintf (stderr, "%s\"%s\"", prefix, q);
39678 prefix = ", ";
39679 }
39680 }
39681
39682 else if (TREE_CODE (args) == TREE_LIST)
39683 {
39684 do
39685 {
39686 tree value = TREE_VALUE (args);
39687 if (value)
39688 {
39689 rs6000_debug_target_options (value, prefix);
39690 prefix = ", ";
39691 }
39692 args = TREE_CHAIN (args);
39693 }
39694 while (args != NULL_TREE);
39695 }
39696
39697 else
39698 gcc_unreachable ();
39699
39700 return;
39701 }
39702
39703 \f
39704 /* Hook to validate attribute((target("..."))). */
39705
39706 static bool
39707 rs6000_valid_attribute_p (tree fndecl,
39708 tree ARG_UNUSED (name),
39709 tree args,
39710 int flags)
39711 {
39712 struct cl_target_option cur_target;
39713 bool ret;
39714 tree old_optimize = build_optimization_node (&global_options);
39715 tree new_target, new_optimize;
39716 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39717
39718 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
39719
39720 if (TARGET_DEBUG_TARGET)
39721 {
39722 tree tname = DECL_NAME (fndecl);
39723 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
39724 if (tname)
39725 fprintf (stderr, "function: %.*s\n",
39726 (int) IDENTIFIER_LENGTH (tname),
39727 IDENTIFIER_POINTER (tname));
39728 else
39729 fprintf (stderr, "function: unknown\n");
39730
39731 fprintf (stderr, "args:");
39732 rs6000_debug_target_options (args, " ");
39733 fprintf (stderr, "\n");
39734
39735 if (flags)
39736 fprintf (stderr, "flags: 0x%x\n", flags);
39737
39738 fprintf (stderr, "--------------------\n");
39739 }
39740
39741 old_optimize = build_optimization_node (&global_options);
39742 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
39743
39744 /* If the function changed the optimization levels as well as setting target
39745 options, start with the optimizations specified. */
39746 if (func_optimize && func_optimize != old_optimize)
39747 cl_optimization_restore (&global_options,
39748 TREE_OPTIMIZATION (func_optimize));
39749
39750 /* The target attributes may also change some optimization flags, so update
39751 the optimization options if necessary. */
39752 cl_target_option_save (&cur_target, &global_options);
39753 rs6000_cpu_index = rs6000_tune_index = -1;
39754 ret = rs6000_inner_target_options (args, true);
39755
39756 /* Set up any additional state. */
39757 if (ret)
39758 {
39759 ret = rs6000_option_override_internal (false);
39760 new_target = build_target_option_node (&global_options);
39761 }
39762 else
39763 new_target = NULL;
39764
39765 new_optimize = build_optimization_node (&global_options);
39766
39767 if (!new_target)
39768 ret = false;
39769
39770 else if (fndecl)
39771 {
39772 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
39773
39774 if (old_optimize != new_optimize)
39775 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
39776 }
39777
39778 cl_target_option_restore (&global_options, &cur_target);
39779
39780 if (old_optimize != new_optimize)
39781 cl_optimization_restore (&global_options,
39782 TREE_OPTIMIZATION (old_optimize));
39783
39784 return ret;
39785 }
39786
39787 \f
39788 /* Hook to validate the current #pragma GCC target and set the state, and
39789 update the macros based on what was changed. If ARGS is NULL, then
39790 POP_TARGET is used to reset the options. */
39791
39792 bool
39793 rs6000_pragma_target_parse (tree args, tree pop_target)
39794 {
39795 tree prev_tree = build_target_option_node (&global_options);
39796 tree cur_tree;
39797 struct cl_target_option *prev_opt, *cur_opt;
39798 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
39799 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
39800
39801 if (TARGET_DEBUG_TARGET)
39802 {
39803 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
39804 fprintf (stderr, "args:");
39805 rs6000_debug_target_options (args, " ");
39806 fprintf (stderr, "\n");
39807
39808 if (pop_target)
39809 {
39810 fprintf (stderr, "pop_target:\n");
39811 debug_tree (pop_target);
39812 }
39813 else
39814 fprintf (stderr, "pop_target: <NULL>\n");
39815
39816 fprintf (stderr, "--------------------\n");
39817 }
39818
39819 if (! args)
39820 {
39821 cur_tree = ((pop_target)
39822 ? pop_target
39823 : target_option_default_node);
39824 cl_target_option_restore (&global_options,
39825 TREE_TARGET_OPTION (cur_tree));
39826 }
39827 else
39828 {
39829 rs6000_cpu_index = rs6000_tune_index = -1;
39830 if (!rs6000_inner_target_options (args, false)
39831 || !rs6000_option_override_internal (false)
39832 || (cur_tree = build_target_option_node (&global_options))
39833 == NULL_TREE)
39834 {
39835 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
39836 fprintf (stderr, "invalid pragma\n");
39837
39838 return false;
39839 }
39840 }
39841
39842 target_option_current_node = cur_tree;
39843
39844 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
39845 change the macros that are defined. */
39846 if (rs6000_target_modify_macros_ptr)
39847 {
39848 prev_opt = TREE_TARGET_OPTION (prev_tree);
39849 prev_bumask = prev_opt->x_rs6000_builtin_mask;
39850 prev_flags = prev_opt->x_rs6000_isa_flags;
39851
39852 cur_opt = TREE_TARGET_OPTION (cur_tree);
39853 cur_flags = cur_opt->x_rs6000_isa_flags;
39854 cur_bumask = cur_opt->x_rs6000_builtin_mask;
39855
39856 diff_bumask = (prev_bumask ^ cur_bumask);
39857 diff_flags = (prev_flags ^ cur_flags);
39858
39859 if ((diff_flags != 0) || (diff_bumask != 0))
39860 {
39861 /* Delete old macros. */
39862 rs6000_target_modify_macros_ptr (false,
39863 prev_flags & diff_flags,
39864 prev_bumask & diff_bumask);
39865
39866 /* Define new macros. */
39867 rs6000_target_modify_macros_ptr (true,
39868 cur_flags & diff_flags,
39869 cur_bumask & diff_bumask);
39870 }
39871 }
39872
39873 return true;
39874 }
39875
39876 \f
39877 /* Remember the last target of rs6000_set_current_function. */
39878 static GTY(()) tree rs6000_previous_fndecl;
39879
39880 /* Establish appropriate back-end context for processing the function
39881 FNDECL. The argument might be NULL to indicate processing at top
39882 level, outside of any function scope. */
39883 static void
39884 rs6000_set_current_function (tree fndecl)
39885 {
39886 tree old_tree = (rs6000_previous_fndecl
39887 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
39888 : NULL_TREE);
39889
39890 tree new_tree = (fndecl
39891 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
39892 : NULL_TREE);
39893
39894 if (TARGET_DEBUG_TARGET)
39895 {
39896 bool print_final = false;
39897 fprintf (stderr, "\n==================== rs6000_set_current_function");
39898
39899 if (fndecl)
39900 fprintf (stderr, ", fndecl %s (%p)",
39901 (DECL_NAME (fndecl)
39902 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
39903 : "<unknown>"), (void *)fndecl);
39904
39905 if (rs6000_previous_fndecl)
39906 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
39907
39908 fprintf (stderr, "\n");
39909 if (new_tree)
39910 {
39911 fprintf (stderr, "\nnew fndecl target specific options:\n");
39912 debug_tree (new_tree);
39913 print_final = true;
39914 }
39915
39916 if (old_tree)
39917 {
39918 fprintf (stderr, "\nold fndecl target specific options:\n");
39919 debug_tree (old_tree);
39920 print_final = true;
39921 }
39922
39923 if (print_final)
39924 fprintf (stderr, "--------------------\n");
39925 }
39926
39927 /* Only change the context if the function changes. This hook is called
39928 several times in the course of compiling a function, and we don't want to
39929 slow things down too much or call target_reinit when it isn't safe. */
39930 if (fndecl && fndecl != rs6000_previous_fndecl)
39931 {
39932 rs6000_previous_fndecl = fndecl;
39933 if (old_tree == new_tree)
39934 ;
39935
39936 else if (new_tree && new_tree != target_option_default_node)
39937 {
39938 cl_target_option_restore (&global_options,
39939 TREE_TARGET_OPTION (new_tree));
39940 if (TREE_TARGET_GLOBALS (new_tree))
39941 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
39942 else
39943 TREE_TARGET_GLOBALS (new_tree)
39944 = save_target_globals_default_opts ();
39945 }
39946
39947 else if (old_tree && old_tree != target_option_default_node)
39948 {
39949 new_tree = target_option_current_node;
39950 cl_target_option_restore (&global_options,
39951 TREE_TARGET_OPTION (new_tree));
39952 if (TREE_TARGET_GLOBALS (new_tree))
39953 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
39954 else if (new_tree == target_option_default_node)
39955 restore_target_globals (&default_target_globals);
39956 else
39957 TREE_TARGET_GLOBALS (new_tree)
39958 = save_target_globals_default_opts ();
39959 }
39960 }
39961 }
39962
39963 \f
39964 /* Save the current options */
39965
39966 static void
39967 rs6000_function_specific_save (struct cl_target_option *ptr,
39968 struct gcc_options *opts)
39969 {
39970 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
39971 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
39972 }
39973
39974 /* Restore the current options */
39975
39976 static void
39977 rs6000_function_specific_restore (struct gcc_options *opts,
39978 struct cl_target_option *ptr)
39979
39980 {
39981 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
39982 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
39983 (void) rs6000_option_override_internal (false);
39984 }
39985
39986 /* Print the current options */
39987
39988 static void
39989 rs6000_function_specific_print (FILE *file, int indent,
39990 struct cl_target_option *ptr)
39991 {
39992 rs6000_print_isa_options (file, indent, "Isa options set",
39993 ptr->x_rs6000_isa_flags);
39994
39995 rs6000_print_isa_options (file, indent, "Isa options explicit",
39996 ptr->x_rs6000_isa_flags_explicit);
39997 }
39998
39999 /* Helper function to print the current isa or misc options on a line. */
40000
40001 static void
40002 rs6000_print_options_internal (FILE *file,
40003 int indent,
40004 const char *string,
40005 HOST_WIDE_INT flags,
40006 const char *prefix,
40007 const struct rs6000_opt_mask *opts,
40008 size_t num_elements)
40009 {
40010 size_t i;
40011 size_t start_column = 0;
40012 size_t cur_column;
40013 size_t max_column = 120;
40014 size_t prefix_len = strlen (prefix);
40015 size_t comma_len = 0;
40016 const char *comma = "";
40017
40018 if (indent)
40019 start_column += fprintf (file, "%*s", indent, "");
40020
40021 if (!flags)
40022 {
40023 fprintf (file, DEBUG_FMT_S, string, "<none>");
40024 return;
40025 }
40026
40027 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
40028
40029 /* Print the various mask options. */
40030 cur_column = start_column;
40031 for (i = 0; i < num_elements; i++)
40032 {
40033 bool invert = opts[i].invert;
40034 const char *name = opts[i].name;
40035 const char *no_str = "";
40036 HOST_WIDE_INT mask = opts[i].mask;
40037 size_t len = comma_len + prefix_len + strlen (name);
40038
40039 if (!invert)
40040 {
40041 if ((flags & mask) == 0)
40042 {
40043 no_str = "no-";
40044 len += sizeof ("no-") - 1;
40045 }
40046
40047 flags &= ~mask;
40048 }
40049
40050 else
40051 {
40052 if ((flags & mask) != 0)
40053 {
40054 no_str = "no-";
40055 len += sizeof ("no-") - 1;
40056 }
40057
40058 flags |= mask;
40059 }
40060
40061 cur_column += len;
40062 if (cur_column > max_column)
40063 {
40064 fprintf (file, ", \\\n%*s", (int)start_column, "");
40065 cur_column = start_column + len;
40066 comma = "";
40067 }
40068
40069 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
40070 comma = ", ";
40071 comma_len = sizeof (", ") - 1;
40072 }
40073
40074 fputs ("\n", file);
40075 }
40076
40077 /* Helper function to print the current isa options on a line. */
40078
40079 static void
40080 rs6000_print_isa_options (FILE *file, int indent, const char *string,
40081 HOST_WIDE_INT flags)
40082 {
40083 rs6000_print_options_internal (file, indent, string, flags, "-m",
40084 &rs6000_opt_masks[0],
40085 ARRAY_SIZE (rs6000_opt_masks));
40086 }
40087
40088 static void
40089 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
40090 HOST_WIDE_INT flags)
40091 {
40092 rs6000_print_options_internal (file, indent, string, flags, "",
40093 &rs6000_builtin_mask_names[0],
40094 ARRAY_SIZE (rs6000_builtin_mask_names));
40095 }
40096
40097 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
40098 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
40099 -mvsx-timode, -mupper-regs-df).
40100
40101 If the user used -mno-power8-vector, we need to turn off all of the implicit
40102 ISA 2.07 and 3.0 options that relate to the vector unit.
40103
40104 If the user used -mno-power9-vector, we need to turn off all of the implicit
40105 ISA 3.0 options that relate to the vector unit.
40106
40107 This function does not handle explicit options such as the user specifying
40108 -mdirect-move. These are handled in rs6000_option_override_internal, and
40109 the appropriate error is given if needed.
40110
40111 We return a mask of all of the implicit options that should not be enabled
40112 by default. */
40113
40114 static HOST_WIDE_INT
40115 rs6000_disable_incompatible_switches (void)
40116 {
40117 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
40118 size_t i, j;
40119
40120 static const struct {
40121 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
40122 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
40123 const char *const name; /* name of the switch. */
40124 } flags[] = {
40125 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
40126 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
40127 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
40128 };
40129
40130 for (i = 0; i < ARRAY_SIZE (flags); i++)
40131 {
40132 HOST_WIDE_INT no_flag = flags[i].no_flag;
40133
40134 if ((rs6000_isa_flags & no_flag) == 0
40135 && (rs6000_isa_flags_explicit & no_flag) != 0)
40136 {
40137 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
40138 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
40139 & rs6000_isa_flags
40140 & dep_flags);
40141
40142 if (set_flags)
40143 {
40144 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
40145 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
40146 {
40147 set_flags &= ~rs6000_opt_masks[j].mask;
40148 error ("-mno-%s turns off -m%s",
40149 flags[i].name,
40150 rs6000_opt_masks[j].name);
40151 }
40152
40153 gcc_assert (!set_flags);
40154 }
40155
40156 rs6000_isa_flags &= ~dep_flags;
40157 ignore_masks |= no_flag | dep_flags;
40158 }
40159 }
40160
40161 if (!TARGET_P9_VECTOR
40162 && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
40163 && TARGET_P9_DFORM_BOTH > 0)
40164 {
40165 error ("-mno-power9-vector turns off -mpower9-dform");
40166 TARGET_P9_DFORM_BOTH = 0;
40167 }
40168
40169 return ignore_masks;
40170 }
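/* For example (illustrative command line): explicitly combining
   "-mno-power9-vector" with a dependent option such as "-mpower9-dform"
   (the exact spellings come from rs6000_opt_masks) is rejected here with
   "-mno-power9-vector turns off -mpower9-dform".  */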
40171
40172 \f
40173 /* Hook to determine if one function can safely inline another. */
40174
40175 static bool
40176 rs6000_can_inline_p (tree caller, tree callee)
40177 {
40178 bool ret = false;
40179 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
40180 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
40181
40182 /* If callee has no option attributes, then it is ok to inline. */
40183 if (!callee_tree)
40184 ret = true;
40185
40186 /* If caller has no option attributes, but callee does then it is not ok to
40187 inline. */
40188 else if (!caller_tree)
40189 ret = false;
40190
40191 else
40192 {
40193 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
40194 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
40195
40196 /* Callee's options should be a subset of the caller's, i.e. a vsx function
40197 can inline an altivec function but a non-vsx function can't inline a
40198 vsx function. */
40199 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
40200 == callee_opts->x_rs6000_isa_flags)
40201 ret = true;
40202 }
40203
40204 if (TARGET_DEBUG_TARGET)
40205 fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
40206 (DECL_NAME (caller)
40207 ? IDENTIFIER_POINTER (DECL_NAME (caller))
40208 : "<unknown>"),
40209 (DECL_NAME (callee)
40210 ? IDENTIFIER_POINTER (DECL_NAME (callee))
40211 : "<unknown>"),
40212 (ret ? "can" : "cannot"));
40213
40214 return ret;
40215 }
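/* Illustrative example (not part of this file) of the subset rule above,
   assuming the usual implication of -maltivec by -mvsx:

       __attribute__((target ("altivec"))) static int
       callee (int x) { return x + 1; }

       __attribute__((target ("vsx"))) int
       caller (int x) { return callee (x); }

   Here callee's ISA flags are a subset of caller's, so inlining is allowed;
   inlining callee into a function with no target attribute is rejected.  */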
40216 \f
40217 /* Allocate a stack temp and fix up the address so it meets the particular
40218 memory requirements (either offsettable or REG+REG addressing). */
40219
40220 rtx
40221 rs6000_allocate_stack_temp (machine_mode mode,
40222 bool offsettable_p,
40223 bool reg_reg_p)
40224 {
40225 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40226 rtx addr = XEXP (stack, 0);
40227 int strict_p = (reload_in_progress || reload_completed);
40228
40229 if (!legitimate_indirect_address_p (addr, strict_p))
40230 {
40231 if (offsettable_p
40232 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
40233 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40234
40235 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
40236 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
40237 }
40238
40239 return stack;
40240 }
40241
40242 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
40243 to such a form to deal with memory reference instructions like STFIWX that
40244 only take reg+reg addressing. */
40245
40246 rtx
40247 rs6000_address_for_fpconvert (rtx x)
40248 {
40249 int strict_p = (reload_in_progress || reload_completed);
40250 rtx addr;
40251
40252 gcc_assert (MEM_P (x));
40253 addr = XEXP (x, 0);
40254 if (! legitimate_indirect_address_p (addr, strict_p)
40255 && ! legitimate_indexed_address_p (addr, strict_p))
40256 {
40257 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
40258 {
40259 rtx reg = XEXP (addr, 0);
40260 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
40261 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
40262 gcc_assert (REG_P (reg));
40263 emit_insn (gen_add3_insn (reg, reg, size_rtx));
40264 addr = reg;
40265 }
40266 else if (GET_CODE (addr) == PRE_MODIFY)
40267 {
40268 rtx reg = XEXP (addr, 0);
40269 rtx expr = XEXP (addr, 1);
40270 gcc_assert (REG_P (reg));
40271 gcc_assert (GET_CODE (expr) == PLUS);
40272 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
40273 addr = reg;
40274 }
40275
40276 x = replace_equiv_address (x, copy_addr_to_reg (addr));
40277 }
40278
40279 return x;
40280 }
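/* For example (illustrative RTL): an SFmode memory reference whose address
   is (pre_inc (reg r9)) is handled above by emitting an explicit
   "addi 9,9,4" via gen_add3_insn and then rebuilding the reference as a
   plain (mem:SF (reg r9)), a form that reg or reg+reg instructions such as
   STFIWX can accept.  */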
40281
40282 /* Given a memory reference, if it is not in the form for altivec memory
40283 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
40284 convert to the altivec format. */
40285
40286 rtx
40287 rs6000_address_for_altivec (rtx x)
40288 {
40289 gcc_assert (MEM_P (x));
40290 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
40291 {
40292 rtx addr = XEXP (x, 0);
40293 int strict_p = (reload_in_progress || reload_completed);
40294
40295 if (!legitimate_indexed_address_p (addr, strict_p)
40296 && !legitimate_indirect_address_p (addr, strict_p))
40297 addr = copy_to_mode_reg (Pmode, addr);
40298
40299 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
40300 x = change_address (x, GET_MODE (x), addr);
40301 }
40302
40303 return x;
40304 }
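/* For example (illustrative RTL): a V4SImode reference to
   (plus (reg r9) (const_int 64)) is neither indirect nor indexed, so the
   sum is forced into a fresh register and the address becomes
   (and (reg rN) (const_int -16)), mirroring the 16-byte alignment that the
   lvx/stvx instructions apply to their effective address.  */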
40305
40306 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
40307
40308 On the RS/6000, all integer constants are acceptable, most won't be valid
40309 for particular insns, though. Only easy FP constants are acceptable. */
40310
40311 static bool
40312 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
40313 {
40314 if (TARGET_ELF && tls_referenced_p (x))
40315 return false;
40316
40317 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
40318 || GET_MODE (x) == VOIDmode
40319 || (TARGET_POWERPC64 && mode == DImode)
40320 || easy_fp_constant (x, mode)
40321 || easy_vector_constant (x, mode));
40322 }
40323
40324 \f
40325 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
40326
40327 static bool
40328 chain_already_loaded (rtx_insn *last)
40329 {
40330 for (; last != NULL; last = PREV_INSN (last))
40331 {
40332 if (NONJUMP_INSN_P (last))
40333 {
40334 rtx patt = PATTERN (last);
40335
40336 if (GET_CODE (patt) == SET)
40337 {
40338 rtx lhs = XEXP (patt, 0);
40339
40340 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
40341 return true;
40342 }
40343 }
40344 }
40345 return false;
40346 }
40347
40348 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
40349
40350 void
40351 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40352 {
40353 const bool direct_call_p
40354 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
40355 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
40356 rtx toc_load = NULL_RTX;
40357 rtx toc_restore = NULL_RTX;
40358 rtx func_addr;
40359 rtx abi_reg = NULL_RTX;
40360 rtx call[4];
40361 int n_call;
40362 rtx insn;
40363
40364 /* Handle longcall attributes. */
40365 if (INTVAL (cookie) & CALL_LONG)
40366 func_desc = rs6000_longcall_ref (func_desc);
40367
40368 /* Handle indirect calls. */
40369 if (GET_CODE (func_desc) != SYMBOL_REF
40370 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
40371 {
40372 /* Save the TOC into its reserved slot before the call,
40373 and prepare to restore it after the call. */
40374 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
40375 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
40376 rtx stack_toc_mem = gen_frame_mem (Pmode,
40377 gen_rtx_PLUS (Pmode, stack_ptr,
40378 stack_toc_offset));
40379 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
40380 gen_rtvec (1, stack_toc_offset),
40381 UNSPEC_TOCSLOT);
40382 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
40383
40384 /* Can we optimize saving the TOC in the prologue or
40385 do we need to do it at every call? */
40386 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
40387 cfun->machine->save_toc_in_prologue = true;
40388 else
40389 {
40390 MEM_VOLATILE_P (stack_toc_mem) = 1;
40391 emit_move_insn (stack_toc_mem, toc_reg);
40392 }
40393
40394 if (DEFAULT_ABI == ABI_ELFv2)
40395 {
40396 /* A function pointer in the ELFv2 ABI is just a plain address, but
40397 the ABI requires it to be loaded into r12 before the call. */
40398 func_addr = gen_rtx_REG (Pmode, 12);
40399 emit_move_insn (func_addr, func_desc);
40400 abi_reg = func_addr;
40401 }
40402 else
40403 {
40404 /* A function pointer under AIX is a pointer to a data area whose
40405 first word contains the actual address of the function, whose
40406 second word contains a pointer to its TOC, and whose third word
40407 contains a value to place in the static chain register (r11).
40408 Note that if we load the static chain, our "trampoline" need
40409 not have any executable code. */
40410
40411 /* Load up address of the actual function. */
40412 func_desc = force_reg (Pmode, func_desc);
40413 func_addr = gen_reg_rtx (Pmode);
40414 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
40415
40416 /* Prepare to load the TOC of the called function. Note that the
40417 TOC load must happen immediately before the actual call so
40418 that unwinding the TOC registers works correctly. See the
40419 comment in frob_update_context. */
40420 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
40421 rtx func_toc_mem = gen_rtx_MEM (Pmode,
40422 gen_rtx_PLUS (Pmode, func_desc,
40423 func_toc_offset));
40424 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
40425
40426 /* If we have a static chain, load it up. But, if the call was
40427 originally direct, the 3rd word has not been written since no
40428 trampoline has been built, so we ought not to load it, lest we
40429 overwrite a static chain value. */
40430 if (!direct_call_p
40431 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
40432 && !chain_already_loaded (get_current_sequence ()->next->last))
40433 {
40434 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
40435 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
40436 rtx func_sc_mem = gen_rtx_MEM (Pmode,
40437 gen_rtx_PLUS (Pmode, func_desc,
40438 func_sc_offset));
40439 emit_move_insn (sc_reg, func_sc_mem);
40440 abi_reg = sc_reg;
40441 }
40442 }
40443 }
40444 else
40445 {
40446 /* Direct calls use the TOC: for local calls, the callee will
40447 assume the TOC register is set; for non-local calls, the
40448 PLT stub needs the TOC register. */
40449 abi_reg = toc_reg;
40450 func_addr = func_desc;
40451 }
40452
40453 /* Create the call. */
40454 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
40455 if (value != NULL_RTX)
40456 call[0] = gen_rtx_SET (value, call[0]);
40457 n_call = 1;
40458
40459 if (toc_load)
40460 call[n_call++] = toc_load;
40461 if (toc_restore)
40462 call[n_call++] = toc_restore;
40463
40464 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
40465
40466 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
40467 insn = emit_call_insn (insn);
40468
40469 /* Mention all registers defined by the ABI to hold information
40470 as uses in CALL_INSN_FUNCTION_USAGE. */
40471 if (abi_reg)
40472 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
40473 }
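/* Illustrative sketch (not used by the compiler): the AIX function
   descriptor dereferenced on the indirect-call path above can be pictured
   in C as

       struct aix_func_desc
       {
         void *code;          -- word 0: address of the function's code
         void *toc;           -- word 1: the function's TOC pointer
         void *static_chain;  -- word 2: value loaded into r11
       };

   matching the offsets 0, GET_MODE_SIZE (Pmode) and 2 * GET_MODE_SIZE
   (Pmode) used above.  */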
40474
40475 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
40476
40477 void
40478 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
40479 {
40480 rtx call[2];
40481 rtx insn;
40482
40483 gcc_assert (INTVAL (cookie) == 0);
40484
40485 /* Create the call. */
40486 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
40487 if (value != NULL_RTX)
40488 call[0] = gen_rtx_SET (value, call[0]);
40489
40490 call[1] = simple_return_rtx;
40491
40492 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
40493 insn = emit_call_insn (insn);
40494
40495 /* Note use of the TOC register. */
40496 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
40497 }
40498
40499 /* Return whether we need to always update the saved TOC pointer when we update
40500 the stack pointer. */
40501
40502 static bool
40503 rs6000_save_toc_in_prologue_p (void)
40504 {
40505 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
40506 }
40507
40508 #ifdef HAVE_GAS_HIDDEN
40509 # define USE_HIDDEN_LINKONCE 1
40510 #else
40511 # define USE_HIDDEN_LINKONCE 0
40512 #endif
40513
40514 /* Fills in the label name that should be used for a 476 link stack thunk. */
40515
40516 void
40517 get_ppc476_thunk_name (char name[32])
40518 {
40519 gcc_assert (TARGET_LINK_STACK);
40520
40521 if (USE_HIDDEN_LINKONCE)
40522 sprintf (name, "__ppc476.get_thunk");
40523 else
40524 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
40525 }
40526
40527 /* This function emits the simple thunk routine that is used to preserve
40528 the link stack on the 476 cpu. */
40529
40530 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
40531 static void
40532 rs6000_code_end (void)
40533 {
40534 char name[32];
40535 tree decl;
40536
40537 if (!TARGET_LINK_STACK)
40538 return;
40539
40540 get_ppc476_thunk_name (name);
40541
40542 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
40543 build_function_type_list (void_type_node, NULL_TREE));
40544 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
40545 NULL_TREE, void_type_node);
40546 TREE_PUBLIC (decl) = 1;
40547 TREE_STATIC (decl) = 1;
40548
40549 #if RS6000_WEAK
40550 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
40551 {
40552 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
40553 targetm.asm_out.unique_section (decl, 0);
40554 switch_to_section (get_named_section (decl, NULL, 0));
40555 DECL_WEAK (decl) = 1;
40556 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
40557 targetm.asm_out.globalize_label (asm_out_file, name);
40558 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
40559 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
40560 }
40561 else
40562 #endif
40563 {
40564 switch_to_section (text_section);
40565 ASM_OUTPUT_LABEL (asm_out_file, name);
40566 }
40567
40568 DECL_INITIAL (decl) = make_node (BLOCK);
40569 current_function_decl = decl;
40570 allocate_struct_function (decl, false);
40571 init_function_start (decl);
40572 first_function_block_is_cold = false;
40573 /* Make sure unwind info is emitted for the thunk if needed. */
40574 final_start_function (emit_barrier (), asm_out_file, 1);
40575
40576 fputs ("\tblr\n", asm_out_file);
40577
40578 final_end_function ();
40579 init_insn_lengths ();
40580 free_after_compilation (cfun);
40581 set_cfun (NULL);
40582 current_function_decl = NULL;
40583 }
40584
40585 /* Add r30 to hard reg set if the prologue sets it up and it is not
40586 pic_offset_table_rtx. */
40587
40588 static void
40589 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
40590 {
40591 if (!TARGET_SINGLE_PIC_BASE
40592 && TARGET_TOC
40593 && TARGET_MINIMAL_TOC
40594 && !constant_pool_empty_p ())
40595 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
40596 if (cfun->machine->split_stack_argp_used)
40597 add_to_hard_reg_set (&set->set, Pmode, 12);
40598 }
40599
40600 \f
40601 /* Helper function for rs6000_split_logical to emit a logical instruction after
40602 splitting the operation into individual GPR registers.
40603
40604 DEST is the destination register.
40605 OP1 and OP2 are the input source registers.
40606 CODE is the base operation (AND, IOR, XOR, NOT).
40607 MODE is the machine mode.
40608 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40609 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40610 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40611
40612 static void
40613 rs6000_split_logical_inner (rtx dest,
40614 rtx op1,
40615 rtx op2,
40616 enum rtx_code code,
40617 machine_mode mode,
40618 bool complement_final_p,
40619 bool complement_op1_p,
40620 bool complement_op2_p)
40621 {
40622 rtx bool_rtx;
40623
40624 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
40625 if (op2 && GET_CODE (op2) == CONST_INT
40626 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
40627 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40628 {
40629 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
40630 HOST_WIDE_INT value = INTVAL (op2) & mask;
40631
40632 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
40633 if (code == AND)
40634 {
40635 if (value == 0)
40636 {
40637 emit_insn (gen_rtx_SET (dest, const0_rtx));
40638 return;
40639 }
40640
40641 else if (value == mask)
40642 {
40643 if (!rtx_equal_p (dest, op1))
40644 emit_insn (gen_rtx_SET (dest, op1));
40645 return;
40646 }
40647 }
40648
40649 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
40650 into separate ORI/ORIS or XORI/XORIS instructions. */
40651 else if (code == IOR || code == XOR)
40652 {
40653 if (value == 0)
40654 {
40655 if (!rtx_equal_p (dest, op1))
40656 emit_insn (gen_rtx_SET (dest, op1));
40657 return;
40658 }
40659 }
40660 }
40661
40662 if (code == AND && mode == SImode
40663 && !complement_final_p && !complement_op1_p && !complement_op2_p)
40664 {
40665 emit_insn (gen_andsi3 (dest, op1, op2));
40666 return;
40667 }
40668
40669 if (complement_op1_p)
40670 op1 = gen_rtx_NOT (mode, op1);
40671
40672 if (complement_op2_p)
40673 op2 = gen_rtx_NOT (mode, op2);
40674
40675 /* For canonical RTL, if only one arm is inverted it is the first. */
40676 if (!complement_op1_p && complement_op2_p)
40677 std::swap (op1, op2);
40678
40679 bool_rtx = ((code == NOT)
40680 ? gen_rtx_NOT (mode, op1)
40681 : gen_rtx_fmt_ee (code, mode, op1, op2));
40682
40683 if (complement_final_p)
40684 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
40685
40686 emit_insn (gen_rtx_SET (dest, bool_rtx));
40687 }
40688
40689 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
40690 operations are split immediately during RTL generation to allow for more
40691 optimizations of the AND/IOR/XOR.
40692
40693 OPERANDS is an array containing the destination and two input operands.
40694 CODE is the base operation (AND, IOR, XOR, NOT).
40695 MODE is the machine mode.
40696 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40697 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40698 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
40699 CLOBBER_REG is either NULL or a scratch register of type CC to allow
40700 formation of the AND instructions. */
40701
40702 static void
40703 rs6000_split_logical_di (rtx operands[3],
40704 enum rtx_code code,
40705 bool complement_final_p,
40706 bool complement_op1_p,
40707 bool complement_op2_p)
40708 {
40709 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
40710 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
40711 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
40712 enum hi_lo { hi = 0, lo = 1 };
40713 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
40714 size_t i;
40715
40716 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
40717 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
40718 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
40719 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
40720
40721 if (code == NOT)
40722 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
40723 else
40724 {
40725 if (GET_CODE (operands[2]) != CONST_INT)
40726 {
40727 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
40728 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
40729 }
40730 else
40731 {
40732 HOST_WIDE_INT value = INTVAL (operands[2]);
40733 HOST_WIDE_INT value_hi_lo[2];
40734
40735 gcc_assert (!complement_final_p);
40736 gcc_assert (!complement_op1_p);
40737 gcc_assert (!complement_op2_p);
40738
40739 value_hi_lo[hi] = value >> 32;
40740 value_hi_lo[lo] = value & lower_32bits;
40741
40742 for (i = 0; i < 2; i++)
40743 {
40744 HOST_WIDE_INT sub_value = value_hi_lo[i];
40745
40746 if (sub_value & sign_bit)
40747 sub_value |= upper_32bits;
40748
40749 op2_hi_lo[i] = GEN_INT (sub_value);
40750
40751 /* If this is an AND instruction, check to see if we need to load
40752 the value in a register. */
40753 if (code == AND && sub_value != -1 && sub_value != 0
40754 && !and_operand (op2_hi_lo[i], SImode))
40755 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
40756 }
40757 }
40758 }
40759
40760 for (i = 0; i < 2; i++)
40761 {
40762 /* Split large IOR/XOR operations. */
40763 if ((code == IOR || code == XOR)
40764 && GET_CODE (op2_hi_lo[i]) == CONST_INT
40765 && !complement_final_p
40766 && !complement_op1_p
40767 && !complement_op2_p
40768 && !logical_const_operand (op2_hi_lo[i], SImode))
40769 {
40770 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
40771 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
40772 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
40773 rtx tmp = gen_reg_rtx (SImode);
40774
40775 /* Make sure the constant is sign extended. */
40776 if ((hi_16bits & sign_bit) != 0)
40777 hi_16bits |= upper_32bits;
40778
40779 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
40780 code, SImode, false, false, false);
40781
40782 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
40783 code, SImode, false, false, false);
40784 }
40785 else
40786 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
40787 code, SImode, complement_final_p,
40788 complement_op1_p, complement_op2_p);
40789 }
40790
40791 return;
40792 }
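/* Worked example (illustrative): on a 32-bit target,

       (set (reg:DI d) (xor:DI (reg:DI s) (const_int 0x12345678)))

   is split into SImode halves.  The high half XORs with 0 and collapses to
   a move; the low constant is not a logical_const_operand, so per the code
   above it becomes an XORIS with 0x1234 followed by an XORI with 0x5678.  */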
40793
40794 /* Split the insns that make up boolean operations operating on multiple GPR
40795 registers. The boolean MD patterns ensure that the inputs either are
40796 exactly the same as the output registers, or there is no overlap.
40797
40798 OPERANDS is an array containing the destination and two input operands.
40799 CODE is the base operation (AND, IOR, XOR, NOT).
40800 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
40801 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
40802 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
40803
40804 void
40805 rs6000_split_logical (rtx operands[3],
40806 enum rtx_code code,
40807 bool complement_final_p,
40808 bool complement_op1_p,
40809 bool complement_op2_p)
40810 {
40811 machine_mode mode = GET_MODE (operands[0]);
40812 machine_mode sub_mode;
40813 rtx op0, op1, op2;
40814 int sub_size, regno0, regno1, nregs, i;
40815
40816 /* If this is DImode, use the specialized version that can run before
40817 register allocation. */
40818 if (mode == DImode && !TARGET_POWERPC64)
40819 {
40820 rs6000_split_logical_di (operands, code, complement_final_p,
40821 complement_op1_p, complement_op2_p);
40822 return;
40823 }
40824
40825 op0 = operands[0];
40826 op1 = operands[1];
40827 op2 = (code == NOT) ? NULL_RTX : operands[2];
40828 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
40829 sub_size = GET_MODE_SIZE (sub_mode);
40830 regno0 = REGNO (op0);
40831 regno1 = REGNO (op1);
40832
40833 gcc_assert (reload_completed);
40834 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40835 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
40836
40837 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
40838 gcc_assert (nregs > 1);
40839
40840 if (op2 && REG_P (op2))
40841 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
40842
40843 for (i = 0; i < nregs; i++)
40844 {
40845 int offset = i * sub_size;
40846 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
40847 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
40848 rtx sub_op2 = ((code == NOT)
40849 ? NULL_RTX
40850 : simplify_subreg (sub_mode, op2, mode, offset));
40851
40852 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
40853 complement_final_p, complement_op1_p,
40854 complement_op2_p);
40855 }
40856
40857 return;
40858 }
40859
40860 \f
40861 /* Return true if the peephole2 can combine a load involving a combination of
40862 an addis instruction and a load with an offset that can be fused together on
40863 a power8. */
40864
40865 bool
40866 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
40867 rtx addis_value, /* addis value. */
40868 rtx target, /* target register that is loaded. */
40869 rtx mem) /* bottom part of the memory addr. */
40870 {
40871 rtx addr;
40872 rtx base_reg;
40873
40874 /* Validate arguments. */
40875 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
40876 return false;
40877
40878 if (!base_reg_operand (target, GET_MODE (target)))
40879 return false;
40880
40881 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
40882 return false;
40883
40884 /* Allow sign/zero extension. */
40885 if (GET_CODE (mem) == ZERO_EXTEND
40886 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
40887 mem = XEXP (mem, 0);
40888
40889 if (!MEM_P (mem))
40890 return false;
40891
40892 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
40893 return false;
40894
40895 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
40896 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
40897 return false;
40898
40899 /* Validate that the register used to load the high value is either the
40900 register being loaded, or we can safely replace its use.
40901
40902 This function is only called from the peephole2 pass and we assume that
40903 there are 2 instructions in the peephole (addis and load), so we want to
40904 check that the target register is not used in the memory address and that
40905 the register holding the addis result is dead after the peephole. */
40906 if (REGNO (addis_reg) != REGNO (target))
40907 {
40908 if (reg_mentioned_p (target, mem))
40909 return false;
40910
40911 if (!peep2_reg_dead_p (2, addis_reg))
40912 return false;
40913
40914 /* If the target register being loaded is the stack pointer, we must
40915 avoid loading any other value into it, even temporarily. */
40916 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
40917 return false;
40918 }
40919
40920 base_reg = XEXP (addr, 0);
40921 return REGNO (addis_reg) == REGNO (base_reg);
40922 }
40923
40924 /* During the peephole2 pass, adjust and expand the insns for a load fusion
40925 sequence. We adjust the addis register to use the target register. If the
40926 load sign extends, we adjust the code to do the zero extending load, and an
40927 explicit sign extension later since the fusion only covers zero extending
40928 loads.
40929
40930 The operands are:
40931 operands[0] register set with addis (to be replaced with target)
40932 operands[1] value set via addis
40933 operands[2] target register being loaded
40934 operands[3] D-form memory reference using operands[0]. */
40935
40936 void
40937 expand_fusion_gpr_load (rtx *operands)
40938 {
40939 rtx addis_value = operands[1];
40940 rtx target = operands[2];
40941 rtx orig_mem = operands[3];
40942 rtx new_addr, new_mem, orig_addr, offset;
40943 enum rtx_code plus_or_lo_sum;
40944 machine_mode target_mode = GET_MODE (target);
40945 machine_mode extend_mode = target_mode;
40946 machine_mode ptr_mode = Pmode;
40947 enum rtx_code extend = UNKNOWN;
40948
40949 if (GET_CODE (orig_mem) == ZERO_EXTEND
40950 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
40951 {
40952 extend = GET_CODE (orig_mem);
40953 orig_mem = XEXP (orig_mem, 0);
40954 target_mode = GET_MODE (orig_mem);
40955 }
40956
40957 gcc_assert (MEM_P (orig_mem));
40958
40959 orig_addr = XEXP (orig_mem, 0);
40960 plus_or_lo_sum = GET_CODE (orig_addr);
40961 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
40962
40963 offset = XEXP (orig_addr, 1);
40964 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
40965 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
40966
40967 if (extend != UNKNOWN)
40968 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
40969
40970 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
40971 UNSPEC_FUSION_GPR);
40972 emit_insn (gen_rtx_SET (target, new_mem));
40973
40974 if (extend == SIGN_EXTEND)
40975 {
40976 int sub_off = ((BYTES_BIG_ENDIAN)
40977 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
40978 : 0);
40979 rtx sign_reg
40980 = simplify_subreg (target_mode, target, extend_mode, sub_off);
40981
40982 emit_insn (gen_rtx_SET (target,
40983 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
40984 }
40985
40986 return;
40987 }
40988
40989 /* Emit the addis instruction that will be part of a fused instruction
40990 sequence. */
40991
40992 void
40993 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
40994 const char *mode_name)
40995 {
40996 rtx fuse_ops[10];
40997 char insn_template[80];
40998 const char *addis_str = NULL;
40999 const char *comment_str = ASM_COMMENT_START;
41000
41001 if (*comment_str == ' ')
41002 comment_str++;
41003
41004 /* Emit the addis instruction. */
41005 fuse_ops[0] = target;
41006 if (satisfies_constraint_L (addis_value))
41007 {
41008 fuse_ops[1] = addis_value;
41009 addis_str = "lis %0,%v1";
41010 }
41011
41012 else if (GET_CODE (addis_value) == PLUS)
41013 {
41014 rtx op0 = XEXP (addis_value, 0);
41015 rtx op1 = XEXP (addis_value, 1);
41016
41017 if (REG_P (op0) && CONST_INT_P (op1)
41018 && satisfies_constraint_L (op1))
41019 {
41020 fuse_ops[1] = op0;
41021 fuse_ops[2] = op1;
41022 addis_str = "addis %0,%1,%v2";
41023 }
41024 }
41025
41026 else if (GET_CODE (addis_value) == HIGH)
41027 {
41028 rtx value = XEXP (addis_value, 0);
41029 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
41030 {
41031 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
41032 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
41033 if (TARGET_ELF)
41034 addis_str = "addis %0,%2,%1@toc@ha";
41035
41036 else if (TARGET_XCOFF)
41037 addis_str = "addis %0,%1@u(%2)";
41038
41039 else
41040 gcc_unreachable ();
41041 }
41042
41043 else if (GET_CODE (value) == PLUS)
41044 {
41045 rtx op0 = XEXP (value, 0);
41046 rtx op1 = XEXP (value, 1);
41047
41048 if (GET_CODE (op0) == UNSPEC
41049 && XINT (op0, 1) == UNSPEC_TOCREL
41050 && CONST_INT_P (op1))
41051 {
41052 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
41053 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
41054 fuse_ops[3] = op1;
41055 if (TARGET_ELF)
41056 addis_str = "addis %0,%2,%1+%3@toc@ha";
41057
41058 else if (TARGET_XCOFF)
41059 addis_str = "addis %0,%1+%3@u(%2)";
41060
41061 else
41062 gcc_unreachable ();
41063 }
41064 }
41065
41066 else if (satisfies_constraint_L (value))
41067 {
41068 fuse_ops[1] = value;
41069 addis_str = "lis %0,%v1";
41070 }
41071
41072 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
41073 {
41074 fuse_ops[1] = value;
41075 addis_str = "lis %0,%1@ha";
41076 }
41077 }
41078
41079 if (!addis_str)
41080 fatal_insn ("Could not generate addis value for fusion", addis_value);
41081
41082 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
41083 comment, mode_name);
41084 output_asm_insn (insn_template, fuse_ops);
41085 }
41086
41087 /* Emit a D-form load or store instruction that is the second instruction
41088 of a fusion sequence. */
41089
41090 void
41091 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
41092 const char *insn_str)
41093 {
41094 rtx fuse_ops[10];
41095 char insn_template[80];
41096
41097 fuse_ops[0] = load_store_reg;
41098 fuse_ops[1] = addis_reg;
41099
41100 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
41101 {
41102 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
41103 fuse_ops[2] = offset;
41104 output_asm_insn (insn_template, fuse_ops);
41105 }
41106
41107 else if (GET_CODE (offset) == UNSPEC
41108 && XINT (offset, 1) == UNSPEC_TOCREL)
41109 {
41110 if (TARGET_ELF)
41111 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
41112
41113 else if (TARGET_XCOFF)
41114 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41115
41116 else
41117 gcc_unreachable ();
41118
41119 fuse_ops[2] = XVECEXP (offset, 0, 0);
41120 output_asm_insn (insn_template, fuse_ops);
41121 }
41122
41123 else if (GET_CODE (offset) == PLUS
41124 && GET_CODE (XEXP (offset, 0)) == UNSPEC
41125 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
41126 && CONST_INT_P (XEXP (offset, 1)))
41127 {
41128 rtx tocrel_unspec = XEXP (offset, 0);
41129 if (TARGET_ELF)
41130 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
41131
41132 else if (TARGET_XCOFF)
41133 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
41134
41135 else
41136 gcc_unreachable ();
41137
41138 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
41139 fuse_ops[3] = XEXP (offset, 1);
41140 output_asm_insn (insn_template, fuse_ops);
41141 }
41142
41143 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
41144 {
41145 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
41146
41147 fuse_ops[2] = offset;
41148 output_asm_insn (insn_template, fuse_ops);
41149 }
41150
41151 else
41152 fatal_insn ("Unable to generate load/store offset for fusion", offset);
41153
41154 return;
41155 }
41156
41157 /* Wrap a TOC address that can be fused to indicate that special fusion
41158 processing is needed. */
41159
41160 rtx
41161 fusion_wrap_memory_address (rtx old_mem)
41162 {
41163 rtx old_addr = XEXP (old_mem, 0);
41164 rtvec v = gen_rtvec (1, old_addr);
41165 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
41166 return replace_equiv_address_nv (old_mem, new_addr, false);
41167 }
41168
41169 /* Given an address, convert it into the addis and load offset parts. Addresses
41170 created during the peephole2 process look like:
41171 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
41172 (unspec [(...)] UNSPEC_TOCREL))
41173
41174 Addresses created via toc fusion look like:
41175 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
41176
41177 static void
41178 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
41179 {
41180 rtx hi, lo;
41181
41182 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
41183 {
41184 lo = XVECEXP (addr, 0, 0);
41185 hi = gen_rtx_HIGH (Pmode, lo);
41186 }
41187 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
41188 {
41189 hi = XEXP (addr, 0);
41190 lo = XEXP (addr, 1);
41191 }
41192 else
41193 gcc_unreachable ();
41194
41195 *p_hi = hi;
41196 *p_lo = lo;
41197 }
41198
41199 /* Return a string to fuse an addis instruction with a gpr load into the same
41200 register that the addis instruction set. The address that is used
41201 is the logical address that was formed during peephole2:
41202 (lo_sum (high) (low-part))
41203
41204 Or the address is the TOC address that is wrapped before register allocation:
41205 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
41206
41207 The code is complicated, so we call output_asm_insn directly, and just
41208 return "". */
41209
41210 const char *
41211 emit_fusion_gpr_load (rtx target, rtx mem)
41212 {
41213 rtx addis_value;
41214 rtx addr;
41215 rtx load_offset;
41216 const char *load_str = NULL;
41217 const char *mode_name = NULL;
41218 machine_mode mode;
41219
41220 if (GET_CODE (mem) == ZERO_EXTEND)
41221 mem = XEXP (mem, 0);
41222
41223 gcc_assert (REG_P (target) && MEM_P (mem));
41224
41225 addr = XEXP (mem, 0);
41226 fusion_split_address (addr, &addis_value, &load_offset);
41227
41228 /* Now emit the load instruction to the same register. */
41229 mode = GET_MODE (mem);
41230 switch (mode)
41231 {
41232 case E_QImode:
41233 mode_name = "char";
41234 load_str = "lbz";
41235 break;
41236
41237 case E_HImode:
41238 mode_name = "short";
41239 load_str = "lhz";
41240 break;
41241
41242 case E_SImode:
41243 case E_SFmode:
41244 mode_name = (mode == SFmode) ? "float" : "int";
41245 load_str = "lwz";
41246 break;
41247
41248 case E_DImode:
41249 case E_DFmode:
41250 gcc_assert (TARGET_POWERPC64);
41251 mode_name = (mode == DFmode) ? "double" : "long";
41252 load_str = "ld";
41253 break;
41254
41255 default:
41256 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
41257 }
41258
41259 /* Emit the addis instruction. */
41260 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
41261
41262 /* Emit the D-form load instruction. */
41263 emit_fusion_load_store (target, target, load_offset, load_str);
41264
41265 return "";
41266 }
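/* For example (illustrative ELF assembly produced by the templates above),
   a fused TOC-relative word load into r9 looks like

       addis 9,2,sym@toc@ha    # gpr load fusion, type int
       lwz 9,sym@toc@l(9)

   with both instructions targeting the same register, as required by
   fusion_gpr_load_p.  */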
41267 \f
41268
41269 /* Return true if the peephole2 can combine a load/store involving a
41270 combination of an addis instruction and the memory operation. This was
41271 added to the ISA 3.0 (power9) hardware. */
41272
41273 bool
41274 fusion_p9_p (rtx addis_reg, /* register set via addis. */
41275 rtx addis_value, /* addis value. */
41276 rtx dest, /* destination (memory or register). */
41277 rtx src) /* source (register or memory). */
41278 {
41279 rtx addr, mem, offset;
41280 machine_mode mode = GET_MODE (src);
41281
41282 /* Validate arguments. */
41283 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
41284 return false;
41285
41286 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
41287 return false;
41288
41289 /* Ignore extend operations that are part of the load. */
41290 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
41291 src = XEXP (src, 0);
41292
41293 /* Test for memory<-register or register<-memory. */
41294 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
41295 {
41296 if (!MEM_P (dest))
41297 return false;
41298
41299 mem = dest;
41300 }
41301
41302 else if (MEM_P (src))
41303 {
41304 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
41305 return false;
41306
41307 mem = src;
41308 }
41309
41310 else
41311 return false;
41312
41313 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
41314 if (GET_CODE (addr) == PLUS)
41315 {
41316 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41317 return false;
41318
41319 return satisfies_constraint_I (XEXP (addr, 1));
41320 }
41321
41322 else if (GET_CODE (addr) == LO_SUM)
41323 {
41324 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
41325 return false;
41326
41327 offset = XEXP (addr, 1);
41328 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
41329 return small_toc_ref (offset, GET_MODE (offset));
41330
41331 else if (TARGET_ELF && !TARGET_POWERPC64)
41332 return CONSTANT_P (offset);
41333 }
41334
41335 return false;
41336 }
41337
41338 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41339 load sequence.
41340
41341 The operands are:
41342 operands[0] register set with addis
41343 operands[1] value set via addis
41344 operands[2] target register being loaded
41345 operands[3] D-form memory reference using operands[0].
41346
41347 This is similar to the fusion introduced with power8, except it extends to
41348 both loads and stores and does not require the result register to be the same
41349 as the base register. At the moment, we only do this if the register set with
41350 addis is dead. */
41351
41352 void
41353 expand_fusion_p9_load (rtx *operands)
41354 {
41355 rtx tmp_reg = operands[0];
41356 rtx addis_value = operands[1];
41357 rtx target = operands[2];
41358 rtx orig_mem = operands[3];
41359 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
41360 enum rtx_code plus_or_lo_sum;
41361 machine_mode target_mode = GET_MODE (target);
41362 machine_mode extend_mode = target_mode;
41363 machine_mode ptr_mode = Pmode;
41364 enum rtx_code extend = UNKNOWN;
41365
41366 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
41367 {
41368 extend = GET_CODE (orig_mem);
41369 orig_mem = XEXP (orig_mem, 0);
41370 target_mode = GET_MODE (orig_mem);
41371 }
41372
41373 gcc_assert (MEM_P (orig_mem));
41374
41375 orig_addr = XEXP (orig_mem, 0);
41376 plus_or_lo_sum = GET_CODE (orig_addr);
41377 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41378
41379 offset = XEXP (orig_addr, 1);
41380 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41381 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41382
41383 if (extend != UNKNOWN)
41384 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
41385
41386 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
41387 UNSPEC_FUSION_P9);
41388
41389 set = gen_rtx_SET (target, new_mem);
41390 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41391 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41392 emit_insn (insn);
41393
41394 return;
41395 }
41396
41397 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
41398 store sequence.
41399
41400 The operands are:
41401 operands[0] register set with addis
41402 operands[1] value set via addis
41403 operands[2] target D-form memory being stored to
41404 operands[3] register being stored
41405
41406 This is similar to the fusion introduced with power8, except it extends to
41407 both loads and stores and does not require the result register to be the same
41408 as the base register. At the moment, we only do this if the register set with
41409 addis is dead. */
41410
41411 void
41412 expand_fusion_p9_store (rtx *operands)
41413 {
41414 rtx tmp_reg = operands[0];
41415 rtx addis_value = operands[1];
41416 rtx orig_mem = operands[2];
41417 rtx src = operands[3];
41418 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
41419 enum rtx_code plus_or_lo_sum;
41420 machine_mode target_mode = GET_MODE (orig_mem);
41421 machine_mode ptr_mode = Pmode;
41422
41423 gcc_assert (MEM_P (orig_mem));
41424
41425 orig_addr = XEXP (orig_mem, 0);
41426 plus_or_lo_sum = GET_CODE (orig_addr);
41427 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
41428
41429 offset = XEXP (orig_addr, 1);
41430 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
41431 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
41432
41433 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
41434 UNSPEC_FUSION_P9);
41435
41436 set = gen_rtx_SET (new_mem, new_src);
41437 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
41438 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
41439 emit_insn (insn);
41440
41441 return;
41442 }
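/* The insn built above therefore has the shape (illustrative):

       (parallel [(set (mem) (unspec [(reg src)] UNSPEC_FUSION_P9))
                  (clobber (reg tmp))])

   which the machine description matches and ultimately prints through
   emit_fusion_p9_store below.  */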
41443
41444 /* Return a string to fuse an addis instruction with a load using extended
41445 fusion. The address that is used is the logical address that was formed
41446 during peephole2: (lo_sum (high) (low-part))
41447
41448 The code is complicated, so we call output_asm_insn directly, and just
41449 return "". */
41450
41451 const char *
41452 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
41453 {
41454 machine_mode mode = GET_MODE (reg);
41455 rtx hi;
41456 rtx lo;
41457 rtx addr;
41458 const char *load_string;
41459 int r;
41460
41461 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
41462 {
41463 mem = XEXP (mem, 0);
41464 mode = GET_MODE (mem);
41465 }
41466
41467 if (GET_CODE (reg) == SUBREG)
41468 {
41469 gcc_assert (SUBREG_BYTE (reg) == 0);
41470 reg = SUBREG_REG (reg);
41471 }
41472
41473 if (!REG_P (reg))
41474 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
41475
41476 r = REGNO (reg);
41477 if (FP_REGNO_P (r))
41478 {
41479 if (mode == SFmode)
41480 load_string = "lfs";
41481 else if (mode == DFmode || mode == DImode)
41482 load_string = "lfd";
41483 else
41484 gcc_unreachable ();
41485 }
41486 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41487 {
41488 if (mode == SFmode)
41489 load_string = "lxssp";
41490 else if (mode == DFmode || mode == DImode)
41491 load_string = "lxsd";
41492 else
41493 gcc_unreachable ();
41494 }
41495 else if (INT_REGNO_P (r))
41496 {
41497 switch (mode)
41498 {
41499 case E_QImode:
41500 load_string = "lbz";
41501 break;
41502 case E_HImode:
41503 load_string = "lhz";
41504 break;
41505 case E_SImode:
41506 case E_SFmode:
41507 load_string = "lwz";
41508 break;
41509 case E_DImode:
41510 case E_DFmode:
41511 if (!TARGET_POWERPC64)
41512 gcc_unreachable ();
41513 load_string = "ld";
41514 break;
41515 default:
41516 gcc_unreachable ();
41517 }
41518 }
41519 else
41520 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
41521
41522 if (!MEM_P (mem))
41523 fatal_insn ("emit_fusion_p9_load not MEM", mem);
41524
41525 addr = XEXP (mem, 0);
41526 fusion_split_address (addr, &hi, &lo);
41527
41528 /* Emit the addis instruction. */
41529 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
41530
41531 /* Emit the D-form load instruction. */
41532 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
41533
41534 return "";
41535 }
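/* For example (illustrative assembly): a fused DFmode load into f1 through
   a scratch GPR r9 might be printed as

       addis 9,2,x@toc@ha      # power9 load fusion, type DF
       lfd 1,x@toc@l(9)

   with the scratch register carrying only the high part of the address.  */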
41536
41537 /* Return a string to fuse an addis instruction with a store using extended
41538 fusion. The address that is used is the logical address that was formed
41539 during peephole2: (lo_sum (high) (low-part))
41540
41541 The code is complicated, so we call output_asm_insn directly, and just
41542 return "". */
41543
41544 const char *
41545 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
41546 {
41547 machine_mode mode = GET_MODE (reg);
41548 rtx hi;
41549 rtx lo;
41550 rtx addr;
41551 const char *store_string;
41552 int r;
41553
41554 if (GET_CODE (reg) == SUBREG)
41555 {
41556 gcc_assert (SUBREG_BYTE (reg) == 0);
41557 reg = SUBREG_REG (reg);
41558 }
41559
41560 if (!REG_P (reg))
41561 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
41562
41563 r = REGNO (reg);
41564 if (FP_REGNO_P (r))
41565 {
41566 if (mode == SFmode)
41567 store_string = "stfs";
41568 else if (mode == DFmode)
41569 store_string = "stfd";
41570 else
41571 gcc_unreachable ();
41572 }
41573 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
41574 {
41575 if (mode == SFmode)
41576 store_string = "stxssp";
41577 else if (mode == DFmode || mode == DImode)
41578 store_string = "stxsd";
41579 else
41580 gcc_unreachable ();
41581 }
41582 else if (INT_REGNO_P (r))
41583 {
41584 switch (mode)
41585 {
41586 case E_QImode:
41587 store_string = "stb";
41588 break;
41589 case E_HImode:
41590 store_string = "sth";
41591 break;
41592 case E_SImode:
41593 case E_SFmode:
41594 store_string = "stw";
41595 break;
41596 case E_DImode:
41597 case E_DFmode:
41598 if (!TARGET_POWERPC64)
41599 gcc_unreachable ();
41600 store_string = "std";
41601 break;
41602 default:
41603 gcc_unreachable ();
41604 }
41605 }
41606 else
41607 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
41608
41609 if (!MEM_P (mem))
41610 fatal_insn ("emit_fusion_p9_store not MEM", mem);
41611
41612 addr = XEXP (mem, 0);
41613 fusion_split_address (addr, &hi, &lo);
41614
41615 /* Emit the addis instruction. */
41616 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
41617
41618 /* Emit the D-form store instruction. */
41619 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
41620
41621 return "";
41622 }
41623
41624 \f
41625 /* Analyze vector computations and remove unnecessary doubleword
41626 swaps (xxswapdi instructions). This pass is performed only
41627 for little-endian VSX code generation.
41628
41629 For this specific case, loads and stores of 4x32 and 2x64 vectors
41630 are inefficient. These are implemented using the lxvd2x and
41631 stxvd2x instructions, which invert the order of doublewords in
41632 a vector register. Thus the code generation inserts an xxswapdi
41633 after each such load, and prior to each such store. (For spill
41634 code after register assignment, an additional xxswapdi is inserted
41635 following each store in order to return a hard register to its
41636 unpermuted value.)
41637
41638 The extra xxswapdi instructions reduce performance. This can be
41639 particularly bad for vectorized code. The purpose of this pass
41640 is to reduce the number of xxswapdi instructions required for
41641 correctness.
41642
41643 The primary insight is that much code that operates on vectors
41644 does not care about the relative order of elements in a register,
41645 so long as the correct memory order is preserved. If we have
41646 a computation where all input values are provided by lxvd2x/xxswapdi
41647 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
41648 and all intermediate computations are pure SIMD (independent of
41649 element order), then all the xxswapdi's associated with the loads
41650 and stores may be removed.
41651
41652 This pass uses some of the infrastructure and logical ideas from
41653 the "web" pass in web.c. We create maximal webs of computations
41654 fitting the description above using union-find. Each such web is
41655 then optimized by removing its unnecessary xxswapdi instructions.
41656
41657 The pass is placed prior to global optimization so that we can
41658 perform the optimization in the safest and simplest way possible;
41659 that is, by replacing each xxswapdi insn with a register copy insn.
41660 Subsequent forward propagation will remove copies where possible.
41661
41662 There are some operations sensitive to element order for which we
41663 can still allow the operation, provided we modify those operations.
41664 These include CONST_VECTORs, for which we must swap the first and
41665 second halves of the constant vector; and SUBREGs, for which we
41666 must adjust the byte offset to account for the swapped doublewords.
41667 A remaining opportunity would be non-immediate-form splats, for
41668 which we should adjust the selected lane of the input. We should
41669 also make code generation adjustments for sum-across operations,
41670 since this is a common vectorizer reduction.
41671
41672 Because we run prior to the first split, we can see loads and stores
41673 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
41674 vector loads and stores that have not yet been split into a permuting
41675 load/store and a swap. (One way this can happen is with a builtin
41676 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
41677 than deleting a swap, we convert the load/store into a permuting
41678 load/store (which effectively removes the swap). */
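
/* As a schematic little-endian example, a computation of the form

       lxvd2x  vs0,0,r9        # load; doublewords arrive reversed
       xxswapd vs0,vs0         # restore element order
       ...  element-order-insensitive SIMD ops ...
       xxswapd vs0,vs0         # re-reverse for the store
       stxvd2x vs0,0,r10       # store; doublewords written reversed

   is reduced to the lxvd2x/stxvd2x pair plus the SIMD ops once the
   whole web is shown to be insensitive to element order.  (xxswapd is
   the extended mnemonic for the xxpermdi form that the xxswapdi
   patterns emit.)  */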
41679
41680 /* Notes on Permutes
41681
41682 We do not currently handle computations that contain permutes. There
41683 is a general transformation that can be performed correctly, but it
41684 may introduce more expensive code than it replaces. To handle these
41685 would require a cost model to determine when to perform the optimization.
41686 This commentary records how this could be done if desired.
41687
41688 The most general permute is something like this (example for V16QI):
41689
41690 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
41691 (parallel [(const_int a0) (const_int a1)
41692 ...
41693 (const_int a14) (const_int a15)]))
41694
41695 where a0,...,a15 are in [0,31] and select which elements of op1
41696 and op2 appear in the result.
41697
41698 Regardless of mode, we can convert the PARALLEL to a mask of 16
41699 byte-element selectors. Let's call this M, with M[i] representing
41700 the ith byte-element selector value. Then if we swap doublewords
41701 throughout the computation, we can get correct behavior by replacing
41702 M with M' as follows:
41703
41704 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
41705 { ((M[i]+8)%16)+16 : M[i] in [16,31]
41706
41707 This seems promising at first, since we are just replacing one mask
41708 with another. But certain masks are preferable to others. If M
41709 is a mask that matches a vmrghh pattern, for example, M' certainly
41710 will not. Instead of a single vmrghh, we would generate a load of
41711 M' and a vperm. So we would need to know how many xxswapdi's we
41712 could remove as a result of this transformation to determine whether
41713 it is profitable; and preferably the logic would be aware of all
41714 the special preferable masks.
41715
41716 Another form of permute is an UNSPEC_VPERM, in which the mask is
41717 already in a register. In some cases, this mask may be a constant
41718 that we can discover with ud-chains, in which case the above
41719 transformation is ok. However, the common usage here is for the
41720 mask to be produced by an UNSPEC_LVSL, in which case the mask
41721 cannot be known at compile time. In such a case we would have to
41722 generate several instructions to compute M' as above at run time,
41723 and a cost model is needed again.
41724
41725 However, when the mask M for an UNSPEC_VPERM is loaded from the
41726 constant pool, we can replace M with M' as above at no cost
41727 beyond adding a constant pool entry. */
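
/* A minimal sketch of the M -> M' remapping described above; the
   helper below is purely illustrative and is not used by the pass.  */

static inline unsigned char
swapped_mask_element (unsigned char m)
{
  /* Rotate the byte selector by one doubleword (8 bytes) within its
     source vector, preserving which source operand it refers to
     (selectors 0-15 name op1, selectors 16-31 name op2).  */
  return (m < 16) ? (m + 8) % 16 : ((m + 8) % 16) + 16;
}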
41728
41729 /* This is based on the union-find logic in web.c. web_entry_base is
41730 defined in df.h. */
41731 class swap_web_entry : public web_entry_base
41732 {
41733 public:
41734 /* Pointer to the insn. */
41735 rtx_insn *insn;
41736 /* Set if insn contains a mention of a vector register. All other
41737 fields are undefined if this field is unset. */
41738 unsigned int is_relevant : 1;
41739 /* Set if insn is a load. */
41740 unsigned int is_load : 1;
41741 /* Set if insn is a store. */
41742 unsigned int is_store : 1;
41743 /* Set if insn is a doubleword swap. This can either be a register swap
41744 or a permuting load or store (test is_load and is_store for this). */
41745 unsigned int is_swap : 1;
41746 /* Set if the insn has a live-in use of a parameter register. */
41747 unsigned int is_live_in : 1;
41748 /* Set if the insn has a live-out def of a return register. */
41749 unsigned int is_live_out : 1;
41750 /* Set if the insn contains a subreg reference of a vector register. */
41751 unsigned int contains_subreg : 1;
41752 /* Set if the insn contains a 128-bit integer operand. */
41753 unsigned int is_128_int : 1;
41754 /* Set if this is a call-insn. */
41755 unsigned int is_call : 1;
41756 /* Set if this insn does not perform a vector operation for which
41757 element order matters, or if we know how to fix it up if it does.
41758 Undefined if is_swap is set. */
41759 unsigned int is_swappable : 1;
41760 /* A nonzero value indicates what kind of special handling for this
41761 insn is required if doublewords are swapped. Undefined if
41762 is_swappable is not set. */
41763 unsigned int special_handling : 4;
41764 /* Set if the web represented by this entry cannot be optimized. */
41765 unsigned int web_not_optimizable : 1;
41766 /* Set if this insn should be deleted. */
41767 unsigned int will_delete : 1;
41768 };
41769
41770 enum special_handling_values {
41771 SH_NONE = 0,
41772 SH_CONST_VECTOR,
41773 SH_SUBREG,
41774 SH_NOSWAP_LD,
41775 SH_NOSWAP_ST,
41776 SH_EXTRACT,
41777 SH_SPLAT,
41778 SH_XXPERMDI,
41779 SH_CONCAT,
41780 SH_VPERM
41781 };
41782
41783 /* Union INSN with all insns containing definitions that reach USE.
41784 Detect whether USE is live-in to the current function. */
41785 static void
41786 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
41787 {
41788 struct df_link *link = DF_REF_CHAIN (use);
41789
41790 if (!link)
41791 insn_entry[INSN_UID (insn)].is_live_in = 1;
41792
41793 while (link)
41794 {
41795 if (DF_REF_IS_ARTIFICIAL (link->ref))
41796 insn_entry[INSN_UID (insn)].is_live_in = 1;
41797
41798 if (DF_REF_INSN_INFO (link->ref))
41799 {
41800 rtx def_insn = DF_REF_INSN (link->ref);
41801 (void)unionfind_union (insn_entry + INSN_UID (insn),
41802 insn_entry + INSN_UID (def_insn));
41803 }
41804
41805 link = link->next;
41806 }
41807 }
41808
41809 /* Union INSN with all insns containing uses reached from DEF.
41810 Detect whether DEF is live-out from the current function. */
41811 static void
41812 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
41813 {
41814 struct df_link *link = DF_REF_CHAIN (def);
41815
41816 if (!link)
41817 insn_entry[INSN_UID (insn)].is_live_out = 1;
41818
41819 while (link)
41820 {
41821 /* This could be an eh use or some other artificial use;
41822 we treat these all the same (killing the optimization). */
41823 if (DF_REF_IS_ARTIFICIAL (link->ref))
41824 insn_entry[INSN_UID (insn)].is_live_out = 1;
41825
41826 if (DF_REF_INSN_INFO (link->ref))
41827 {
41828 rtx use_insn = DF_REF_INSN (link->ref);
41829 (void)unionfind_union (insn_entry + INSN_UID (insn),
41830 insn_entry + INSN_UID (use_insn));
41831 }
41832
41833 link = link->next;
41834 }
41835 }
41836
41837 /* Return 1 iff INSN is a load insn, including permuting loads that
41838 represent an lxvd2x instruction; else return 0. */
41839 static unsigned int
41840 insn_is_load_p (rtx insn)
41841 {
41842 rtx body = PATTERN (insn);
41843
41844 if (GET_CODE (body) == SET)
41845 {
41846 if (GET_CODE (SET_SRC (body)) == MEM)
41847 return 1;
41848
41849 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
41850 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
41851 return 1;
41852
41853 return 0;
41854 }
41855
41856 if (GET_CODE (body) != PARALLEL)
41857 return 0;
41858
41859 rtx set = XVECEXP (body, 0, 0);
41860
41861 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
41862 return 1;
41863
41864 return 0;
41865 }
41866
41867 /* Return 1 iff INSN is a store insn, including permuting stores that
41868 represent an stxvd2x instruction; else return 0. */
41869 static unsigned int
41870 insn_is_store_p (rtx insn)
41871 {
41872 rtx body = PATTERN (insn);
41873 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
41874 return 1;
41875 if (GET_CODE (body) != PARALLEL)
41876 return 0;
41877 rtx set = XVECEXP (body, 0, 0);
41878 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
41879 return 1;
41880 return 0;
41881 }
41882
41883 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
41884 a permuting load, or a permuting store. */
41885 static unsigned int
41886 insn_is_swap_p (rtx insn)
41887 {
41888 rtx body = PATTERN (insn);
41889 if (GET_CODE (body) != SET)
41890 return 0;
41891 rtx rhs = SET_SRC (body);
41892 if (GET_CODE (rhs) != VEC_SELECT)
41893 return 0;
41894 rtx parallel = XEXP (rhs, 1);
41895 if (GET_CODE (parallel) != PARALLEL)
41896 return 0;
41897 unsigned int len = XVECLEN (parallel, 0);
41898 if (len != 2 && len != 4 && len != 8 && len != 16)
41899 return 0;
41900 for (unsigned int i = 0; i < len / 2; ++i)
41901 {
41902 rtx op = XVECEXP (parallel, 0, i);
41903 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
41904 return 0;
41905 }
41906 for (unsigned int i = len / 2; i < len; ++i)
41907 {
41908 rtx op = XVECEXP (parallel, 0, i);
41909 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
41910 return 0;
41911 }
41912 return 1;
41913 }
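
/* For instance, the V4SI doubleword swap recognized above has the form

       (set (reg:V4SI d)
            (vec_select:V4SI (reg:V4SI s)
                             (parallel [(const_int 2) (const_int 3)
                                        (const_int 0) (const_int 1)])))

   i.e. selectors len/2 .. len-1 followed by 0 .. len/2-1.  */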
41914
41915 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
41916 static bool
41917 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
41918 {
41919 unsigned uid = INSN_UID (insn);
41920 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
41921 return false;
41922
41923 /* Find the unique use in the swap and locate its def. If the def
41924 isn't unique, punt. */
41925 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
41926 df_ref use;
41927 FOR_EACH_INSN_INFO_USE (use, insn_info)
41928 {
41929 struct df_link *def_link = DF_REF_CHAIN (use);
41930 if (!def_link || def_link->next)
41931 return false;
41932
41933 rtx def_insn = DF_REF_INSN (def_link->ref);
41934 unsigned uid2 = INSN_UID (def_insn);
41935 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
41936 return false;
41937
41938 rtx body = PATTERN (def_insn);
41939 if (GET_CODE (body) != SET
41940 || GET_CODE (SET_SRC (body)) != VEC_SELECT
41941 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
41942 return false;
41943
41944 rtx mem = XEXP (SET_SRC (body), 0);
41945 rtx base_reg = XEXP (mem, 0);
41946
41947 df_ref base_use;
41948 insn_info = DF_INSN_INFO_GET (def_insn);
41949 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
41950 {
41951 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
41952 continue;
41953
41954 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
41955 if (!base_def_link || base_def_link->next)
41956 return false;
41957
41958 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
41959 rtx tocrel_body = PATTERN (tocrel_insn);
41960 rtx base, offset;
41961 if (GET_CODE (tocrel_body) != SET)
41962 return false;
41963 /* There is an extra level of indirection for small/large
41964 code models. */
41965 rtx tocrel_expr = SET_SRC (tocrel_body);
41966 if (GET_CODE (tocrel_expr) == MEM)
41967 tocrel_expr = XEXP (tocrel_expr, 0);
41968 if (!toc_relative_expr_p (tocrel_expr, false))
41969 return false;
41970 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
41971 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
41972 return false;
41973 }
41974 }
41975 return true;
41976 }
41977
41978 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
41979 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
41980 static bool
41981 v2df_reduction_p (rtx op)
41982 {
41983 if (GET_MODE (op) != V2DFmode)
41984 return false;
41985
41986 enum rtx_code code = GET_CODE (op);
41987 if (code != PLUS && code != SMIN && code != SMAX)
41988 return false;
41989
41990 rtx concat = XEXP (op, 0);
41991 if (GET_CODE (concat) != VEC_CONCAT)
41992 return false;
41993
41994 rtx select0 = XEXP (concat, 0);
41995 rtx select1 = XEXP (concat, 1);
41996 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
41997 return false;
41998
41999 rtx reg0 = XEXP (select0, 0);
42000 rtx reg1 = XEXP (select1, 0);
42001 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
42002 return false;
42003
42004 rtx parallel0 = XEXP (select0, 1);
42005 rtx parallel1 = XEXP (select1, 1);
42006 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
42007 return false;
42008
42009 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
42010 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
42011 return false;
42012
42013 return true;
42014 }
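
/* For example, the sum-reduction shape matched above looks like

       (plus:V2DF
         (vec_concat:V2DF
           (vec_select:DF (reg:V2DF x) (parallel [(const_int 1)]))
           (vec_select:DF (reg:V2DF x) (parallel [(const_int 0)])))
         ...)

   (the second operand of the PLUS is not examined by this test).
   Both lanes of X feed the concat symmetrically, so the reduction is
   insensitive to doubleword order.  */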
42015
42016 /* Return 1 iff OP is an operand that will not be affected by having
42017 vector doublewords swapped in memory. */
42018 static unsigned int
42019 rtx_is_swappable_p (rtx op, unsigned int *special)
42020 {
42021 enum rtx_code code = GET_CODE (op);
42022 int i, j;
42023 rtx parallel;
42024
42025 switch (code)
42026 {
42027 case LABEL_REF:
42028 case SYMBOL_REF:
42029 case CLOBBER:
42030 case REG:
42031 return 1;
42032
42033 case VEC_CONCAT:
42034 case ASM_INPUT:
42035 case ASM_OPERANDS:
42036 return 0;
42037
42038 case CONST_VECTOR:
42039 {
42040 *special = SH_CONST_VECTOR;
42041 return 1;
42042 }
42043
42044 case VEC_DUPLICATE:
42045 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
42046 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
42047 it represents a vector splat for which we can do special
42048 handling. */
42049 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
42050 return 1;
42051 else if (REG_P (XEXP (op, 0))
42052 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42053 /* This catches V2DF and V2DI splat, at a minimum. */
42054 return 1;
42055 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
42056 && REG_P (XEXP (XEXP (op, 0), 0))
42057 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
42058 /* This catches splat of a truncated value. */
42059 return 1;
42060 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
42061 /* If the duplicated item is from a select, defer to the select
42062 processing to see if we can change the lane for the splat. */
42063 return rtx_is_swappable_p (XEXP (op, 0), special);
42064 else
42065 return 0;
42066
42067 case VEC_SELECT:
42068 /* A vec_extract operation is ok if we change the lane. */
42069 if (GET_CODE (XEXP (op, 0)) == REG
42070 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
42071 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42072 && XVECLEN (parallel, 0) == 1
42073 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
42074 {
42075 *special = SH_EXTRACT;
42076 return 1;
42077 }
42078 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
42079 XXPERMDI is a swap operation, it will be identified by
42080 insn_is_swap_p and therefore we won't get here. */
42081 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
42082 && (GET_MODE (XEXP (op, 0)) == V4DFmode
42083 || GET_MODE (XEXP (op, 0)) == V4DImode)
42084 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
42085 && XVECLEN (parallel, 0) == 2
42086 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
42087 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
42088 {
42089 *special = SH_XXPERMDI;
42090 return 1;
42091 }
42092 else if (v2df_reduction_p (op))
42093 return 1;
42094 else
42095 return 0;
42096
42097 case UNSPEC:
42098 {
42099 /* Various operations are unsafe for this optimization, at least
42100 without significant additional work. Permutes are obviously
42101 problematic, as both the permute control vector and the ordering
42102 of the target values are invalidated by doubleword swapping.
42103 Vector pack and unpack modify the number of vector lanes.
42104 Merge-high/low will not operate correctly on swapped operands.
42105 Vector shifts across element boundaries are clearly uncool,
42106 as are vector select and concatenate operations. Vector
42107 sum-across instructions define one operand with a specific
42108 order-dependent element, so additional fixup code would be
42109 needed to make those work. Vector set and non-immediate-form
42110 vector splat are element-order sensitive. A few of these
42111 cases might be workable with special handling if required.
42112 Adding cost modeling would be appropriate in some cases. */
42113 int val = XINT (op, 1);
42114 switch (val)
42115 {
42116 default:
42117 break;
42118 case UNSPEC_VMRGH_DIRECT:
42119 case UNSPEC_VMRGL_DIRECT:
42120 case UNSPEC_VPACK_SIGN_SIGN_SAT:
42121 case UNSPEC_VPACK_SIGN_UNS_SAT:
42122 case UNSPEC_VPACK_UNS_UNS_MOD:
42123 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
42124 case UNSPEC_VPACK_UNS_UNS_SAT:
42125 case UNSPEC_VPERM:
42126 case UNSPEC_VPERM_UNS:
42127 case UNSPEC_VPERMHI:
42128 case UNSPEC_VPERMSI:
42129 case UNSPEC_VPKPX:
42130 case UNSPEC_VSLDOI:
42131 case UNSPEC_VSLO:
42132 case UNSPEC_VSRO:
42133 case UNSPEC_VSUM2SWS:
42134 case UNSPEC_VSUM4S:
42135 case UNSPEC_VSUM4UBS:
42136 case UNSPEC_VSUMSWS:
42137 case UNSPEC_VSUMSWS_DIRECT:
42138 case UNSPEC_VSX_CONCAT:
42139 case UNSPEC_VSX_SET:
42140 case UNSPEC_VSX_SLDWI:
42141 case UNSPEC_VUNPACK_HI_SIGN:
42142 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
42143 case UNSPEC_VUNPACK_LO_SIGN:
42144 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
42145 case UNSPEC_VUPKHPX:
42146 case UNSPEC_VUPKHS_V4SF:
42147 case UNSPEC_VUPKHU_V4SF:
42148 case UNSPEC_VUPKLPX:
42149 case UNSPEC_VUPKLS_V4SF:
42150 case UNSPEC_VUPKLU_V4SF:
42151 case UNSPEC_VSX_CVDPSPN:
42152 case UNSPEC_VSX_CVSPDP:
42153 case UNSPEC_VSX_CVSPDPN:
42154 case UNSPEC_VSX_EXTRACT:
42155 case UNSPEC_VSX_VSLO:
42156 case UNSPEC_VSX_VEC_INIT:
42157 return 0;
42158 case UNSPEC_VSPLT_DIRECT:
42159 case UNSPEC_VSX_XXSPLTD:
42160 *special = SH_SPLAT;
42161 return 1;
42162 case UNSPEC_REDUC_PLUS:
42163 case UNSPEC_REDUC:
42164 return 1;
42165 }
42166 }
42167
42168 default:
42169 break;
42170 }
42171
42172 const char *fmt = GET_RTX_FORMAT (code);
42173 int ok = 1;
42174
42175 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42176 if (fmt[i] == 'e' || fmt[i] == 'u')
42177 {
42178 unsigned int special_op = SH_NONE;
42179 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
42180 if (special_op == SH_NONE)
42181 continue;
42182 /* Ensure we never have two kinds of special handling
42183 for the same insn. */
42184 if (*special != SH_NONE && *special != special_op)
42185 return 0;
42186 *special = special_op;
42187 }
42188 else if (fmt[i] == 'E')
42189 for (j = 0; j < XVECLEN (op, i); ++j)
42190 {
42191 unsigned int special_op = SH_NONE;
42192 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
42193 if (special_op == SH_NONE)
42194 continue;
42195 /* Ensure we never have two kinds of special handling
42196 for the same insn. */
42197 if (*special != SH_NONE && *special != special_op)
42198 return 0;
42199 *special = special_op;
42200 }
42201
42202 return ok;
42203 }
42204
42205 /* Return 1 iff INSN will not be affected by having vector
42206 doublewords swapped in memory (in which case *SPECIAL is
42207 unchanged), or can be modified to be correct if vector doublewords
42208 are swapped in memory (in which case *SPECIAL is changed to a
42209 value indicating how). */
42210 static unsigned int
42211 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
42212 unsigned int *special)
42213 {
42214 /* Calls are always bad. */
42215 if (GET_CODE (insn) == CALL_INSN)
42216 return 0;
42217
42218 /* Loads and stores seen here are not permuting, but we can still
42219 fix them up by converting them to permuting ones. Exceptions:
42220 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
42221 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
42222 for the SET source. Also we must now make an exception for lvx
42223 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
42224 explicit "& -16") since this leads to unrecognizable insns. */
42225 rtx body = PATTERN (insn);
42226 int i = INSN_UID (insn);
42227
42228 if (insn_entry[i].is_load)
42229 {
42230 if (GET_CODE (body) == SET)
42231 {
42232 rtx rhs = SET_SRC (body);
42233 /* Even without a swap, the RHS might be a vec_select for, say,
42234 a byte-reversing load. */
42235 if (GET_CODE (rhs) != MEM)
42236 return 0;
42237 if (GET_CODE (XEXP (rhs, 0)) == AND)
42238 return 0;
42239
42240 *special = SH_NOSWAP_LD;
42241 return 1;
42242 }
42243 else
42244 return 0;
42245 }
42246
42247 if (insn_entry[i].is_store)
42248 {
42249 if (GET_CODE (body) == SET
42250 && GET_CODE (SET_SRC (body)) != UNSPEC)
42251 {
42252 rtx lhs = SET_DEST (body);
42253 /* Even without a swap, the LHS might be a vec_select for, say,
42254 a byte-reversing store. */
42255 if (GET_CODE (lhs) != MEM)
42256 return 0;
42257 if (GET_CODE (XEXP (lhs, 0)) == AND)
42258 return 0;
42259
42260 *special = SH_NOSWAP_ST;
42261 return 1;
42262 }
42263 else
42264 return 0;
42265 }
42266
42267 /* A convert to single precision can be left as is provided that
42268 all of its uses are in xxspltw instructions that splat BE element
42269 zero. */
42270 if (GET_CODE (body) == SET
42271 && GET_CODE (SET_SRC (body)) == UNSPEC
42272 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
42273 {
42274 df_ref def;
42275 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42276
42277 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42278 {
42279 struct df_link *link = DF_REF_CHAIN (def);
42280 if (!link)
42281 return 0;
42282
42283 for (; link; link = link->next) {
42284 rtx use_insn = DF_REF_INSN (link->ref);
42285 rtx use_body = PATTERN (use_insn);
42286 if (GET_CODE (use_body) != SET
42287 || GET_CODE (SET_SRC (use_body)) != UNSPEC
42288 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
42289 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
42290 return 0;
42291 }
42292 }
42293
42294 return 1;
42295 }
42296
42297 /* A concatenation of two doublewords is ok if we reverse the
42298 order of the inputs. */
42299 if (GET_CODE (body) == SET
42300 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
42301 && (GET_MODE (SET_SRC (body)) == V2DFmode
42302 || GET_MODE (SET_SRC (body)) == V2DImode))
42303 {
42304 *special = SH_CONCAT;
42305 return 1;
42306 }
42307
42308 /* V2DF reductions are always swappable. */
42309 if (GET_CODE (body) == PARALLEL)
42310 {
42311 rtx expr = XVECEXP (body, 0, 0);
42312 if (GET_CODE (expr) == SET
42313 && v2df_reduction_p (SET_SRC (expr)))
42314 return 1;
42315 }
42316
42317 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
42318 constant pool. */
42319 if (GET_CODE (body) == SET
42320 && GET_CODE (SET_SRC (body)) == UNSPEC
42321 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
42322 && XVECLEN (SET_SRC (body), 0) == 3
42323 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
42324 {
42325 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
42326 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42327 df_ref use;
42328 FOR_EACH_INSN_INFO_USE (use, insn_info)
42329 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42330 {
42331 struct df_link *def_link = DF_REF_CHAIN (use);
42332 /* Punt if multiple definitions for this reg. */
42333 if (def_link && !def_link->next
42334 && const_load_sequence_p (insn_entry,
42335 DF_REF_INSN (def_link->ref)))
42336 {
42337 *special = SH_VPERM;
42338 return 1;
42339 }
42340 }
42341 }
42342
42343 /* Otherwise check the operands for vector lane violations. */
42344 return rtx_is_swappable_p (body, special);
42345 }
42346
42347 enum chain_purpose { FOR_LOADS, FOR_STORES };
42348
42349 /* Return true if the UD or DU chain headed by LINK is non-empty,
42350 and every entry on the chain references an insn that is a
42351 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
42352 register swap must have only permuting loads as reaching defs.
42353 If PURPOSE is FOR_STORES, each such register swap must have only
42354 register swaps or permuting stores as reached uses. */
42355 static bool
42356 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
42357 enum chain_purpose purpose)
42358 {
42359 if (!link)
42360 return false;
42361
42362 for (; link; link = link->next)
42363 {
42364 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
42365 continue;
42366
42367 if (DF_REF_IS_ARTIFICIAL (link->ref))
42368 return false;
42369
42370 rtx reached_insn = DF_REF_INSN (link->ref);
42371 unsigned uid = INSN_UID (reached_insn);
42372 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
42373
42374 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
42375 || insn_entry[uid].is_store)
42376 return false;
42377
42378 if (purpose == FOR_LOADS)
42379 {
42380 df_ref use;
42381 FOR_EACH_INSN_INFO_USE (use, insn_info)
42382 {
42383 struct df_link *swap_link = DF_REF_CHAIN (use);
42384
42385 while (swap_link)
42386 {
42387 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42388 return false;
42389
42390 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
42391 unsigned uid2 = INSN_UID (swap_def_insn);
42392
42393 /* Only permuting loads are allowed. */
42394 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
42395 return false;
42396
42397 swap_link = swap_link->next;
42398 }
42399 }
42400 }
42401 else if (purpose == FOR_STORES)
42402 {
42403 df_ref def;
42404 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42405 {
42406 struct df_link *swap_link = DF_REF_CHAIN (def);
42407
42408 while (swap_link)
42409 {
42410 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
42411 return false;
42412
42413 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
42414 unsigned uid2 = INSN_UID (swap_use_insn);
42415
42416 /* Permuting stores or register swaps are allowed. */
42417 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
42418 return false;
42419
42420 swap_link = swap_link->next;
42421 }
42422 }
42423 }
42424 }
42425
42426 return true;
42427 }
42428
42429 /* Mark the xxswapdi instructions associated with permuting loads and
42430 stores for removal. Note that we only flag them for deletion here,
42431 as there is a possibility of a swap being reached from multiple
42432 loads, etc. */
42433 static void
42434 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
42435 {
42436 rtx insn = insn_entry[i].insn;
42437 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42438
42439 if (insn_entry[i].is_load)
42440 {
42441 df_ref def;
42442 FOR_EACH_INSN_INFO_DEF (def, insn_info)
42443 {
42444 struct df_link *link = DF_REF_CHAIN (def);
42445
42446 /* We know by now that these are swaps, so we can delete
42447 them confidently. */
42448 while (link)
42449 {
42450 rtx use_insn = DF_REF_INSN (link->ref);
42451 insn_entry[INSN_UID (use_insn)].will_delete = 1;
42452 link = link->next;
42453 }
42454 }
42455 }
42456 else if (insn_entry[i].is_store)
42457 {
42458 df_ref use;
42459 FOR_EACH_INSN_INFO_USE (use, insn_info)
42460 {
42461 /* Ignore uses for addressability. */
42462 machine_mode mode = GET_MODE (DF_REF_REG (use));
42463 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
42464 continue;
42465
42466 struct df_link *link = DF_REF_CHAIN (use);
42467
42468 /* We know by now that these are swaps, so we can delete
42469 them confidently. */
42470 while (link)
42471 {
42472 rtx def_insn = DF_REF_INSN (link->ref);
42473 insn_entry[INSN_UID (def_insn)].will_delete = 1;
42474 link = link->next;
42475 }
42476 }
42477 }
42478 }
42479
42480 /* OP is either a CONST_VECTOR or an expression containing one.
42481 Swap the first half of the vector with the second in the first
42482 case. Recurse to find it in the second. */
42483 static void
42484 swap_const_vector_halves (rtx op)
42485 {
42486 int i;
42487 enum rtx_code code = GET_CODE (op);
42488 if (GET_CODE (op) == CONST_VECTOR)
42489 {
42490 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
42491 for (i = 0; i < half_units; ++i)
42492 {
42493 rtx temp = CONST_VECTOR_ELT (op, i);
42494 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
42495 CONST_VECTOR_ELT (op, i + half_units) = temp;
42496 }
42497 }
42498 else
42499 {
42500 int j;
42501 const char *fmt = GET_RTX_FORMAT (code);
42502 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42503 if (fmt[i] == 'e' || fmt[i] == 'u')
42504 swap_const_vector_halves (XEXP (op, i));
42505 else if (fmt[i] == 'E')
42506 for (j = 0; j < XVECLEN (op, i); ++j)
42507 swap_const_vector_halves (XVECEXP (op, i, j));
42508 }
42509 }
42510
42511 /* Find all subregs of a vector expression that perform a narrowing,
42512 and adjust the subreg index to account for doubleword swapping. */
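/* For example, once doublewords are swapped, (subreg:DI (reg:V2DI v) 0)
   must become (subreg:DI (reg:V2DI v) 8) and vice versa, so that the
   subreg still names the doubleword the original code selected.  */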
42513 static void
42514 adjust_subreg_index (rtx op)
42515 {
42516 enum rtx_code code = GET_CODE (op);
42517 if (code == SUBREG
42518 && (GET_MODE_SIZE (GET_MODE (op))
42519 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
42520 {
42521 unsigned int index = SUBREG_BYTE (op);
42522 if (index < 8)
42523 index += 8;
42524 else
42525 index -= 8;
42526 SUBREG_BYTE (op) = index;
42527 }
42528
42529 const char *fmt = GET_RTX_FORMAT (code);
42530 int i,j;
42531 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
42532 if (fmt[i] == 'e' || fmt[i] == 'u')
42533 adjust_subreg_index (XEXP (op, i));
42534 else if (fmt[i] == 'E')
42535 for (j = 0; j < XVECLEN (op, i); ++j)
42536 adjust_subreg_index (XVECEXP (op, i, j));
42537 }
42538
42539 /* Convert the non-permuting load INSN to a permuting one. */
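/* For a V4SI load, for example, this rewrites (set tgt (mem addr)) as
   (set tgt (vec_select:V4SI (mem addr) (parallel [2 3 0 1]))), the
   shape of a permuting (lxvd2x-style) load.  */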
42540 static void
42541 permute_load (rtx_insn *insn)
42542 {
42543 rtx body = PATTERN (insn);
42544 rtx mem_op = SET_SRC (body);
42545 rtx tgt_reg = SET_DEST (body);
42546 machine_mode mode = GET_MODE (tgt_reg);
42547 int n_elts = GET_MODE_NUNITS (mode);
42548 int half_elts = n_elts / 2;
42549 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42550 int i, j;
42551 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42552 XVECEXP (par, 0, i) = GEN_INT (j);
42553 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42554 XVECEXP (par, 0, i) = GEN_INT (j);
42555 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
42556 SET_SRC (body) = sel;
42557 INSN_CODE (insn) = -1; /* Force re-recognition. */
42558 df_insn_rescan (insn);
42559
42560 if (dump_file)
42561 fprintf (dump_file, "Replacing load %d with permuted load\n",
42562 INSN_UID (insn));
42563 }
42564
42565 /* Convert the non-permuting store INSN to a permuting one. */
42566 static void
42567 permute_store (rtx_insn *insn)
42568 {
42569 rtx body = PATTERN (insn);
42570 rtx src_reg = SET_SRC (body);
42571 machine_mode mode = GET_MODE (src_reg);
42572 int n_elts = GET_MODE_NUNITS (mode);
42573 int half_elts = n_elts / 2;
42574 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
42575 int i, j;
42576 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
42577 XVECEXP (par, 0, i) = GEN_INT (j);
42578 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
42579 XVECEXP (par, 0, i) = GEN_INT (j);
42580 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
42581 SET_SRC (body) = sel;
42582 INSN_CODE (insn) = -1; /* Force re-recognition. */
42583 df_insn_rescan (insn);
42584
42585 if (dump_file)
42586 fprintf (dump_file, "Replacing store %d with permuted store\n",
42587 INSN_UID (insn));
42588 }
42589
42590 /* Given INSN that contains a vector extract operation, adjust the index
42591 of the extracted lane to account for the doubleword swap. */
42592 static void
42593 adjust_extract (rtx_insn *insn)
42594 {
42595 rtx pattern = PATTERN (insn);
42596 if (GET_CODE (pattern) == PARALLEL)
42597 pattern = XVECEXP (pattern, 0, 0);
42598 rtx src = SET_SRC (pattern);
42599 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
42600 account for that. */
42601 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
42602 rtx par = XEXP (sel, 1);
42603 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
42604 int lane = INTVAL (XVECEXP (par, 0, 0));
42605 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42606 XVECEXP (par, 0, 0) = GEN_INT (lane);
42607 INSN_CODE (insn) = -1; /* Force re-recognition. */
42608 df_insn_rescan (insn);
42609
42610 if (dump_file)
42611 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
42612 }
42613
42614 /* Given INSN that contains a vector direct-splat operation, adjust the
42615 index of the source lane to account for the doubleword swap. */
42616 static void
42617 adjust_splat (rtx_insn *insn)
42618 {
42619 rtx body = PATTERN (insn);
42620 rtx unspec = XEXP (body, 1);
42621 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
42622 int lane = INTVAL (XVECEXP (unspec, 0, 1));
42623 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
42624 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
42625 INSN_CODE (insn) = -1; /* Force re-recognition. */
42626 df_insn_rescan (insn);
42627
42628 if (dump_file)
42629 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
42630 }
42631
42632 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
42633 swap), reverse the order of the source operands and adjust the indices
42634 of the source lanes to account for doubleword reversal. */
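/* Worked example: original lanes (0, 2) produce (src0[0], src1[0]).
   With both inputs doubleword-swapped and their order reversed, the
   new lanes (3 - 2, 3 - 0) = (1, 3) produce (src1[0], src0[0]), which
   is precisely the doubleword-swapped result the rest of the web now
   expects.  */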
42635 static void
42636 adjust_xxpermdi (rtx_insn *insn)
42637 {
42638 rtx set = PATTERN (insn);
42639 rtx select = XEXP (set, 1);
42640 rtx concat = XEXP (select, 0);
42641 rtx src0 = XEXP (concat, 0);
42642 XEXP (concat, 0) = XEXP (concat, 1);
42643 XEXP (concat, 1) = src0;
42644 rtx parallel = XEXP (select, 1);
42645 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
42646 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
42647 int new_lane0 = 3 - lane1;
42648 int new_lane1 = 3 - lane0;
42649 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
42650 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
42651 INSN_CODE (insn) = -1; /* Force re-recognition. */
42652 df_insn_rescan (insn);
42653
42654 if (dump_file)
42655 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
42656 }
42657
42658 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
42659 reverse the order of those inputs. */
42660 static void
42661 adjust_concat (rtx_insn *insn)
42662 {
42663 rtx set = PATTERN (insn);
42664 rtx concat = XEXP (set, 1);
42665 rtx src0 = XEXP (concat, 0);
42666 XEXP (concat, 0) = XEXP (concat, 1);
42667 XEXP (concat, 1) = src0;
42668 INSN_CODE (insn) = -1; /* Force re-recognition. */
42669 df_insn_rescan (insn);
42670
42671 if (dump_file)
42672 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
42673 }
42674
42675 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
42676 constant pool to reflect swapped doublewords. */
42677 static void
42678 adjust_vperm (rtx_insn *insn)
42679 {
42680 /* We previously determined that the UNSPEC_VPERM was fed by a
42681 swap of a swapping load of a TOC-relative constant pool symbol.
42682 Find the MEM in the swapping load and replace it with a MEM for
42683 the adjusted mask constant. */
42684 rtx set = PATTERN (insn);
42685 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
42686
42687 /* Find the swap. */
42688 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42689 df_ref use;
42690 rtx_insn *swap_insn = 0;
42691 FOR_EACH_INSN_INFO_USE (use, insn_info)
42692 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
42693 {
42694 struct df_link *def_link = DF_REF_CHAIN (use);
42695 gcc_assert (def_link && !def_link->next);
42696 swap_insn = DF_REF_INSN (def_link->ref);
42697 break;
42698 }
42699 gcc_assert (swap_insn);
42700
42701 /* Find the load. */
42702 insn_info = DF_INSN_INFO_GET (swap_insn);
42703 rtx_insn *load_insn = 0;
42704 FOR_EACH_INSN_INFO_USE (use, insn_info)
42705 {
42706 struct df_link *def_link = DF_REF_CHAIN (use);
42707 gcc_assert (def_link && !def_link->next);
42708 load_insn = DF_REF_INSN (def_link->ref);
42709 break;
42710 }
42711 gcc_assert (load_insn);
42712
42713 /* Find the TOC-relative symbol access. */
42714 insn_info = DF_INSN_INFO_GET (load_insn);
42715 rtx_insn *tocrel_insn = 0;
42716 FOR_EACH_INSN_INFO_USE (use, insn_info)
42717 {
42718 struct df_link *def_link = DF_REF_CHAIN (use);
42719 gcc_assert (def_link && !def_link->next);
42720 tocrel_insn = DF_REF_INSN (def_link->ref);
42721 break;
42722 }
42723 gcc_assert (tocrel_insn);
42724
42725 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
42726 to set tocrel_base; otherwise it would be unnecessary as we've
42727 already established it will return true. */
42728 rtx base, offset;
42729 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
42730 /* There is an extra level of indirection for small/large code models. */
42731 if (GET_CODE (tocrel_expr) == MEM)
42732 tocrel_expr = XEXP (tocrel_expr, 0);
42733 if (!toc_relative_expr_p (tocrel_expr, false))
42734 gcc_unreachable ();
42735 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
42736 rtx const_vector = get_pool_constant (base);
42737 /* With the extra indirection, get_pool_constant will produce the
42738 real constant from the reg_equal expression, so get the real
42739 constant. */
42740 if (GET_CODE (const_vector) == SYMBOL_REF)
42741 const_vector = get_pool_constant (const_vector);
42742 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
42743
42744 /* Create an adjusted mask from the initial mask. */
42745 unsigned int new_mask[16], i, val;
42746 for (i = 0; i < 16; ++i) {
42747 val = INTVAL (XVECEXP (const_vector, 0, i));
42748 if (val < 16)
42749 new_mask[i] = (val + 8) % 16;
42750 else
42751 new_mask[i] = ((val + 8) % 16) + 16;
42752 }
42753
42754 /* Create a new CONST_VECTOR and a MEM that references it. */
42755 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
42756 for (i = 0; i < 16; ++i)
42757 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
42758 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
42759 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
42760 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
42761 can't recognize. Force the SYMBOL_REF into a register. */
42762 if (!REG_P (XEXP (new_mem, 0))) {
42763 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
42764 XEXP (new_mem, 0) = base_reg;
42765 /* Move the newly created insn ahead of the load insn. */
42766 rtx_insn *force_insn = get_last_insn ();
42767 remove_insn (force_insn);
42768 rtx_insn *before_load_insn = PREV_INSN (load_insn);
42769 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
42770 df_insn_rescan (before_load_insn);
42771 df_insn_rescan (force_insn);
42772 }
42773
42774 /* Replace the MEM in the load instruction and rescan it. */
42775 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
42776 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
42777 df_insn_rescan (load_insn);
42778
42779 if (dump_file)
42780 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
42781 }
42782
42783 /* The insn described by INSN_ENTRY[I] can be swapped, but only
42784 with special handling. Take care of that here. */
42785 static void
42786 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
42787 {
42788 rtx_insn *insn = insn_entry[i].insn;
42789 rtx body = PATTERN (insn);
42790
42791 switch (insn_entry[i].special_handling)
42792 {
42793 default:
42794 gcc_unreachable ();
42795 case SH_CONST_VECTOR:
42796 {
42797 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
42798 gcc_assert (GET_CODE (body) == SET);
42799 rtx rhs = SET_SRC (body);
42800 swap_const_vector_halves (rhs);
42801 if (dump_file)
42802 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
42803 break;
42804 }
42805 case SH_SUBREG:
42806 /* A subreg of the same size is already safe. For subregs that
42807 select a smaller portion of a reg, adjust the index for
42808 swapped doublewords. */
42809 adjust_subreg_index (body);
42810 if (dump_file)
42811 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
42812 break;
42813 case SH_NOSWAP_LD:
42814 /* Convert a non-permuting load to a permuting one. */
42815 permute_load (insn);
42816 break;
42817 case SH_NOSWAP_ST:
42818 /* Convert a non-permuting store to a permuting one. */
42819 permute_store (insn);
42820 break;
42821 case SH_EXTRACT:
42822 /* Change the lane on an extract operation. */
42823 adjust_extract (insn);
42824 break;
42825 case SH_SPLAT:
42826 /* Change the lane on a direct-splat operation. */
42827 adjust_splat (insn);
42828 break;
42829 case SH_XXPERMDI:
42830 /* Change the lanes on an XXPERMDI operation. */
42831 adjust_xxpermdi (insn);
42832 break;
42833 case SH_CONCAT:
42834 /* Reverse the order of a concatenation operation. */
42835 adjust_concat (insn);
42836 break;
42837 case SH_VPERM:
42838 /* Change the mask loaded from the constant pool for a VPERM. */
42839 adjust_vperm (insn);
42840 break;
42841 }
42842 }
42843
42844 /* Find the insn from the Ith table entry, which is known to be a
42845 register swap Y = SWAP(X). Replace it with a copy Y = X. */
42846 static void
42847 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
42848 {
42849 rtx_insn *insn = insn_entry[i].insn;
42850 rtx body = PATTERN (insn);
42851 rtx src_reg = XEXP (SET_SRC (body), 0);
42852 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
42853 rtx_insn *new_insn = emit_insn_before (copy, insn);
42854 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
42855 df_insn_rescan (new_insn);
42856
42857 if (dump_file)
42858 {
42859 unsigned int new_uid = INSN_UID (new_insn);
42860 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
42861 }
42862
42863 df_insn_delete (insn);
42864 remove_insn (insn);
42865 insn->set_deleted ();
42866 }
42867
42868 /* Dump the swap table to DUMP_FILE. */
42869 static void
42870 dump_swap_insn_table (swap_web_entry *insn_entry)
42871 {
42872 int e = get_max_uid ();
42873 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
42874
42875 for (int i = 0; i < e; ++i)
42876 if (insn_entry[i].is_relevant)
42877 {
42878 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
42879 fprintf (dump_file, "%6d %6d ", i,
42880 pred_entry && pred_entry->insn
42881 ? INSN_UID (pred_entry->insn) : 0);
42882 if (insn_entry[i].is_load)
42883 fputs ("load ", dump_file);
42884 if (insn_entry[i].is_store)
42885 fputs ("store ", dump_file);
42886 if (insn_entry[i].is_swap)
42887 fputs ("swap ", dump_file);
42888 if (insn_entry[i].is_live_in)
42889 fputs ("live-in ", dump_file);
42890 if (insn_entry[i].is_live_out)
42891 fputs ("live-out ", dump_file);
42892 if (insn_entry[i].contains_subreg)
42893 fputs ("subreg ", dump_file);
42894 if (insn_entry[i].is_128_int)
42895 fputs ("int128 ", dump_file);
42896 if (insn_entry[i].is_call)
42897 fputs ("call ", dump_file);
42898 if (insn_entry[i].is_swappable)
42899 {
42900 fputs ("swappable ", dump_file);
42901 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
42902 fputs ("special:constvec ", dump_file);
42903 else if (insn_entry[i].special_handling == SH_SUBREG)
42904 fputs ("special:subreg ", dump_file);
42905 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
42906 fputs ("special:load ", dump_file);
42907 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
42908 fputs ("special:store ", dump_file);
42909 else if (insn_entry[i].special_handling == SH_EXTRACT)
42910 fputs ("special:extract ", dump_file);
42911 else if (insn_entry[i].special_handling == SH_SPLAT)
42912 fputs ("special:splat ", dump_file);
42913 else if (insn_entry[i].special_handling == SH_XXPERMDI)
42914 fputs ("special:xxpermdi ", dump_file);
42915 else if (insn_entry[i].special_handling == SH_CONCAT)
42916 fputs ("special:concat ", dump_file);
42917 else if (insn_entry[i].special_handling == SH_VPERM)
42918 fputs ("special:vperm ", dump_file);
42919 }
42920 if (insn_entry[i].web_not_optimizable)
42921 fputs ("unoptimizable ", dump_file);
42922 if (insn_entry[i].will_delete)
42923 fputs ("delete ", dump_file);
42924 fputs ("\n", dump_file);
42925 }
42926 fputs ("\n", dump_file);
42927 }
42928
42929 /* Return ALIGN with its address canonicalized to (reg) or (+ reg reg).
42930 Here ALIGN is an (& addr (const_int -16)). Always return a new copy
42931 to avoid problems with combine. */
42932 static rtx
42933 alignment_with_canonical_addr (rtx align)
42934 {
42935 rtx canon;
42936 rtx addr = XEXP (align, 0);
42937
42938 if (REG_P (addr))
42939 canon = addr;
42940
42941 else if (GET_CODE (addr) == PLUS)
42942 {
42943 rtx addrop0 = XEXP (addr, 0);
42944 rtx addrop1 = XEXP (addr, 1);
42945
42946 if (!REG_P (addrop0))
42947 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
42948
42949 if (!REG_P (addrop1))
42950 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
42951
42952 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
42953 }
42954
42955 else
42956 canon = force_reg (GET_MODE (addr), addr);
42957
42958 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
42959 }
42960
42961 /* Check whether INSN sets a register to an alignment mask, and if so,
42962 return a fully-expanded rtx for the masking operation. */
42963 static rtx
42964 alignment_mask (rtx_insn *insn)
42965 {
42966 rtx body = PATTERN (insn);
42967
42968 if (GET_CODE (body) != SET
42969 || GET_CODE (SET_SRC (body)) != AND
42970 || !REG_P (XEXP (SET_SRC (body), 0)))
42971 return 0;
42972
42973 rtx mask = XEXP (SET_SRC (body), 1);
42974
42975 if (GET_CODE (mask) == CONST_INT)
42976 {
42977 if (INTVAL (mask) == -16)
42978 return alignment_with_canonical_addr (SET_SRC (body));
42979 else
42980 return 0;
42981 }
42982
42983 if (!REG_P (mask))
42984 return 0;
42985
42986 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
42987 df_ref use;
42988 rtx real_mask = 0;
42989
42990 FOR_EACH_INSN_INFO_USE (use, insn_info)
42991 {
42992 if (!rtx_equal_p (DF_REF_REG (use), mask))
42993 continue;
42994
42995 struct df_link *def_link = DF_REF_CHAIN (use);
42996 if (!def_link || def_link->next)
42997 return 0;
42998
42999 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
43000 rtx const_body = PATTERN (const_insn);
43001 if (GET_CODE (const_body) != SET)
43002 return 0;
43003
43004 real_mask = SET_SRC (const_body);
43005
43006 if (GET_CODE (real_mask) != CONST_INT
43007 || INTVAL (real_mask) != -16)
43008 return 0;
43009 }
43010
43011 if (real_mask == 0)
43012 return 0;
43013
43014 return alignment_with_canonical_addr (SET_SRC (body));
43015 }
43016
43017 /* Given INSN that's a load or store based at BASE_REG, look for a
43018 feeding computation that aligns its address on a 16-byte boundary. */
43019 static rtx
43020 find_alignment_op (rtx_insn *insn, rtx base_reg)
43021 {
43022 df_ref base_use;
43023 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43024 rtx and_operation = 0;
43025
43026 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
43027 {
43028 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
43029 continue;
43030
43031 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
43032 if (!base_def_link || base_def_link->next)
43033 break;
43034
43035 /* With stack-protector code enabled, and possibly in other
43036 circumstances, there may not be an associated insn for
43037 the def. */
43038 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
43039 break;
43040
43041 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
43042 and_operation = alignment_mask (and_insn);
43043 if (and_operation != 0)
43044 break;
43045 }
43046
43047 return and_operation;
43048 }
43049
43050 struct del_info { bool replace; rtx_insn *replace_insn; };
43051
43052 /* If INSN is the load for an lvx pattern, put it in canonical form. */
43053 static void
43054 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
43055 {
43056 rtx body = PATTERN (insn);
43057 gcc_assert (GET_CODE (body) == SET
43058 && GET_CODE (SET_SRC (body)) == VEC_SELECT
43059 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
43060
43061 rtx mem = XEXP (SET_SRC (body), 0);
43062 rtx base_reg = XEXP (mem, 0);
43063
43064 rtx and_operation = find_alignment_op (insn, base_reg);
43065
43066 if (and_operation != 0)
43067 {
43068 df_ref def;
43069 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43070 FOR_EACH_INSN_INFO_DEF (def, insn_info)
43071 {
43072 struct df_link *link = DF_REF_CHAIN (def);
43073 if (!link || link->next)
43074 break;
43075
43076 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43077 if (!insn_is_swap_p (swap_insn)
43078 || insn_is_load_p (swap_insn)
43079 || insn_is_store_p (swap_insn))
43080 break;
43081
43082 /* Expected lvx pattern found. Change the swap to
43083 a copy, and propagate the AND operation into the
43084 load. */
43085 to_delete[INSN_UID (swap_insn)].replace = true;
43086 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43087
43088 XEXP (mem, 0) = and_operation;
43089 SET_SRC (body) = mem;
43090 INSN_CODE (insn) = -1; /* Force re-recognition. */
43091 df_insn_rescan (insn);
43092
43093 if (dump_file)
43094 fprintf (dump_file, "lvx opportunity found at %d\n",
43095 INSN_UID (insn));
43096 }
43097 }
43098 }
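
/* Schematically, recombine_lvx_pattern turns

       (set tmp (vec_select (mem addr) <swap-selector>))   ; permuting load
       (set dst (vec_select tmp <swap-selector>))          ; register swap

   into

       (set tmp (mem (and addr (const_int -16))))          ; lvx form
       (set dst tmp)                                       ; copy

   by propagating the alignment AND into the load; the swap itself is
   downgraded to a copy later in recombine_lvx_stvx_patterns.  */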
43099
43100 /* If INSN is the store for an stvx pattern, put it in canonical form. */
43101 static void
43102 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
43103 {
43104 rtx body = PATTERN (insn);
43105 gcc_assert (GET_CODE (body) == SET
43106 && GET_CODE (SET_DEST (body)) == MEM
43107 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
43108 rtx mem = SET_DEST (body);
43109 rtx base_reg = XEXP (mem, 0);
43110
43111 rtx and_operation = find_alignment_op (insn, base_reg);
43112
43113 if (and_operation != 0)
43114 {
43115 rtx src_reg = XEXP (SET_SRC (body), 0);
43116 df_ref src_use;
43117 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43118 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
43119 {
43120 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
43121 continue;
43122
43123 struct df_link *link = DF_REF_CHAIN (src_use);
43124 if (!link || link->next)
43125 break;
43126
43127 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
43128 if (!insn_is_swap_p (swap_insn)
43129 || insn_is_load_p (swap_insn)
43130 || insn_is_store_p (swap_insn))
43131 break;
43132
43133 /* Expected stvx pattern found. Change the swap to
43134 a copy, and propagate the AND operation into the
43135 store. */
43136 to_delete[INSN_UID (swap_insn)].replace = true;
43137 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
43138
43139 XEXP (mem, 0) = and_operation;
43140 SET_SRC (body) = src_reg;
43141 INSN_CODE (insn) = -1; /* Force re-recognition. */
43142 df_insn_rescan (insn);
43143
43144 if (dump_file)
43145 fprintf (dump_file, "stvx opportunity found at %d\n",
43146 INSN_UID (insn));
43147 }
43148 }
43149 }
43150
43151 /* Look for patterns created from builtin lvx and stvx calls, and
43152 canonicalize them to be properly recognized as such. */
43153 static void
43154 recombine_lvx_stvx_patterns (function *fun)
43155 {
43156 int i;
43157 basic_block bb;
43158 rtx_insn *insn;
43159
43160 int num_insns = get_max_uid ();
43161 del_info *to_delete = XCNEWVEC (del_info, num_insns);
43162
43163 FOR_ALL_BB_FN (bb, fun)
43164 FOR_BB_INSNS (bb, insn)
43165 {
43166 if (!NONDEBUG_INSN_P (insn))
43167 continue;
43168
43169 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
43170 recombine_lvx_pattern (insn, to_delete);
43171 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
43172 recombine_stvx_pattern (insn, to_delete);
43173 }
43174
43175 /* Turning swaps into copies is delayed until now, to avoid problems
43176 with deleting instructions during the insn walk. */
43177 for (i = 0; i < num_insns; i++)
43178 if (to_delete[i].replace)
43179 {
43180 rtx swap_body = PATTERN (to_delete[i].replace_insn);
43181 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
43182 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
43183 rtx_insn *new_insn = emit_insn_before (copy,
43184 to_delete[i].replace_insn);
43185 set_block_for_insn (new_insn,
43186 BLOCK_FOR_INSN (to_delete[i].replace_insn));
43187 df_insn_rescan (new_insn);
43188 df_insn_delete (to_delete[i].replace_insn);
43189 remove_insn (to_delete[i].replace_insn);
43190 to_delete[i].replace_insn->set_deleted ();
43191 }
43192
43193 free (to_delete);
43194 }
43195
43196 /* Main entry point for this pass. */
43197 unsigned int
43198 rs6000_analyze_swaps (function *fun)
43199 {
43200 swap_web_entry *insn_entry;
43201 basic_block bb;
43202 rtx_insn *insn, *curr_insn = 0;
43203
43204 /* Dataflow analysis for use-def chains. */
43205 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
43206 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
43207 df_analyze ();
43208 df_set_flags (DF_DEFER_INSN_RESCAN);
43209
43210 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
43211 recombine_lvx_stvx_patterns (fun);
43212
43213 /* Allocate structure to represent webs of insns. */
43214 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
43215
43216 /* Walk the insns to gather basic data. */
43217 FOR_ALL_BB_FN (bb, fun)
43218 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
43219 {
43220 unsigned int uid = INSN_UID (insn);
43221 if (NONDEBUG_INSN_P (insn))
43222 {
43223 insn_entry[uid].insn = insn;
43224
43225 if (GET_CODE (insn) == CALL_INSN)
43226 insn_entry[uid].is_call = 1;
43227
43228 /* Walk the uses and defs to see if we mention vector regs.
43229 Record any constraints on optimization of such mentions. */
43230 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
43231 df_ref mention;
43232 FOR_EACH_INSN_INFO_USE (mention, insn_info)
43233 {
43234 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43235 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43236
43237 /* If a use gets its value from a call insn, it will be
43238 a hard register and will look like (reg:V4SI 3 3).
43239 The df analysis creates two mentions for GPR3 and GPR4,
43240 both DImode. We must recognize this and treat it as a
43241 vector mention to ensure the call is unioned with this
43242 use. */
43243 if (mode == DImode && DF_REF_INSN_INFO (mention))
43244 {
43245 rtx feeder = DF_REF_INSN (mention);
43246 /* FIXME: It is pretty hard to get from the df mention
43247 to the mode of the use in the insn. We arbitrarily
43248 pick a vector mode here, even though the use might
43249 be a real DImode. We can be too conservative
43250 (create a web larger than necessary) because of
43251 this, so consider eventually fixing this. */
43252 if (GET_CODE (feeder) == CALL_INSN)
43253 mode = V4SImode;
43254 }
43255
43256 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43257 {
43258 insn_entry[uid].is_relevant = 1;
43259 if (mode == TImode || mode == V1TImode
43260 || FLOAT128_VECTOR_P (mode))
43261 insn_entry[uid].is_128_int = 1;
43262 if (DF_REF_INSN_INFO (mention))
43263 insn_entry[uid].contains_subreg
43264 = !rtx_equal_p (DF_REF_REG (mention),
43265 DF_REF_REAL_REG (mention));
43266 union_defs (insn_entry, insn, mention);
43267 }
43268 }
43269 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
43270 {
43271 /* We use DF_REF_REAL_REG here to get inside any subregs. */
43272 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
43273
43274 /* If we're loading up a hard vector register for a call, it
43275 looks like (set (reg:V4SI 9 9) (...)). The df analysis creates
43276 two mentions for GPR9 and GPR10, both DImode, so the mention
43277 mode alone isn't sufficient to union the call into the web
43278 with the parameter setup code. */
43279 rtx pat = PATTERN (insn);
43280 if (mode == DImode && GET_CODE (pat) == SET
43281 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (pat))))
43282 mode = GET_MODE (SET_DEST (pat));
43283
43284 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
43285 {
43286 insn_entry[uid].is_relevant = 1;
43287 if (mode == TImode || mode == V1TImode
43288 || FLOAT128_VECTOR_P (mode))
43289 insn_entry[uid].is_128_int = 1;
43290 if (DF_REF_INSN_INFO (mention))
43291 insn_entry[uid].contains_subreg
43292 = !rtx_equal_p (DF_REF_REG (mention),
43293 DF_REF_REAL_REG (mention));
43294 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
43295 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
43296 insn_entry[uid].is_live_out = 1;
43297 union_uses (insn_entry, insn, mention);
43298 }
43299 }
43300
43301 if (insn_entry[uid].is_relevant)
43302 {
43303 /* Determine if this is a load or store. */
43304 insn_entry[uid].is_load = insn_is_load_p (insn);
43305 insn_entry[uid].is_store = insn_is_store_p (insn);
43306
43307 /* Determine if this is a doubleword swap. If not,
43308 determine whether it can legally be swapped. */
43309 if (insn_is_swap_p (insn))
43310 insn_entry[uid].is_swap = 1;
43311 else
43312 {
43313 unsigned int special = SH_NONE;
43314 insn_entry[uid].is_swappable
43315 = insn_is_swappable_p (insn_entry, insn, &special);
43316 if (special != SH_NONE && insn_entry[uid].contains_subreg)
43317 insn_entry[uid].is_swappable = 0;
43318 else if (special != SH_NONE)
43319 insn_entry[uid].special_handling = special;
43320 else if (insn_entry[uid].contains_subreg)
43321 insn_entry[uid].special_handling = SH_SUBREG;
43322 }
43323 }
43324 }
43325 }
43326
43327 if (dump_file)
43328 {
43329 fprintf (dump_file, "\nSwap insn entry table when first built\n");
43330 dump_swap_insn_table (insn_entry);
43331 }
43332
43333 /* Record unoptimizable webs. */
43334 unsigned e = get_max_uid (), i;
43335 for (i = 0; i < e; ++i)
43336 {
43337 if (!insn_entry[i].is_relevant)
43338 continue;
43339
43340 swap_web_entry *root
43341 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
43342
      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
          || (insn_entry[i].contains_subreg
              && insn_entry[i].special_handling != SH_SUBREG)
          || insn_entry[i].is_128_int || insn_entry[i].is_call
          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
        root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting, then the
         optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
        root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
         by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref def;

          FOR_EACH_INSN_INFO_DEF (def, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (def);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
        {
          rtx insn = insn_entry[i].insn;
          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
          df_ref use;

          FOR_EACH_INSN_INFO_USE (use, insn_info)
            {
              struct df_link *link = DF_REF_CHAIN (use);

              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
                {
                  root->web_not_optimizable = 1;
                  break;
                }
            }
        }
    }
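
  /* A sketch of the load case checked above: a permuting load such as

       (set (reg:V4SI x) (vec_select:V4SI (mem:V4SI ...) ...))

     must feed only doubleword swaps.  If any use of X is not a swap,
     that consumer would observe swapped lanes once the swaps are
     removed, so the web is left alone; the store case is symmetric.  */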

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
        && insn_entry[i].is_swap)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
        swap_web_entry* root_entry
          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
        if (!root_entry->web_not_optimizable)
          handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}

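/* As an illustration (a sketch, not drawn from a particular test
   case), the little-endian code this pass improves looks like

     lxvd2x 0,0,3       # load with doublewords swapped
     xxpermdi 0,0,0,2   # swap doublewords back
     ...lane-insensitive computation on vs0...
     xxpermdi 0,0,0,2   # swap doublewords again
     stxvd2x 0,0,4      # store with doublewords swapped

   When every insn in the web is lane-insensitive (or fixable by the
   special handling above), the xxpermdi swaps are replaced by copies
   and subsequently removed.  */
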
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
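  /* Run only where the problem exists: little-endian VSX code prior
     to ISA 3.0, where the default expansions use lxvd2x/stxvd2x plus
     compensating doubleword swaps.  Power9 (TARGET_P9_VECTOR) has
     true little-endian vector loads and stores, so no swaps are
     generated there.  */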
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
              && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

  opt_pass *clone ()
    {
      return new pass_analyze_swaps (m_ctxt);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
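/* The three trees built here are used when expanding C11 atomic
   compound assignment on floating-point types: *HOLD is emitted
   before the compare-and-swap loop, *CLEAR at the start of each
   retry, and *UPDATE once the exchange succeeds, so that FP
   exceptions raised by discarded iterations are not observable.
   A sketch of the source pattern this serves:

     _Atomic double d;
     d += 1.0;   // may retry; flags from failed tries are dropped  */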

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

       fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

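  /* Background for the masks below: __builtin_mffs places the FPSCR
     image in the low word of a double's bit pattern, and
     __builtin_mtfsf (0xff, x) writes all eight 4-bit FPSCR fields
     (the entire low word) back from X, so each step views the
     environment as a uint64_t and masks it.  */
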
  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything in the lower word of the FPSCR,
     including the exception flags.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clear_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clear_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
                                  | (*(uint64_t*)&fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

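  /* In outline: the first mask keeps the exception status bits of the
     live FPSCR while discarding its control bits and the FX/FEX/VX
     summary bits (which the hardware recomputes); the second selects
     the control bits and saved status bits of the held environment.
     ORing the two merges saved control state with the accumulated
     exceptions, as feupdateenv requires.  */
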
  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
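
/* Note: on this target the rsqrt optab expands to a reciprocal
   square-root estimate (frsqrte and friends) refined by
   Newton-Raphson steps, which is fast but not correctly rounded;
   hence it is only advertised when optimizing for speed and when the
   -mrecip settings checked by RS6000_RECIP_AUTO_RSQRTE_P permit it.  */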
\f
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-powerpcspe.h"