/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call to
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

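/* As an illustrative sketch (the actual option handling lives elsewhere in
   this file): parsing "-mrecip=divf,rsqrtd" would OR together the masks of
   the matching entries above, i.e.
     (RECIP_SF_DIV | RECIP_V4SF_DIV) | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT).  */
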
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	 PPC_PLATFORM_POWER9 },
  { "power8",	 PPC_PLATFORM_POWER8 },
  { "power7",	 PPC_PLATFORM_POWER7 },
  { "power6x",	 PPC_PLATFORM_POWER6X },
  { "power6",	 PPC_PLATFORM_POWER6 },
  { "power5+",	 PPC_PLATFORM_POWER5_PLUS },
  { "power5",	 PPC_PLATFORM_POWER5 },
  { "ppc970",	 PPC_PLATFORM_PPC970 },
  { "power4",	 PPC_PLATFORM_POWER4 },
  { "ppca2",	 PPC_PLATFORM_PPCA2 },
  { "ppc476",	 PPC_PLATFORM_PPC476 },
  { "ppc464",	 PPC_PLATFORM_PPC464 },
  { "ppc440",	 PPC_PLATFORM_PPC440 },
  { "ppc405",	 PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

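/* Illustrative use from user code (a sketch, not part of this file):
     if (__builtin_cpu_is ("power9"))
       use_power9_path ();
   The builtin compares the AT_PLATFORM value that the C library caches in
   the TCB against the PPC_PLATFORM_* value from the table above.  */
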
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};

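/* Illustrative use from user code (a sketch):
     if (__builtin_cpu_supports ("vsx"))
       return vsx_implementation ();
   This tests PPC_FEATURE_HAS_VSX against the AT_HWCAP word (id 0); names
   with id 1 above are tested against AT_HWCAP2 instead.  */
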
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};

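/* Illustrative user-level use of the clone table (a sketch; the attribute
   itself is documented in the GCC manual):
     __attribute__ ((target_clones ("cpu=power9,default")))
     long mult (long a, long b) { return a * b; }
   The generated dispatcher checks the HWCAP names above from the highest
   ISA down to the default.  */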

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

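/* For example (an illustrative sketch of how these bits combine): a mode
   with valid reg+reg and reg+offset addressing in GPRs would have
     reg_addr[mode].addr_mask[RELOAD_REG_GPR]
       == (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET).  */
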
/* Per-mode addressing information: masks of valid addressing modes (indexed
   by register type), plus the reload and fusion insns for the mode.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

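/* Illustratively (a sketch based on the ISA 3.0 DQ-form constraint): under
   this predicate an offset of 16, as in "lxv vs0,16(r3)", is acceptable,
   while an offset of 8 is not and would need an indexed (reg+reg) form.  */
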
\f
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

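/* An illustrative aside on the units: COSTS_N_INSNS (N) scales N
   instructions into the units used by the rtx cost hooks, so e.g.
   power9_cost below models an SImode divide at COSTS_N_INSNS (8),
   i.e. roughly eight times the cost of a simple add.  */
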
/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),    /* mulsi */
  COSTS_N_INSNS (16),    /* mulsi_const */
  COSTS_N_INSNS (16),    /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void htm_init_builtins (void);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr","ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
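
/* Illustrative user code matched by the table above (a sketch):
     void far_call (void) __attribute__ ((longcall));
     struct s { char c; int i; } __attribute__ ((ms_struct));
   "longcall" forces a full-range indirect call sequence; "ms_struct"
   selects Microsoft-compatible record layout.  */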
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO)	(0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
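/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0),
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31).  */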
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1671 #undef TARGET_SCHED_INIT
1672 #define TARGET_SCHED_INIT rs6000_sched_init
1673 #undef TARGET_SCHED_FINISH
1674 #define TARGET_SCHED_FINISH rs6000_sched_finish
1675 #undef TARGET_SCHED_REORDER
1676 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1677 #undef TARGET_SCHED_REORDER2
1678 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1679
1680 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1681 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1682
1683 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1684 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1685
1686 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1687 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1688 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1689 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1690 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1691 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1692 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1693 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1694
1695 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1696 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1697
1698 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1699 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1700 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1701 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1702 rs6000_builtin_support_vector_misalignment
1703 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1704 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1705 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1706 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1707 rs6000_builtin_vectorization_cost
1708 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1709 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1710 rs6000_preferred_simd_mode
1711 #undef TARGET_VECTORIZE_INIT_COST
1712 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1713 #undef TARGET_VECTORIZE_ADD_STMT_COST
1714 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1715 #undef TARGET_VECTORIZE_FINISH_COST
1716 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1717 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1718 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1719
1720 #undef TARGET_INIT_BUILTINS
1721 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1722 #undef TARGET_BUILTIN_DECL
1723 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1724
1725 #undef TARGET_FOLD_BUILTIN
1726 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1727 #undef TARGET_GIMPLE_FOLD_BUILTIN
1728 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1729
1730 #undef TARGET_EXPAND_BUILTIN
1731 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1732
1733 #undef TARGET_MANGLE_TYPE
1734 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1735
1736 #undef TARGET_INIT_LIBFUNCS
1737 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1738
1739 #if TARGET_MACHO
1740 #undef TARGET_BINDS_LOCAL_P
1741 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1742 #endif
1743
1744 #undef TARGET_MS_BITFIELD_LAYOUT_P
1745 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1746
1747 #undef TARGET_ASM_OUTPUT_MI_THUNK
1748 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1749
1750 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1751 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1752
1753 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1754 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1755
1756 #undef TARGET_REGISTER_MOVE_COST
1757 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1758 #undef TARGET_MEMORY_MOVE_COST
1759 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1760 #undef TARGET_CANNOT_COPY_INSN_P
1761 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1762 #undef TARGET_RTX_COSTS
1763 #define TARGET_RTX_COSTS rs6000_rtx_costs
1764 #undef TARGET_ADDRESS_COST
1765 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1766
1767 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1768 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1769
1770 #undef TARGET_PROMOTE_FUNCTION_MODE
1771 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1772
1773 #undef TARGET_RETURN_IN_MEMORY
1774 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1775
1776 #undef TARGET_RETURN_IN_MSB
1777 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1778
1779 #undef TARGET_SETUP_INCOMING_VARARGS
1780 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1781
1782 /* Always strict argument naming on rs6000. */
1783 #undef TARGET_STRICT_ARGUMENT_NAMING
1784 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1785 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1786 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1787 #undef TARGET_SPLIT_COMPLEX_ARG
1788 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1789 #undef TARGET_MUST_PASS_IN_STACK
1790 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1791 #undef TARGET_PASS_BY_REFERENCE
1792 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1793 #undef TARGET_ARG_PARTIAL_BYTES
1794 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1795 #undef TARGET_FUNCTION_ARG_ADVANCE
1796 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1797 #undef TARGET_FUNCTION_ARG
1798 #define TARGET_FUNCTION_ARG rs6000_function_arg
1799 #undef TARGET_FUNCTION_ARG_BOUNDARY
1800 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1801
1802 #undef TARGET_BUILD_BUILTIN_VA_LIST
1803 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1804
1805 #undef TARGET_EXPAND_BUILTIN_VA_START
1806 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1807
1808 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1809 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1810
1811 #undef TARGET_EH_RETURN_FILTER_MODE
1812 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1813
1814 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1815 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1816
1817 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1818 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1819
1820 #undef TARGET_FLOATN_MODE
1821 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1822
1823 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1824 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1825
1826 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1827 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1828
1829 #undef TARGET_MD_ASM_ADJUST
1830 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1831
1832 #undef TARGET_OPTION_OVERRIDE
1833 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1834
1835 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1836 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1837 rs6000_builtin_vectorized_function
1838
1839 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1840 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1841 rs6000_builtin_md_vectorized_function
1842
1843 #undef TARGET_STACK_PROTECT_GUARD
1844 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1845
1846 #if !TARGET_MACHO
1847 #undef TARGET_STACK_PROTECT_FAIL
1848 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1849 #endif
1850
1851 #ifdef HAVE_AS_TLS
1852 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1853 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1854 #endif
1855
1856 /* Use a 32-bit anchor range. This leads to sequences like:
1857
1858 addis tmp,anchor,high
1859 add dest,tmp,low
1860
1861 where tmp itself acts as an anchor, and can be shared between
1862 accesses to the same 64k page. */
1863 #undef TARGET_MIN_ANCHOR_OFFSET
1864 #define TARGET_MIN_ANCHOR_OFFSET (-0x7fffffff - 1)
1865 #undef TARGET_MAX_ANCHOR_OFFSET
1866 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1867 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1868 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1869 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1870 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1871
1872 #undef TARGET_BUILTIN_RECIPROCAL
1873 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1874
1875 #undef TARGET_EXPAND_TO_RTL_HOOK
1876 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1877
1878 #undef TARGET_INSTANTIATE_DECLS
1879 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1880
1881 #undef TARGET_SECONDARY_RELOAD
1882 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1883
1884 #undef TARGET_LEGITIMATE_ADDRESS_P
1885 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1886
1887 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1888 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1889
1890 #undef TARGET_LRA_P
1891 #define TARGET_LRA_P rs6000_lra_p
1892
1893 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1894 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1895
1896 #undef TARGET_CAN_ELIMINATE
1897 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1898
1899 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1900 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1901
1902 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1903 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1904
1905 #undef TARGET_TRAMPOLINE_INIT
1906 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1907
1908 #undef TARGET_FUNCTION_VALUE
1909 #define TARGET_FUNCTION_VALUE rs6000_function_value
1910
1911 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1912 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1913
1914 #undef TARGET_OPTION_SAVE
1915 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1916
1917 #undef TARGET_OPTION_RESTORE
1918 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1919
1920 #undef TARGET_OPTION_PRINT
1921 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1922
1923 #undef TARGET_CAN_INLINE_P
1924 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1925
1926 #undef TARGET_SET_CURRENT_FUNCTION
1927 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1928
1929 #undef TARGET_LEGITIMATE_CONSTANT_P
1930 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1931
1932 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1933 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1934
1935 #undef TARGET_CAN_USE_DOLOOP_P
1936 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1937
1938 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1939 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1940
1941 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1942 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1943 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1944 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1945 #undef TARGET_UNWIND_WORD_MODE
1946 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1947
1948 #undef TARGET_OFFLOAD_OPTIONS
1949 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1950
1951 #undef TARGET_C_MODE_FOR_SUFFIX
1952 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1953
1954 #undef TARGET_INVALID_BINARY_OP
1955 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1956
1957 #undef TARGET_OPTAB_SUPPORTED_P
1958 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1959
1960 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1961 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1962
1963 #undef TARGET_COMPARE_VERSION_PRIORITY
1964 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1965
1966 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1967 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1968 rs6000_generate_version_dispatcher_body
1969
1970 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1971 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1972 rs6000_get_function_versions_dispatcher
1973
1974 #undef TARGET_OPTION_FUNCTION_VERSIONS
1975 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1976
1977 \f
1978
1979 /* Processor table. */
1980 struct rs6000_ptt
1981 {
1982 const char *const name; /* Canonical processor name. */
1983 const enum processor_type processor; /* Processor type enum value. */
1984 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1985 };
1986
1987 static struct rs6000_ptt const processor_target_table[] =
1988 {
1989 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1990 #include "rs6000-cpus.def"
1991 #undef RS6000_CPU
1992 };
1993
1994 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1995 name is invalid. */
1996
1997 static int
1998 rs6000_cpu_name_lookup (const char *name)
1999 {
2000 size_t i;
2001
2002 if (name != NULL)
2003 {
2004 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2005 if (! strcmp (name, processor_target_table[i].name))
2006 return (int)i;
2007 }
2008
2009 return -1;
2010 }
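
/* Illustrative use (not part of the original source): resolving a -mcpu=
   value into an index into processor_target_table.  A negative result means
   the name was not recognized.  */
#if 0
static void
example_apply_mcpu (void)
{
  int cpu_index = rs6000_cpu_name_lookup ("power9");
  if (cpu_index >= 0)
    rs6000_isa_flags |= processor_target_table[cpu_index].target_enable;
}
#endif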
2011
2012 \f
2013 /* Return number of consecutive hard regs needed starting at reg REGNO
2014 to hold something of mode MODE.
2015 This is ordinarily the length in words of a value of mode MODE
2016 but can be less for certain modes in special long registers.
2017
2018 POWER and PowerPC GPRs hold 32 bits worth;
2019 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2020
2021 static int
2022 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2023 {
2024 unsigned HOST_WIDE_INT reg_size;
2025
2026 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2027 128-bit floating point that can go in vector registers, which has VSX
2028 memory addressing. */
2029 if (FP_REGNO_P (regno))
2030 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2031 ? UNITS_PER_VSX_WORD
2032 : UNITS_PER_FP_WORD);
2033
2034 else if (ALTIVEC_REGNO_P (regno))
2035 reg_size = UNITS_PER_ALTIVEC_WORD;
2036
2037 else
2038 reg_size = UNITS_PER_WORD;
2039
2040 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
2041 }
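
/* Worked example (illustrative, not part of the original source; assumes
   the default IBM double-double TFmode and UNITS_PER_FP_WORD == 8): a
   16-byte long double needs (16 + 8 - 1) / 8 == 2 consecutive FPRs, while
   the same 16 bytes in an AltiVec register (reg_size 16) need only 1.
   Fragments shown as they might appear inside a self-test routine.  */
#if 0
gcc_assert (rs6000_hard_regno_nregs_internal (FIRST_FPR_REGNO, TFmode) == 2);
gcc_assert (rs6000_hard_regno_nregs_internal (FIRST_ALTIVEC_REGNO,
					      V4SImode) == 1);
#endif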
2042
2043 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2044 MODE. */
2045 static int
2046 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
2047 {
2048 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2049
2050 if (COMPLEX_MODE_P (mode))
2051 mode = GET_MODE_INNER (mode);
2052
2053 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2054 register combinations, and we use PTImode where we need to deal with quad
2055 word memory operations. Don't allow quad words in the argument or frame
2056 pointer registers, just registers 0..31. */
2057 if (mode == PTImode)
2058 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2059 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2060 && ((regno & 1) == 0));
2061
2062 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2063 implementations. Don't allow an item to be split between a FP register
2064 and an Altivec register. Allow TImode in all VSX registers if the user
2065 asked for it. */
2066 if (TARGET_VSX && VSX_REGNO_P (regno)
2067 && (VECTOR_MEM_VSX_P (mode)
2068 || FLOAT128_VECTOR_P (mode)
2069 || reg_addr[mode].scalar_in_vmx_p
2070 || (TARGET_VSX_TIMODE && mode == TImode)
2071 || (TARGET_VADDUQM && mode == V1TImode)))
2072 {
2073 if (FP_REGNO_P (regno))
2074 return FP_REGNO_P (last_regno);
2075
2076 if (ALTIVEC_REGNO_P (regno))
2077 {
2078 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2079 return 0;
2080
2081 return ALTIVEC_REGNO_P (last_regno);
2082 }
2083 }
2084
2085 /* The GPRs can hold any mode, but values bigger than one register
2086 cannot go past R31. */
2087 if (INT_REGNO_P (regno))
2088 return INT_REGNO_P (last_regno);
2089
2090 /* The float registers (except for VSX vector modes) can only hold floating
2091 modes and DImode. */
2092 if (FP_REGNO_P (regno))
2093 {
2094 if (FLOAT128_VECTOR_P (mode))
2095 return false;
2096
2097 if (SCALAR_FLOAT_MODE_P (mode)
2098 && (mode != TDmode || (regno % 2) == 0)
2099 && FP_REGNO_P (last_regno))
2100 return 1;
2101
2102 if (GET_MODE_CLASS (mode) == MODE_INT)
2103 {
2104 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2105 return 1;
2106
2107 if (TARGET_VSX_SMALL_INTEGER)
2108 {
2109 if (mode == SImode)
2110 return 1;
2111
2112 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
2113 return 1;
2114 }
2115 }
2116
2117 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2118 && PAIRED_VECTOR_MODE (mode))
2119 return 1;
2120
2121 return 0;
2122 }
2123
2124 /* The CR registers can only hold CC modes. */
2125 if (CR_REGNO_P (regno))
2126 return GET_MODE_CLASS (mode) == MODE_CC;
2127
2128 if (CA_REGNO_P (regno))
2129 return mode == Pmode || mode == SImode;
2130
2131 /* AltiVec modes can only go in AltiVec registers. */
2132 if (ALTIVEC_REGNO_P (regno))
2133 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2134 || mode == V1TImode);
2135
2136 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2137 registers, and the value must fit within the register set. */
2138
2139 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2140 }
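
/* Illustrative checks (not part of the original source) of the rules above:
   CR fields only accept CC modes, and the carry register accepts only
   Pmode/SImode.  Fragments shown as they might appear inside a self-test
   routine.  */
#if 0
gcc_assert (rs6000_hard_regno_mode_ok (CR0_REGNO, CCmode));
gcc_assert (!rs6000_hard_regno_mode_ok (CR0_REGNO, SImode));
gcc_assert (rs6000_hard_regno_mode_ok (CA_REGNO, SImode));
#endif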
2141
2142 /* Print interesting facts about registers. */
2143 static void
2144 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2145 {
2146 int r, m;
2147
2148 for (r = first_regno; r <= last_regno; ++r)
2149 {
2150 const char *comma = "";
2151 int len;
2152
2153 if (first_regno == last_regno)
2154 fprintf (stderr, "%s:\t", reg_name);
2155 else
2156 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2157
2158 len = 8;
2159 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2160 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2161 {
2162 if (len > 70)
2163 {
2164 fprintf (stderr, ",\n\t");
2165 len = 8;
2166 comma = "";
2167 }
2168
2169 if (rs6000_hard_regno_nregs[m][r] > 1)
2170 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2171 rs6000_hard_regno_nregs[m][r]);
2172 else
2173 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2174
2175 comma = ", ";
2176 }
2177
2178 if (call_used_regs[r])
2179 {
2180 if (len > 70)
2181 {
2182 fprintf (stderr, ",\n\t");
2183 len = 8;
2184 comma = "";
2185 }
2186
2187 len += fprintf (stderr, "%s%s", comma, "call-used");
2188 comma = ", ";
2189 }
2190
2191 if (fixed_regs[r])
2192 {
2193 if (len > 70)
2194 {
2195 fprintf (stderr, ",\n\t");
2196 len = 8;
2197 comma = "";
2198 }
2199
2200 len += fprintf (stderr, "%s%s", comma, "fixed");
2201 comma = ", ";
2202 }
2203
2204 if (len > 70)
2205 {
2206 fprintf (stderr, ",\n\t");
2207 comma = "";
2208 }
2209
2210 len += fprintf (stderr, "%sreg-class = %s", comma,
2211 reg_class_names[(int)rs6000_regno_regclass[r]]);
2212 comma = ", ";
2213
2214 if (len > 70)
2215 {
2216 fprintf (stderr, ",\n\t");
2217 comma = "";
2218 }
2219
2220 fprintf (stderr, "%sregno = %d\n", comma, r);
2221 }
2222 }
2223
2224 static const char *
2225 rs6000_debug_vector_unit (enum rs6000_vector v)
2226 {
2227 const char *ret;
2228
2229 switch (v)
2230 {
2231 case VECTOR_NONE: ret = "none"; break;
2232 case VECTOR_ALTIVEC: ret = "altivec"; break;
2233 case VECTOR_VSX: ret = "vsx"; break;
2234 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2235 case VECTOR_PAIRED: ret = "paired"; break;
2236 case VECTOR_OTHER: ret = "other"; break;
2237 default: ret = "unknown"; break;
2238 }
2239
2240 return ret;
2241 }
2242
2243 /* Inner function printing just the address mask for a particular reload
2244 register class. */
2245 DEBUG_FUNCTION char *
2246 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2247 {
2248 static char ret[8];
2249 char *p = ret;
2250
2251 if ((mask & RELOAD_REG_VALID) != 0)
2252 *p++ = 'v';
2253 else if (keep_spaces)
2254 *p++ = ' ';
2255
2256 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2257 *p++ = 'm';
2258 else if (keep_spaces)
2259 *p++ = ' ';
2260
2261 if ((mask & RELOAD_REG_INDEXED) != 0)
2262 *p++ = 'i';
2263 else if (keep_spaces)
2264 *p++ = ' ';
2265
2266 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2267 *p++ = 'O';
2268 else if ((mask & RELOAD_REG_OFFSET) != 0)
2269 *p++ = 'o';
2270 else if (keep_spaces)
2271 *p++ = ' ';
2272
2273 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2274 *p++ = '+';
2275 else if (keep_spaces)
2276 *p++ = ' ';
2277
2278 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2279 *p++ = '+';
2280 else if (keep_spaces)
2281 *p++ = ' ';
2282
2283 if ((mask & RELOAD_REG_AND_M16) != 0)
2284 *p++ = '&';
2285 else if (keep_spaces)
2286 *p++ = ' ';
2287
2288 *p = '\0';
2289
2290 return ret;
2291 }
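
/* Example (illustrative, not part of the original source): a mask with
   RELOAD_REG_VALID, RELOAD_REG_INDEXED and RELOAD_REG_OFFSET prints as
   "v io   " with keep_spaces, or "vio" without.  Fragment shown as it might
   appear inside a debug routine.  */
#if 0
fprintf (stderr, "<%s>\n",
	 rs6000_debug_addr_mask (RELOAD_REG_VALID | RELOAD_REG_INDEXED
				 | RELOAD_REG_OFFSET, true));
#endif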
2292
2293 /* Print the address masks in a human readable fashion. */
2294 DEBUG_FUNCTION void
2295 rs6000_debug_print_mode (ssize_t m)
2296 {
2297 ssize_t rc;
2298 int spaces = 0;
2299 bool fuse_extra_p;
2300
2301 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2302 for (rc = 0; rc < N_RELOAD_REG; rc++)
2303 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2304 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2305
2306 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2307 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2308 fprintf (stderr, " Reload=%c%c",
2309 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2310 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2311 else
2312 spaces += sizeof (" Reload=sl") - 1;
2313
2314 if (reg_addr[m].scalar_in_vmx_p)
2315 {
2316 fprintf (stderr, "%*s Upper=y", spaces, "");
2317 spaces = 0;
2318 }
2319 else
2320 spaces += sizeof (" Upper=y") - 1;
2321
2322 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2323 || reg_addr[m].fused_toc);
2324 if (!fuse_extra_p)
2325 {
2326 for (rc = 0; rc < N_RELOAD_REG; rc++)
2327 {
2328 if (rc != RELOAD_REG_ANY)
2329 {
2330 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2332 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2333 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2334 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2335 {
2336 fuse_extra_p = true;
2337 break;
2338 }
2339 }
2340 }
2341 }
2342
2343 if (fuse_extra_p)
2344 {
2345 fprintf (stderr, "%*s Fuse:", spaces, "");
2346 spaces = 0;
2347
2348 for (rc = 0; rc < N_RELOAD_REG; rc++)
2349 {
2350 if (rc != RELOAD_REG_ANY)
2351 {
2352 char load, store;
2353
2354 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2355 load = 'l';
2356 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2357 load = 'L';
2358 else
2359 load = '-';
2360
2361 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2362 store = 's';
2363 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2364 store = 'S';
2365 else
2366 store = '-';
2367
2368 if (load == '-' && store == '-')
2369 spaces += 5;
2370 else
2371 {
2372 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2373 reload_reg_map[rc].name[0], load, store);
2374 spaces = 0;
2375 }
2376 }
2377 }
2378
2379 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2380 {
2381 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2382 spaces = 0;
2383 }
2384 else
2385 spaces += sizeof (" P8gpr") - 1;
2386
2387 if (reg_addr[m].fused_toc)
2388 {
2389 fprintf (stderr, "%*sToc", (spaces + 1), "");
2390 spaces = 0;
2391 }
2392 else
2393 spaces += sizeof (" Toc") - 1;
2394 }
2395 else
2396 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2397
2398 if (rs6000_vector_unit[m] != VECTOR_NONE
2399 || rs6000_vector_mem[m] != VECTOR_NONE)
2400 {
2401 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2402 spaces, "",
2403 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2404 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2405 }
2406
2407 fputs ("\n", stderr);
2408 }
2409
2410 #define DEBUG_FMT_ID "%-32s= "
2411 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2412 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2413 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
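
/* Example (illustrative, not part of the original source): DEBUG_FMT_D pads
   the label to 32 columns, so the call below prints the label followed by
   "= 16" on one line.  Fragment shown as it might appear inside a debug
   routine.  */
#if 0
fprintf (stderr, DEBUG_FMT_D, "tls_size", 16);
#endif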
2414
2415 /* Print various interesting information with -mdebug=reg. */
2416 static void
2417 rs6000_debug_reg_global (void)
2418 {
2419 static const char *const tf[2] = { "false", "true" };
2420 const char *nl = (const char *)0;
2421 int m;
2422 size_t m1, m2, v;
2423 char costly_num[20];
2424 char nop_num[20];
2425 char flags_buffer[40];
2426 const char *costly_str;
2427 const char *nop_str;
2428 const char *trace_str;
2429 const char *abi_str;
2430 const char *cmodel_str;
2431 struct cl_target_option cl_opts;
2432
2433 /* Modes we want tieable information on. */
2434 static const machine_mode print_tieable_modes[] = {
2435 QImode,
2436 HImode,
2437 SImode,
2438 DImode,
2439 TImode,
2440 PTImode,
2441 SFmode,
2442 DFmode,
2443 TFmode,
2444 IFmode,
2445 KFmode,
2446 SDmode,
2447 DDmode,
2448 TDmode,
2449 V2SImode,
2450 V16QImode,
2451 V8HImode,
2452 V4SImode,
2453 V2DImode,
2454 V1TImode,
2455 V32QImode,
2456 V16HImode,
2457 V8SImode,
2458 V4DImode,
2459 V2TImode,
2460 V2SFmode,
2461 V4SFmode,
2462 V2DFmode,
2463 V8SFmode,
2464 V4DFmode,
2465 CCmode,
2466 CCUNSmode,
2467 CCEQmode,
2468 };
2469
2470 /* Virtual regs we are interested in. */
2471 static const struct {
2472 int regno; /* register number. */
2473 const char *name; /* register name. */
2474 } virtual_regs[] = {
2475 { STACK_POINTER_REGNUM, "stack pointer:" },
2476 { TOC_REGNUM, "toc: " },
2477 { STATIC_CHAIN_REGNUM, "static chain: " },
2478 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2479 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2480 { ARG_POINTER_REGNUM, "arg pointer: " },
2481 { FRAME_POINTER_REGNUM, "frame pointer:" },
2482 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2483 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2484 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2485 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2486 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2487 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2488 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2489 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2490 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2491 };
2492
2493 fputs ("\nHard register information:\n", stderr);
2494 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2495 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2496 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2497 LAST_ALTIVEC_REGNO,
2498 "vs");
2499 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2500 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2501 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2502 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2503 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2504 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2505
2506 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2507 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2508 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2509
2510 fprintf (stderr,
2511 "\n"
2512 "d reg_class = %s\n"
2513 "f reg_class = %s\n"
2514 "v reg_class = %s\n"
2515 "wa reg_class = %s\n"
2516 "wb reg_class = %s\n"
2517 "wd reg_class = %s\n"
2518 "we reg_class = %s\n"
2519 "wf reg_class = %s\n"
2520 "wg reg_class = %s\n"
2521 "wh reg_class = %s\n"
2522 "wi reg_class = %s\n"
2523 "wj reg_class = %s\n"
2524 "wk reg_class = %s\n"
2525 "wl reg_class = %s\n"
2526 "wm reg_class = %s\n"
2527 "wo reg_class = %s\n"
2528 "wp reg_class = %s\n"
2529 "wq reg_class = %s\n"
2530 "wr reg_class = %s\n"
2531 "ws reg_class = %s\n"
2532 "wt reg_class = %s\n"
2533 "wu reg_class = %s\n"
2534 "wv reg_class = %s\n"
2535 "ww reg_class = %s\n"
2536 "wx reg_class = %s\n"
2537 "wy reg_class = %s\n"
2538 "wz reg_class = %s\n"
2539 "wA reg_class = %s\n"
2540 "wH reg_class = %s\n"
2541 "wI reg_class = %s\n"
2542 "wJ reg_class = %s\n"
2543 "wK reg_class = %s\n"
2544 "\n",
2545 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2546 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2547 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2548 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2549 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2550 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2551 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2552 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2553 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2554 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2555 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2556 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2557 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2558 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2559 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2560 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2561 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2562 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2563 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2564 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2565 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2566 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2567 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2568 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2569 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2570 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2571 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2572 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2573 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2574 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2575 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2576 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2577
2578 nl = "\n";
2579 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2580 rs6000_debug_print_mode (m);
2581
2582 fputs ("\n", stderr);
2583
2584 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2585 {
2586 machine_mode mode1 = print_tieable_modes[m1];
2587 bool first_time = true;
2588
2589 nl = (const char *)0;
2590 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2591 {
2592 machine_mode mode2 = print_tieable_modes[m2];
2593 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2594 {
2595 if (first_time)
2596 {
2597 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2598 nl = "\n";
2599 first_time = false;
2600 }
2601
2602 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2603 }
2604 }
2605
2606 if (!first_time)
2607 fputs ("\n", stderr);
2608 }
2609
2610 if (nl)
2611 fputs (nl, stderr);
2612
2613 if (rs6000_recip_control)
2614 {
2615 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2616
2617 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2618 if (rs6000_recip_bits[m])
2619 {
2620 fprintf (stderr,
2621 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2622 GET_MODE_NAME (m),
2623 (RS6000_RECIP_AUTO_RE_P (m)
2624 ? "auto"
2625 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2626 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2627 ? "auto"
2628 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2629 }
2630
2631 fputs ("\n", stderr);
2632 }
2633
2634 if (rs6000_cpu_index >= 0)
2635 {
2636 const char *name = processor_target_table[rs6000_cpu_index].name;
2637 HOST_WIDE_INT flags
2638 = processor_target_table[rs6000_cpu_index].target_enable;
2639
2640 sprintf (flags_buffer, "-mcpu=%s flags", name);
2641 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2642 }
2643 else
2644 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2645
2646 if (rs6000_tune_index >= 0)
2647 {
2648 const char *name = processor_target_table[rs6000_tune_index].name;
2649 HOST_WIDE_INT flags
2650 = processor_target_table[rs6000_tune_index].target_enable;
2651
2652 sprintf (flags_buffer, "-mtune=%s flags", name);
2653 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2654 }
2655 else
2656 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2657
2658 cl_target_option_save (&cl_opts, &global_options);
2659 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2660 rs6000_isa_flags);
2661
2662 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2663 rs6000_isa_flags_explicit);
2664
2665 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2666 rs6000_builtin_mask);
2667
2668 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2669
2670 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2671 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2672
2673 switch (rs6000_sched_costly_dep)
2674 {
2675 case max_dep_latency:
2676 costly_str = "max_dep_latency";
2677 break;
2678
2679 case no_dep_costly:
2680 costly_str = "no_dep_costly";
2681 break;
2682
2683 case all_deps_costly:
2684 costly_str = "all_deps_costly";
2685 break;
2686
2687 case true_store_to_load_dep_costly:
2688 costly_str = "true_store_to_load_dep_costly";
2689 break;
2690
2691 case store_to_load_dep_costly:
2692 costly_str = "store_to_load_dep_costly";
2693 break;
2694
2695 default:
2696 costly_str = costly_num;
2697 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2698 break;
2699 }
2700
2701 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2702
2703 switch (rs6000_sched_insert_nops)
2704 {
2705 case sched_finish_regroup_exact:
2706 nop_str = "sched_finish_regroup_exact";
2707 break;
2708
2709 case sched_finish_pad_groups:
2710 nop_str = "sched_finish_pad_groups";
2711 break;
2712
2713 case sched_finish_none:
2714 nop_str = "sched_finish_none";
2715 break;
2716
2717 default:
2718 nop_str = nop_num;
2719 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2720 break;
2721 }
2722
2723 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2724
2725 switch (rs6000_sdata)
2726 {
2727 default:
2728 case SDATA_NONE:
2729 break;
2730
2731 case SDATA_DATA:
2732 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2733 break;
2734
2735 case SDATA_SYSV:
2736 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2737 break;
2738
2739 case SDATA_EABI:
2740 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2741 break;
2742
2743 }
2744
2745 switch (rs6000_traceback)
2746 {
2747 case traceback_default: trace_str = "default"; break;
2748 case traceback_none: trace_str = "none"; break;
2749 case traceback_part: trace_str = "part"; break;
2750 case traceback_full: trace_str = "full"; break;
2751 default: trace_str = "unknown"; break;
2752 }
2753
2754 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2755
2756 switch (rs6000_current_cmodel)
2757 {
2758 case CMODEL_SMALL: cmodel_str = "small"; break;
2759 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2760 case CMODEL_LARGE: cmodel_str = "large"; break;
2761 default: cmodel_str = "unknown"; break;
2762 }
2763
2764 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2765
2766 switch (rs6000_current_abi)
2767 {
2768 case ABI_NONE: abi_str = "none"; break;
2769 case ABI_AIX: abi_str = "aix"; break;
2770 case ABI_ELFv2: abi_str = "ELFv2"; break;
2771 case ABI_V4: abi_str = "V4"; break;
2772 case ABI_DARWIN: abi_str = "darwin"; break;
2773 default: abi_str = "unknown"; break;
2774 }
2775
2776 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2777
2778 if (rs6000_altivec_abi)
2779 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2780
2781 if (rs6000_darwin64_abi)
2782 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2783
2784 fprintf (stderr, DEBUG_FMT_S, "single_float",
2785 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2786
2787 fprintf (stderr, DEBUG_FMT_S, "double_float",
2788 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2789
2790 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2791 (TARGET_SOFT_FLOAT ? "true" : "false"));
2792
2793 if (TARGET_LINK_STACK)
2794 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2795
2796 fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
2797
2798 if (TARGET_P8_FUSION)
2799 {
2800 char options[80];
2801
2802 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2803 if (TARGET_TOC_FUSION)
2804 strcat (options, ", toc");
2805
2806 if (TARGET_P8_FUSION_SIGN)
2807 strcat (options, ", sign");
2808
2809 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2810 }
2811
2812 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2813 TARGET_SECURE_PLT ? "secure" : "bss");
2814 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2815 aix_struct_return ? "aix" : "sysv");
2816 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2817 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2818 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2819 tf[!!rs6000_align_branch_targets]);
2820 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2821 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2822 rs6000_long_double_type_size);
2823 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2824 (int)rs6000_sched_restricted_insns_priority);
2825 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2826 (int)END_BUILTINS);
2827 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2828 (int)RS6000_BUILTIN_COUNT);
2829
2830 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2831 (int)TARGET_FLOAT128_ENABLE_TYPE);
2832
2833 if (TARGET_VSX)
2834 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2835 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2836
2837 if (TARGET_DIRECT_MOVE_128)
2838 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2839 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2840 }
2841
2842 \f
2843 /* Update the addr mask bits in reg_addr to help the secondary reload and
2844 legitimate address support figure out the appropriate addressing to
2845 use. */
2846
2847 static void
2848 rs6000_setup_reg_addr_masks (void)
2849 {
2850 ssize_t rc, reg, m, nregs;
2851 addr_mask_type any_addr_mask, addr_mask;
2852
2853 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2854 {
2855 machine_mode m2 = (machine_mode) m;
2856 bool complex_p = false;
2857 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2858 size_t msize;
2859
2860 if (COMPLEX_MODE_P (m2))
2861 {
2862 complex_p = true;
2863 m2 = GET_MODE_INNER (m2);
2864 }
2865
2866 msize = GET_MODE_SIZE (m2);
2867
2868 /* SDmode is special in that we want to access it only via REG+REG
2869 addressing on power7 and above, since we want to use the LFIWZX and
2870 STFIWX instructions to load and store it. */
2871 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2872
2873 any_addr_mask = 0;
2874 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2875 {
2876 addr_mask = 0;
2877 reg = reload_reg_map[rc].reg;
2878
2879 /* Can mode values go in the GPR/FPR/Altivec registers? */
2880 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2881 {
2882 bool small_int_vsx_p = (small_int_p
2883 && (rc == RELOAD_REG_FPR
2884 || rc == RELOAD_REG_VMX));
2885
2886 nregs = rs6000_hard_regno_nregs[m][reg];
2887 addr_mask |= RELOAD_REG_VALID;
2888
2889 /* Indicate if the mode takes more than 1 physical register. If
2890 it takes a single register, indicate it can do REG+REG
2891 addressing. Small integers in VSX registers can only do
2892 REG+REG addressing. */
2893 if (small_int_vsx_p)
2894 addr_mask |= RELOAD_REG_INDEXED;
2895 else if (nregs > 1 || m == BLKmode || complex_p)
2896 addr_mask |= RELOAD_REG_MULTIPLE;
2897 else
2898 addr_mask |= RELOAD_REG_INDEXED;
2899
2900 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2901 addressing. If we allow scalars into Altivec registers,
2902 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2903
2904 if (TARGET_UPDATE
2905 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2906 && msize <= 8
2907 && !VECTOR_MODE_P (m2)
2908 && !FLOAT128_VECTOR_P (m2)
2909 && !complex_p
2910 && !small_int_vsx_p
2911 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2912 && (m2 != SFmode || !TARGET_UPPER_REGS_SF))
2913 {
2914 addr_mask |= RELOAD_REG_PRE_INCDEC;
2915
2916 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2917 we don't allow PRE_MODIFY for some multi-register
2918 operations. */
2919 switch (m)
2920 {
2921 default:
2922 addr_mask |= RELOAD_REG_PRE_MODIFY;
2923 break;
2924
2925 case DImode:
2926 if (TARGET_POWERPC64)
2927 addr_mask |= RELOAD_REG_PRE_MODIFY;
2928 break;
2929
2930 case DFmode:
2931 case DDmode:
2932 if (TARGET_DF_INSN)
2933 addr_mask |= RELOAD_REG_PRE_MODIFY;
2934 break;
2935 }
2936 }
2937 }
2938
2939 /* GPR and FPR registers can do REG+OFFSET addressing, except
2940 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2941 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2942 if ((addr_mask != 0) && !indexed_only_p
2943 && msize <= 8
2944 && (rc == RELOAD_REG_GPR
2945 || ((msize == 8 || m2 == SFmode)
2946 && (rc == RELOAD_REG_FPR
2947 || (rc == RELOAD_REG_VMX
2948 && TARGET_P9_DFORM_SCALAR)))))
2949 addr_mask |= RELOAD_REG_OFFSET;
2950
2951 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2952 instructions are enabled. The offset for 128-bit VSX registers is
2953 only 12 bits. While GPRs can handle the full offset range, VSX
2954 registers can only handle the restricted range. */
2955 else if ((addr_mask != 0) && !indexed_only_p
2956 && msize == 16 && TARGET_P9_DFORM_VECTOR
2957 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2958 || (m2 == TImode && TARGET_VSX_TIMODE)))
2959 {
2960 addr_mask |= RELOAD_REG_OFFSET;
2961 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2962 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2963 }
2964
2965 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2966 addressing on 128-bit types. */
2967 if (rc == RELOAD_REG_VMX && msize == 16
2968 && (addr_mask & RELOAD_REG_VALID) != 0)
2969 addr_mask |= RELOAD_REG_AND_M16;
2970
2971 reg_addr[m].addr_mask[rc] = addr_mask;
2972 any_addr_mask |= addr_mask;
2973 }
2974
2975 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2976 }
2977 }
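
/* Sketch (illustrative, not part of the original source): once the masks
   are set up, asking whether a GPR supports REG+OFFSET addressing for
   DImode is a single table lookup.  */
#if 0
static bool
example_gpr_offset_ok_for_dimode (void)
{
  return (reg_addr[DImode].addr_mask[RELOAD_REG_GPR]
	  & RELOAD_REG_OFFSET) != 0;
}
#endif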
2978
2979 \f
2980 /* Initialize the various global tables that are based on register size. */
2981 static void
2982 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2983 {
2984 ssize_t r, m, c;
2985 int align64;
2986 int align32;
2987
2988 /* Precalculate REGNO_REG_CLASS. */
2989 rs6000_regno_regclass[0] = GENERAL_REGS;
2990 for (r = 1; r < 32; ++r)
2991 rs6000_regno_regclass[r] = BASE_REGS;
2992
2993 for (r = 32; r < 64; ++r)
2994 rs6000_regno_regclass[r] = FLOAT_REGS;
2995
2996 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2997 rs6000_regno_regclass[r] = NO_REGS;
2998
2999 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3000 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3001
3002 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3003 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3004 rs6000_regno_regclass[r] = CR_REGS;
3005
3006 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3007 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3008 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3009 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3010 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3011 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3012 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3013 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3014 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3015 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3016
3017 /* Precalculate the mapping from register class to the simpler reload
3018 register type. We don't need the classes that are combinations of
3019 other classes, just the simple ones that have constraint letters. */
3020 for (c = 0; c < N_REG_CLASSES; c++)
3021 reg_class_to_reg_type[c] = NO_REG_TYPE;
3022
3023 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3024 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3025 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3026 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3027 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3028 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3029 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3030 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3031 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3032 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3033
3034 if (TARGET_VSX)
3035 {
3036 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3037 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3038 }
3039 else
3040 {
3041 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3042 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3043 }
3044
3045 /* Precalculate the valid memory formats as well as the vector information;
3046 this must be set up before the rs6000_hard_regno_nregs_internal calls
3047 below. */
3048 gcc_assert ((int)VECTOR_NONE == 0);
3049 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3050 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3051
3052 gcc_assert ((int)CODE_FOR_nothing == 0);
3053 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3054
3055 gcc_assert ((int)NO_REGS == 0);
3056 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3057
3058 /* The VSX hardware allows native alignment for vectors, but we control whether
3059 the compiler believes it can use native alignment or must still use 128-bit alignment. */
3060 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3061 {
3062 align64 = 64;
3063 align32 = 32;
3064 }
3065 else
3066 {
3067 align64 = 128;
3068 align32 = 128;
3069 }
3070
3071 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3072 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3073 if (TARGET_FLOAT128_TYPE)
3074 {
3075 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3076 rs6000_vector_align[KFmode] = 128;
3077
3078 if (FLOAT128_IEEE_P (TFmode))
3079 {
3080 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3081 rs6000_vector_align[TFmode] = 128;
3082 }
3083 }
3084
3085 /* V2DF mode, VSX only. */
3086 if (TARGET_VSX)
3087 {
3088 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3089 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3090 rs6000_vector_align[V2DFmode] = align64;
3091 }
3092
3093 /* V4SF mode, either VSX or Altivec. */
3094 if (TARGET_VSX)
3095 {
3096 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3097 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3098 rs6000_vector_align[V4SFmode] = align32;
3099 }
3100 else if (TARGET_ALTIVEC)
3101 {
3102 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3103 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3104 rs6000_vector_align[V4SFmode] = align32;
3105 }
3106
3107 /* V16QImode, V8HImode, and V4SImode are Altivec only, but may use VSX loads
3108 and stores. */
3109 if (TARGET_ALTIVEC)
3110 {
3111 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3112 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3113 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3114 rs6000_vector_align[V4SImode] = align32;
3115 rs6000_vector_align[V8HImode] = align32;
3116 rs6000_vector_align[V16QImode] = align32;
3117
3118 if (TARGET_VSX)
3119 {
3120 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3121 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3122 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3123 }
3124 else
3125 {
3126 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3127 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3128 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3129 }
3130 }
3131
3132 /* V2DImode: full support depends on the ISA 2.07 vector mode. Allow under
3133 VSX to do insert/splat/extract. Altivec has no 64-bit integer support. */
3134 if (TARGET_VSX)
3135 {
3136 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3137 rs6000_vector_unit[V2DImode]
3138 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3139 rs6000_vector_align[V2DImode] = align64;
3140
3141 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3142 rs6000_vector_unit[V1TImode]
3143 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3144 rs6000_vector_align[V1TImode] = 128;
3145 }
3146
3147 /* DFmode, see if we want to use the VSX unit. Memory is handled
3148 differently, so don't set rs6000_vector_mem. */
3149 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
3150 {
3151 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3152 rs6000_vector_align[DFmode] = 64;
3153 }
3154
3155 /* SFmode, see if we want to use the VSX unit. */
3156 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
3157 {
3158 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3159 rs6000_vector_align[SFmode] = 32;
3160 }
3161
3162 /* Allow TImode in VSX registers and set the VSX memory macros. */
3163 if (TARGET_VSX && TARGET_VSX_TIMODE)
3164 {
3165 rs6000_vector_mem[TImode] = VECTOR_VSX;
3166 rs6000_vector_align[TImode] = align64;
3167 }
3168
3169 /* TODO add paired floating point vector support. */
3170
3171 /* Register class constraints for the constraints that depend on compile
3172 switches. When the VSX code was added, different constraints were added
3173 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3174 of the VSX registers are used. The register classes for scalar floating
3175 point types are set based on whether we allow that type into the upper
3176 (Altivec) registers. GCC has register classes to target the Altivec
3177 registers for load/store operations, to select using a VSX memory
3178 operation instead of the traditional floating point operation. The
3179 constraints are:
3180
3181 d - Register class to use with traditional DFmode instructions.
3182 f - Register class to use with traditional SFmode instructions.
3183 v - Altivec register.
3184 wa - Any VSX register.
3185 wc - Reserved to represent individual CR bits (used in LLVM).
3186 wd - Preferred register class for V2DFmode.
3187 wf - Preferred register class for V4SFmode.
3188 wg - Float register for power6x move insns.
3189 wh - FP register for direct move instructions.
3190 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3191 wj - FP or VSX register to hold 64-bit integers for direct moves.
3192 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3193 wl - Float register if we can do 32-bit signed int loads.
3194 wm - VSX register for ISA 2.07 direct move operations.
3195 wn - always NO_REGS.
3196 wr - GPR if 64-bit mode is permitted.
3197 ws - Register class to do ISA 2.06 DF operations.
3198 wt - VSX register for TImode in VSX registers.
3199 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3200 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3201 ww - Register class to do SF conversions in with VSX operations.
3202 wx - Float register if we can do 32-bit int stores.
3203 wy - Register class to do ISA 2.07 SF operations.
3204 wz - Float register if we can do 32-bit unsigned int loads.
3205 wH - Altivec register if SImode is allowed in VSX registers.
3206 wI - VSX register if SImode is allowed in VSX registers.
3207 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3208 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
3209
3210 if (TARGET_HARD_FLOAT)
3211 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3212
3213 if (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
3214 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3215
3216 if (TARGET_VSX)
3217 {
3218 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3219 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3220 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3221
3222 if (TARGET_VSX_TIMODE)
3223 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3224
3225 if (TARGET_UPPER_REGS_DF) /* DFmode */
3226 {
3227 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
3228 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
3229 }
3230 else
3231 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
3232
3233 if (TARGET_UPPER_REGS_DI) /* DImode */
3234 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
3235 else
3236 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
3237 }
3238
3239 /* Add conditional constraints based on various options, to allow us to
3240 collapse multiple insn patterns. */
3241 if (TARGET_ALTIVEC)
3242 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3243
3244 if (TARGET_MFPGPR) /* DFmode */
3245 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3246
3247 if (TARGET_LFIWAX)
3248 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3249
3250 if (TARGET_DIRECT_MOVE)
3251 {
3252 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3253 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3254 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3255 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3256 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3257 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3258 }
3259
3260 if (TARGET_POWERPC64)
3261 {
3262 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3263 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3264 }
3265
3266 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
3267 {
3268 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3269 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3270 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3271 }
3272 else if (TARGET_P8_VECTOR)
3273 {
3274 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3275 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3276 }
3277 else if (TARGET_VSX)
3278 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3279
3280 if (TARGET_STFIWX)
3281 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3282
3283 if (TARGET_LFIWZX)
3284 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3285
3286 if (TARGET_FLOAT128_TYPE)
3287 {
3288 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3289 if (FLOAT128_IEEE_P (TFmode))
3290 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3291 }
3292
3293 /* Support for new D-form instructions. */
3294 if (TARGET_P9_DFORM_SCALAR)
3295 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3296
3297 /* Support for ISA 3.0 (power9) vectors. */
3298 if (TARGET_P9_VECTOR)
3299 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3300
3301 /* Support for new direct moves (ISA 3.0 + 64bit). */
3302 if (TARGET_DIRECT_MOVE_128)
3303 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3304
3305 /* Support small integers in VSX registers. */
3306 if (TARGET_VSX_SMALL_INTEGER)
3307 {
3308 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3309 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3310 if (TARGET_P9_VECTOR)
3311 {
3312 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3313 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3314 }
3315 }
3316
3317 /* Set up the reload helper and direct move functions. */
3318 if (TARGET_VSX || TARGET_ALTIVEC)
3319 {
3320 if (TARGET_64BIT)
3321 {
3322 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3323 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3324 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3325 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3326 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3327 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3328 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3329 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3330 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3331 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3332 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3333 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3334 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3335 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3336 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3337 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3338 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3339 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3340 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3341 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3342
3343 if (FLOAT128_VECTOR_P (KFmode))
3344 {
3345 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3346 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3347 }
3348
3349 if (FLOAT128_VECTOR_P (TFmode))
3350 {
3351 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3352 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3353 }
3354
3355 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3356 available. */
3357 if (TARGET_NO_SDMODE_STACK)
3358 {
3359 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3360 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3361 }
3362
3363 if (TARGET_VSX_TIMODE)
3364 {
3365 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3366 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3367 }
3368
3369 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3370 {
3371 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3372 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3373 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3374 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3375 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3376 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3377 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3378 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3379 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3380
3381 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3382 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3383 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3384 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3385 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3386 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3387 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3388 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3389 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3390
3391 if (FLOAT128_VECTOR_P (KFmode))
3392 {
3393 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3394 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3395 }
3396
3397 if (FLOAT128_VECTOR_P (TFmode))
3398 {
3399 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3400 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3401 }
3402 }
3403 }
3404 else
3405 {
3406 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3407 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3408 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3409 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3410 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3411 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3412 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3413 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3414 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3415 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3416 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3417 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3418 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3419 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3420 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3421 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3422 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3423 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3424 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3425 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3426
3427 if (FLOAT128_VECTOR_P (KFmode))
3428 {
3429 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3430 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3431 }
3432
3433 if (FLOAT128_IEEE_P (TFmode))
3434 {
3435 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3436 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3437 }
3438
3439 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3440 available. */
3441 if (TARGET_NO_SDMODE_STACK)
3442 {
3443 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3444 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3445 }
3446
3447 if (TARGET_VSX_TIMODE)
3448 {
3449 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3450 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3451 }
3452
3453 if (TARGET_DIRECT_MOVE)
3454 {
3455 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3456 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3457 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3458 }
3459 }
3460
3461 if (TARGET_UPPER_REGS_DF)
3462 reg_addr[DFmode].scalar_in_vmx_p = true;
3463
3464 if (TARGET_UPPER_REGS_DI)
3465 reg_addr[DImode].scalar_in_vmx_p = true;
3466
3467 if (TARGET_UPPER_REGS_SF)
3468 reg_addr[SFmode].scalar_in_vmx_p = true;
3469
3470 if (TARGET_VSX_SMALL_INTEGER)
3471 {
3472 reg_addr[SImode].scalar_in_vmx_p = true;
3473 if (TARGET_P9_VECTOR)
3474 {
3475 reg_addr[HImode].scalar_in_vmx_p = true;
3476 reg_addr[QImode].scalar_in_vmx_p = true;
3477 }
3478 }
3479 }
3480
3481 /* Set up the fusion operations. */
3482 if (TARGET_P8_FUSION)
3483 {
3484 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3485 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3486 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3487 if (TARGET_64BIT)
3488 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3489 }
3490
3491 if (TARGET_P9_FUSION)
3492 {
3493 struct fuse_insns {
3494 machine_mode mode; /* mode of the fused type. */
3495 machine_mode pmode; /* pointer mode. */
3496 enum rs6000_reload_reg_type rtype; /* register type. */
3497 enum insn_code load; /* load insn. */
3498 enum insn_code store; /* store insn. */
3499 };
3500
3501 static const struct fuse_insns addis_insns[] = {
3502 { SFmode, DImode, RELOAD_REG_FPR,
3503 CODE_FOR_fusion_vsx_di_sf_load,
3504 CODE_FOR_fusion_vsx_di_sf_store },
3505
3506 { SFmode, SImode, RELOAD_REG_FPR,
3507 CODE_FOR_fusion_vsx_si_sf_load,
3508 CODE_FOR_fusion_vsx_si_sf_store },
3509
3510 { DFmode, DImode, RELOAD_REG_FPR,
3511 CODE_FOR_fusion_vsx_di_df_load,
3512 CODE_FOR_fusion_vsx_di_df_store },
3513
3514 { DFmode, SImode, RELOAD_REG_FPR,
3515 CODE_FOR_fusion_vsx_si_df_load,
3516 CODE_FOR_fusion_vsx_si_df_store },
3517
3518 { DImode, DImode, RELOAD_REG_FPR,
3519 CODE_FOR_fusion_vsx_di_di_load,
3520 CODE_FOR_fusion_vsx_di_di_store },
3521
3522 { DImode, SImode, RELOAD_REG_FPR,
3523 CODE_FOR_fusion_vsx_si_di_load,
3524 CODE_FOR_fusion_vsx_si_di_store },
3525
3526 { QImode, DImode, RELOAD_REG_GPR,
3527 CODE_FOR_fusion_gpr_di_qi_load,
3528 CODE_FOR_fusion_gpr_di_qi_store },
3529
3530 { QImode, SImode, RELOAD_REG_GPR,
3531 CODE_FOR_fusion_gpr_si_qi_load,
3532 CODE_FOR_fusion_gpr_si_qi_store },
3533
3534 { HImode, DImode, RELOAD_REG_GPR,
3535 CODE_FOR_fusion_gpr_di_hi_load,
3536 CODE_FOR_fusion_gpr_di_hi_store },
3537
3538 { HImode, SImode, RELOAD_REG_GPR,
3539 CODE_FOR_fusion_gpr_si_hi_load,
3540 CODE_FOR_fusion_gpr_si_hi_store },
3541
3542 { SImode, DImode, RELOAD_REG_GPR,
3543 CODE_FOR_fusion_gpr_di_si_load,
3544 CODE_FOR_fusion_gpr_di_si_store },
3545
3546 { SImode, SImode, RELOAD_REG_GPR,
3547 CODE_FOR_fusion_gpr_si_si_load,
3548 CODE_FOR_fusion_gpr_si_si_store },
3549
3550 { SFmode, DImode, RELOAD_REG_GPR,
3551 CODE_FOR_fusion_gpr_di_sf_load,
3552 CODE_FOR_fusion_gpr_di_sf_store },
3553
3554 { SFmode, SImode, RELOAD_REG_GPR,
3555 CODE_FOR_fusion_gpr_si_sf_load,
3556 CODE_FOR_fusion_gpr_si_sf_store },
3557
3558 { DImode, DImode, RELOAD_REG_GPR,
3559 CODE_FOR_fusion_gpr_di_di_load,
3560 CODE_FOR_fusion_gpr_di_di_store },
3561
3562 { DFmode, DImode, RELOAD_REG_GPR,
3563 CODE_FOR_fusion_gpr_di_df_load,
3564 CODE_FOR_fusion_gpr_di_df_store },
3565 };
3566
3567 machine_mode cur_pmode = Pmode;
3568 size_t i;
3569
3570 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3571 {
3572 machine_mode xmode = addis_insns[i].mode;
3573 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3574
3575 if (addis_insns[i].pmode != cur_pmode)
3576 continue;
3577
3578 if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
3579 continue;
3580
3581 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3582 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3583
3584 if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
3585 {
3586 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3587 = addis_insns[i].load;
3588 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3589 = addis_insns[i].store;
3590 }
3591 }
3592 }
3593
3594 /* Note which modes support fusing a TOC setup with a memory insn. We only
3595 do fused TOCs for the medium and large code models. */
3596 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3597 && (TARGET_CMODEL != CMODEL_SMALL))
3598 {
3599 reg_addr[QImode].fused_toc = true;
3600 reg_addr[HImode].fused_toc = true;
3601 reg_addr[SImode].fused_toc = true;
3602 reg_addr[DImode].fused_toc = true;
3603 if (TARGET_HARD_FLOAT)
3604 {
3605 if (TARGET_SINGLE_FLOAT)
3606 reg_addr[SFmode].fused_toc = true;
3607 if (TARGET_DOUBLE_FLOAT)
3608 reg_addr[DFmode].fused_toc = true;
3609 }
3610 }
3611
3612 /* Precalculate HARD_REGNO_NREGS. */
3613 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3614 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3615 rs6000_hard_regno_nregs[m][r]
3616 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3617
3618 /* Precalculate HARD_REGNO_MODE_OK. */
3619 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3620 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3621 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3622 rs6000_hard_regno_mode_ok_p[m][r] = true;
3623
3624 /* Precalculate CLASS_MAX_NREGS sizes. */
3625 for (c = 0; c < LIM_REG_CLASSES; ++c)
3626 {
3627 int reg_size;
3628
3629 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3630 reg_size = UNITS_PER_VSX_WORD;
3631
3632 else if (c == ALTIVEC_REGS)
3633 reg_size = UNITS_PER_ALTIVEC_WORD;
3634
3635 else if (c == FLOAT_REGS)
3636 reg_size = UNITS_PER_FP_WORD;
3637
3638 else
3639 reg_size = UNITS_PER_WORD;
3640
3641 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3642 {
3643 machine_mode m2 = (machine_mode)m;
3644 int reg_size2 = reg_size;
3645
3646 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3647 in VSX. */
3648 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3649 reg_size2 = UNITS_PER_FP_WORD;
3650
3651 rs6000_class_max_nregs[m][c]
3652 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3653 }
3654 }
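/* Worked example of the computation above: V2DFmode is 16 bytes, so with
   FLOAT_REGS (reg_size == UNITS_PER_FP_WORD == 8) it takes
   (16 + 8 - 1) / 8 = 2 registers, while with VSX_REGS
   (reg_size == UNITS_PER_VSX_WORD == 16) it takes
   (16 + 16 - 1) / 16 = 1 register.  IBM 128-bit floating point
   (FLOAT128_2REG_P) is forced back to the 8-byte FP word size in VSX
   classes and so always occupies 2 registers.  */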
3655
3656 /* Calculate which modes to automatically generate code to use the
3657 reciprocal divide and square root instructions. In the future, possibly
3658 automatically generate the instructions even if the user did not specify
3659 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3660 not accurate enough. */
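/* As a sketch of what the -mrecip expansion does (assuming the usual
   Newton-Raphson refinement; see rs6000_emit_swdiv): to compute a/b,
   start from the hardware estimate y0 = fre(b) and iterate

     y1 = y0 + y0 * (1 - b * y0)

   which roughly doubles the number of correct bits per step; the
   quotient is then formed as a * yN plus a final correction.  */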
3661 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3662 if (TARGET_FRES)
3663 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3664 if (TARGET_FRE)
3665 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3666 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3667 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3668 if (VECTOR_UNIT_VSX_P (V2DFmode))
3669 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3670
3671 if (TARGET_FRSQRTES)
3672 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3673 if (TARGET_FRSQRTE)
3674 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3675 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3676 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3677 if (VECTOR_UNIT_VSX_P (V2DFmode))
3678 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3679
3680 if (rs6000_recip_control)
3681 {
3682 if (!flag_finite_math_only)
3683 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3684 if (flag_trapping_math)
3685 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3686 if (!flag_reciprocal_math)
3687 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3688 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3689 {
3690 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3691 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3692 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3693
3694 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3695 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3696 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3697
3698 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3699 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3700 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3701
3702 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3703 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3704 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3705
3706 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3707 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3708 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3709
3710 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3711 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3712 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3713
3714 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3715 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3716 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3717
3718 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3719 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3720 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3721 }
3722 }
3723
3724 /* Update the addr mask bits in reg_addr to help the secondary reload and
3725 legitimate address support figure out the appropriate addressing to
3726 use. */
3727 rs6000_setup_reg_addr_masks ();
3728
3729 if (global_init_p || TARGET_DEBUG_TARGET)
3730 {
3731 if (TARGET_DEBUG_REG)
3732 rs6000_debug_reg_global ();
3733
3734 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3735 fprintf (stderr,
3736 "SImode variable mult cost = %d\n"
3737 "SImode constant mult cost = %d\n"
3738 "SImode short constant mult cost = %d\n"
3739 "DImode multipliciation cost = %d\n"
3740 "SImode division cost = %d\n"
3741 "DImode division cost = %d\n"
3742 "Simple fp operation cost = %d\n"
3743 "DFmode multiplication cost = %d\n"
3744 "SFmode division cost = %d\n"
3745 "DFmode division cost = %d\n"
3746 "cache line size = %d\n"
3747 "l1 cache size = %d\n"
3748 "l2 cache size = %d\n"
3749 "simultaneous prefetches = %d\n"
3750 "\n",
3751 rs6000_cost->mulsi,
3752 rs6000_cost->mulsi_const,
3753 rs6000_cost->mulsi_const9,
3754 rs6000_cost->muldi,
3755 rs6000_cost->divsi,
3756 rs6000_cost->divdi,
3757 rs6000_cost->fp,
3758 rs6000_cost->dmul,
3759 rs6000_cost->sdiv,
3760 rs6000_cost->ddiv,
3761 rs6000_cost->cache_line_size,
3762 rs6000_cost->l1_cache_size,
3763 rs6000_cost->l2_cache_size,
3764 rs6000_cost->simultaneous_prefetches);
3765 }
3766 }
3767
3768 #if TARGET_MACHO
3769 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3770
3771 static void
3772 darwin_rs6000_override_options (void)
3773 {
3774 /* The Darwin ABI always includes AltiVec; it cannot (validly) be turned
3775 off. */
3776 rs6000_altivec_abi = 1;
3777 TARGET_ALTIVEC_VRSAVE = 1;
3778 rs6000_current_abi = ABI_DARWIN;
3779
3780 if (DEFAULT_ABI == ABI_DARWIN
3781 && TARGET_64BIT)
3782 darwin_one_byte_bool = 1;
3783
3784 if (TARGET_64BIT && ! TARGET_POWERPC64)
3785 {
3786 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3787 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3788 }
3789 if (flag_mkernel)
3790 {
3791 rs6000_default_long_calls = 1;
3792 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3793 }
3794
3795 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3796 Altivec. */
3797 if (!flag_mkernel && !flag_apple_kext
3798 && TARGET_64BIT
3799 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3800 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3801
3802 /* Unless the user (not the configurer) has explicitly overridden
3803 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3804 G4 unless targeting the kernel. */
3805 if (!flag_mkernel
3806 && !flag_apple_kext
3807 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3808 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3809 && ! global_options_set.x_rs6000_cpu_index)
3810 {
3811 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3812 }
3813 }
3814 #endif
3815
3816 /* If not otherwise specified by a target, make 'long double' equivalent to
3817 'double'. */
3818
3819 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3820 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3821 #endif
3822
3823 /* Return the builtin mask of the various options used that could affect which
3824 builtins are available. In the past we used target_flags, but we've run out of
3825 bits, and some options like PAIRED are no longer in target_flags. */
3826
3827 HOST_WIDE_INT
3828 rs6000_builtin_mask_calculate (void)
3829 {
3830 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3831 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3832 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3833 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3834 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3835 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3836 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3837 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3838 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3839 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3840 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3841 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3842 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3843 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3844 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3845 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3846 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3847 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3848 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3849 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3850 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0));
3851 }
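/* Illustrative sketch of how this mask is consumed (hypothetical entry,
   not a real table row): a builtin whose entry requires both Altivec and
   VSX is only enabled when every one of its required bits is present:

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     HOST_WIDE_INT need = RS6000_BTM_ALTIVEC | RS6000_BTM_VSX;
     bool enabled = (mask & need) == need;  */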
3852
3853 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3854 to clobber the XER[CA] bit because clobbering that bit without telling
3855 the compiler worked just fine with versions of GCC before GCC 5, and
3856 breaking a lot of older code in ways that are hard to track down is
3857 not such a great idea. */
3858
3859 static rtx_insn *
3860 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3861 vec<const char *> &/*constraints*/,
3862 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3863 {
3864 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3865 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3866 return NULL;
3867 }
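/* Illustrative example of the kind of pre-GCC 5 code this protects: a
   double-word add written with carry-propagating asm, which modifies
   XER[CA] via addc/adde yet declares no clobber for it:

     unsigned long lo, hi;
     asm ("addc %0,%2,%3\n\tadde %1,%4,%5"
          : "=&r" (lo), "=r" (hi)
          : "r" (a_lo), "r" (b_lo), "r" (a_hi), "r" (b_hi));

   (a_lo, b_lo, a_hi, b_hi are hypothetical variables.)  */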
3868
3869 /* Override command line options.
3870
3871 Combine build-specific configuration information with options
3872 specified on the command line to set various state variables which
3873 influence code generation, optimization, and expansion of built-in
3874 functions. Assure that command-line configuration preferences are
3875 compatible with each other and with the build configuration; issue
3876 warnings while adjusting configuration or error messages while
3877 rejecting configuration.
3878
3879 Upon entry to this function:
3880
3881 This function is called once at the beginning of
3882 compilation, and then again at the start and end of compiling
3883 each section of code that has a different configuration, as
3884 indicated, for example, by adding the
3885
3886 __attribute__((__target__("cpu=power9")))
3887
3888 qualifier to a function definition or, for example, by bracketing
3889 code between
3890
3891 #pragma GCC target("altivec")
3892
3893 and
3894
3895 #pragma GCC reset_options
3896
3897 directives. Parameter global_init_p is true for the initial
3898 invocation, which initializes global variables, and false for all
3899 subsequent invocations.
3900
3901
3902 Various global state information is assumed to be valid. This
3903 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3904 default CPU specified at build configure time, TARGET_DEFAULT,
3905 representing the default set of option flags for the default
3906 target, and global_options_set.x_rs6000_isa_flags, representing
3907 which options were requested on the command line.
3908
3909 Upon return from this function:
3910
3911 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3912 was set by name on the command line. Additionally, if certain
3913 attributes are automatically enabled or disabled by this function
3914 in order to assure compatibility between options and
3915 configuration, the flags associated with those attributes are
3916 also set. By setting these "explicit bits", we avoid the risk
3917 that other code might accidentally overwrite these particular
3918 attributes with "default values".
3919
3920 The various bits of rs6000_isa_flags are set to indicate the
3921 target options that have been selected for the most current
3922 compilation efforts. This has the effect of also turning on the
3923 associated TARGET_XXX values since these are macros which are
3924 generally defined to test the corresponding bit of the
3925 rs6000_isa_flags variable.
3926
3927 The variable rs6000_builtin_mask is set to represent the target
3928 options for the most current compilation efforts, consistent with
3929 the current contents of rs6000_isa_flags. This variable controls
3930 expansion of built-in functions.
3931
3932 Various other global variables and fields of global structures
3933 (over 50 in all) are initialized to reflect the desired options
3934 for the most current compilation efforts. */
3935
3936 static bool
3937 rs6000_option_override_internal (bool global_init_p)
3938 {
3939 bool ret = true;
3940 bool have_cpu = false;
3941
3942 /* The default cpu requested at configure time, if any. */
3943 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3944
3945 HOST_WIDE_INT set_masks;
3946 HOST_WIDE_INT ignore_masks;
3947 int cpu_index;
3948 int tune_index;
3949 struct cl_target_option *main_target_opt
3950 = ((global_init_p || target_option_default_node == NULL)
3951 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3952
3953 /* Print defaults. */
3954 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3955 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3956
3957 /* Remember the explicit arguments. */
3958 if (global_init_p)
3959 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3960
3961 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3962 library functions, so warn about it. The flag may be useful for
3963 performance studies from time to time though, so don't disable it
3964 entirely. */
3965 if (global_options_set.x_rs6000_alignment_flags
3966 && rs6000_alignment_flags == MASK_ALIGN_POWER
3967 && DEFAULT_ABI == ABI_DARWIN
3968 && TARGET_64BIT)
3969 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3970 " it is incompatible with the installed C and C++ libraries");
3971
3972 /* Numerous experiments show that IRA-based loop pressure
3973 calculation works better for RTL loop invariant motion on targets
3974 with enough (>= 32) registers. It is an expensive optimization,
3975 so it is on only for peak performance. */
3976 if (optimize >= 3 && global_init_p
3977 && !global_options_set.x_flag_ira_loop_pressure)
3978 flag_ira_loop_pressure = 1;
3979
3980 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3981 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3982 options were already specified. */
3983 if (flag_sanitize & SANITIZE_USER_ADDRESS
3984 && !global_options_set.x_flag_asynchronous_unwind_tables)
3985 flag_asynchronous_unwind_tables = 1;
3986
3987 /* Set the pointer size. */
3988 if (TARGET_64BIT)
3989 {
3990 rs6000_pmode = (int)DImode;
3991 rs6000_pointer_size = 64;
3992 }
3993 else
3994 {
3995 rs6000_pmode = (int)SImode;
3996 rs6000_pointer_size = 32;
3997 }
3998
3999 /* Some OSs don't support saving the high part of 64-bit registers on context
4000 switch. Other OSs don't support saving Altivec registers. On those OSs,
4001 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4002 if the user wants either, the user must explicitly specify them and we
4003 won't interfere with the user's specification. */
4004
4005 set_masks = POWERPC_MASKS;
4006 #ifdef OS_MISSING_POWERPC64
4007 if (OS_MISSING_POWERPC64)
4008 set_masks &= ~OPTION_MASK_POWERPC64;
4009 #endif
4010 #ifdef OS_MISSING_ALTIVEC
4011 if (OS_MISSING_ALTIVEC)
4012 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4013 | OTHER_VSX_VECTOR_MASKS);
4014 #endif
4015
4016 /* Don't let the processor default override options given explicitly. */
4017 set_masks &= ~rs6000_isa_flags_explicit;
4018
4019 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4020 the cpu in a target attribute or pragma, but did not specify a tuning
4021 option, use the cpu for the tuning option rather than the option specified
4022 with -mtune on the command line. Process a '--with-cpu' configuration
4023 request as an implicit -mcpu. */
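/* For example: "-mcpu=power8" alone selects the power8 ISA bits and
   power8 tuning; "-mcpu=power8 -mtune=power9" keeps the power8 ISA but
   schedules for power9; and a compiler configured with
   "--with-cpu=power8" behaves as if "-mcpu=power8" had been given when
   neither option appears on the command line.  */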
4024 if (rs6000_cpu_index >= 0)
4025 {
4026 cpu_index = rs6000_cpu_index;
4027 have_cpu = true;
4028 }
4029 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4030 {
4031 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
4032 have_cpu = true;
4033 }
4034 else if (implicit_cpu)
4035 {
4036 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
4037 have_cpu = true;
4038 }
4039 else
4040 {
4041 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4042 const char *default_cpu = ((!TARGET_POWERPC64)
4043 ? "powerpc"
4044 : ((BYTES_BIG_ENDIAN)
4045 ? "powerpc64"
4046 : "powerpc64le"));
4047
4048 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4049 have_cpu = false;
4050 }
4051
4052 gcc_assert (cpu_index >= 0);
4053
4054 if (have_cpu)
4055 {
4056 #ifndef HAVE_AS_POWER9
4057 if (processor_target_table[rs6000_cpu_index].processor
4058 == PROCESSOR_POWER9)
4059 {
4060 have_cpu = false;
4061 warning (0, "will not generate power9 instructions because "
4062 "assembler lacks power9 support");
4063 }
4064 #endif
4065 #ifndef HAVE_AS_POWER8
4066 if (processor_target_table[rs6000_cpu_index].processor
4067 == PROCESSOR_POWER8)
4068 {
4069 have_cpu = false;
4070 warning (0, "will not generate power8 instructions because "
4071 "assembler lacks power8 support");
4072 }
4073 #endif
4074 #ifndef HAVE_AS_POPCNTD
4075 if (processor_target_table[rs6000_cpu_index].processor
4076 == PROCESSOR_POWER7)
4077 {
4078 have_cpu = false;
4079 warning (0, "will not generate power7 instructions because "
4080 "assembler lacks power7 support");
4081 }
4082 #endif
4083 #ifndef HAVE_AS_DFP
4084 if (processor_target_table[rs6000_cpu_index].processor
4085 == PROCESSOR_POWER6)
4086 {
4087 have_cpu = false;
4088 warning (0, "will not generate power6 instructions because "
4089 "assembler lacks power6 support");
4090 }
4091 #endif
4092 #ifndef HAVE_AS_POPCNTB
4093 if (processor_target_table[rs6000_cpu_index].processor
4094 == PROCESSOR_POWER5)
4095 {
4096 have_cpu = false;
4097 warning (0, "will not generate power5 instructions because "
4098 "assembler lacks power5 support");
4099 }
4100 #endif
4101
4102 if (!have_cpu)
4103 {
4104 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4105 const char *default_cpu = (!TARGET_POWERPC64
4106 ? "powerpc"
4107 : (BYTES_BIG_ENDIAN
4108 ? "powerpc64"
4109 : "powerpc64le"));
4110
4111 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
4112 }
4113 }
4114
4115 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4116 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4117 with those from the cpu, except for options that were explicitly set. If
4118 we don't have a cpu, do not override the target bits set in
4119 TARGET_DEFAULT. */
4120 if (have_cpu)
4121 {
4122 rs6000_isa_flags &= ~set_masks;
4123 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4124 & set_masks);
4125 }
4126 else
4127 {
4128 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4129 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4130 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
4131 to using rs6000_isa_flags, we need to do the initialization here.
4132
4133 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4134 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4135 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
4136 : processor_target_table[cpu_index].target_enable);
4137 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4138 }
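/* Worked sketch of the precedence above: with "-mcpu=power8 -mno-vsx",
   OPTION_MASK_VSX is recorded in rs6000_isa_flags_explicit and was
   therefore removed from set_masks earlier, so the power8 entry in
   processor_target_table cannot turn VSX back on; only the bits the
   user did not set explicitly are inherited from the cpu (or from
   TARGET_DEFAULT when no cpu is known).  */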
4139
4140 if (rs6000_tune_index >= 0)
4141 tune_index = rs6000_tune_index;
4142 else if (have_cpu)
4143 rs6000_tune_index = tune_index = cpu_index;
4144 else
4145 {
4146 size_t i;
4147 enum processor_type tune_proc
4148 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4149
4150 tune_index = -1;
4151 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4152 if (processor_target_table[i].processor == tune_proc)
4153 {
4154 rs6000_tune_index = tune_index = i;
4155 break;
4156 }
4157 }
4158
4159 gcc_assert (tune_index >= 0);
4160 rs6000_cpu = processor_target_table[tune_index].processor;
4161
4162 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4163 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4164 || rs6000_cpu == PROCESSOR_PPCE5500)
4165 {
4166 if (TARGET_ALTIVEC)
4167 error ("AltiVec not supported in this target");
4168 }
4169
4170 /* If we are optimizing big endian systems for space, use the load/store
4171 multiple and string instructions. */
4172 if (BYTES_BIG_ENDIAN && optimize_size)
4173 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4174 | OPTION_MASK_STRING);
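/* For example, with OPTION_MASK_MULTIPLE a prologue that saves r25-r31
   can emit the single instruction

     stmw 25,-28(1)

   in place of seven individual stw instructions (illustrative; the
   actual choice is made when the prologue is generated).  */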
4175
4176 /* Don't allow -mmultiple or -mstring on little endian systems
4177 unless the cpu is a 750, because the hardware doesn't support the
4178 instructions used in little endian mode, and using them causes an
4179 alignment trap. The 750 does not cause an alignment trap (except
4180 when the target address is unaligned). */
4181
4182 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4183 {
4184 if (TARGET_MULTIPLE)
4185 {
4186 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4187 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4188 warning (0, "-mmultiple is not supported on little endian systems");
4189 }
4190
4191 if (TARGET_STRING)
4192 {
4193 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4194 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4195 warning (0, "-mstring is not supported on little endian systems");
4196 }
4197 }
4198
4199 /* If little-endian, default to -mstrict-align on older processors.
4200 Testing for htm matches power8 and later. */
4201 if (!BYTES_BIG_ENDIAN
4202 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4203 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4204
4205 /* -maltivec={le,be} implies -maltivec. */
4206 if (rs6000_altivec_element_order != 0)
4207 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4208
4209 /* Disallow -maltivec=le in big endian mode for now. This is not
4210 known to be useful for anyone. */
4211 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4212 {
4213 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4214 rs6000_altivec_element_order = 0;
4215 }
4216
4217 /* Add some warnings for VSX. */
4218 if (TARGET_VSX)
4219 {
4220 const char *msg = NULL;
4221 if (!TARGET_HARD_FLOAT || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4222 {
4223 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4224 msg = N_("-mvsx requires hardware floating point");
4225 else
4226 {
4227 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4228 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4229 }
4230 }
4231 else if (TARGET_PAIRED_FLOAT)
4232 msg = N_("-mvsx and -mpaired are incompatible");
4233 else if (TARGET_AVOID_XFORM > 0)
4234 msg = N_("-mvsx needs indexed addressing");
4235 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4236 & OPTION_MASK_ALTIVEC))
4237 {
4238 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4239 msg = N_("-mvsx and -mno-altivec are incompatible");
4240 else
4241 msg = N_("-mno-altivec disables vsx");
4242 }
4243
4244 if (msg)
4245 {
4246 warning (0, msg);
4247 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4248 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4249 }
4250 }
4251
4252 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4253 the -mcpu setting to enable options that conflict. */
4254 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4255 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4256 | OPTION_MASK_ALTIVEC
4257 | OPTION_MASK_VSX)) != 0)
4258 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4259 | OPTION_MASK_DIRECT_MOVE)
4260 & ~rs6000_isa_flags_explicit);
4261
4262 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4263 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4264
4265 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4266 off all of the options that depend on those flags. */
4267 ignore_masks = rs6000_disable_incompatible_switches ();
4268
4269 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4270 unless the user explicitly used the -mno-<option> to disable the code. */
4271 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR
4272 || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0)
4273 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4274 else if (TARGET_P9_MINMAX)
4275 {
4276 if (have_cpu)
4277 {
4278 if (cpu_index == PROCESSOR_POWER9)
4279 {
4280 /* Legacy behavior: allow -mcpu=power9 with certain
4281 capabilities explicitly disabled. */
4282 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4283 /* However, reject this automatic fix if certain
4284 capabilities required for TARGET_P9_MINMAX support
4285 have been explicitly disabled. */
4286 if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4287 | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
4288 != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
4289 | OPTION_MASK_UPPER_REGS_DF))
4290 error ("-mpower9-minmax incompatible with explicitly disabled options");
4291 }
4292 else
4293 error ("Power9 target option is incompatible with -mcpu=<xxx> for "
4294 "<xxx> less than power9");
4295 }
4296 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4297 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4298 & rs6000_isa_flags_explicit))
4299 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4300 were explicitly cleared. */
4301 error ("-mpower9-minmax incompatible with explicitly disabled options");
4302 else
4303 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4304 }
4305 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4306 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4307 else if (TARGET_VSX)
4308 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4309 else if (TARGET_POPCNTD)
4310 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4311 else if (TARGET_DFP)
4312 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4313 else if (TARGET_CMPB)
4314 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4315 else if (TARGET_FPRND)
4316 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4317 else if (TARGET_POPCNTB)
4318 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4319 else if (TARGET_ALTIVEC)
4320 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4321
4322 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4323 {
4324 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4325 error ("-mcrypto requires -maltivec");
4326 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4327 }
4328
4329 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4330 {
4331 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4332 error ("-mdirect-move requires -mvsx");
4333 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4334 }
4335
4336 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4337 {
4338 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4339 error ("-mpower8-vector requires -maltivec");
4340 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4341 }
4342
4343 if (TARGET_P8_VECTOR && !TARGET_VSX)
4344 {
4345 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4346 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4347 error ("-mpower8-vector requires -mvsx");
4348 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4349 {
4350 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4351 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4352 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4353 }
4354 else
4355 {
4356 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4357 not explicit. */
4358 rs6000_isa_flags |= OPTION_MASK_VSX;
4359 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4360 }
4361 }
4362
4363 if (TARGET_VSX_TIMODE && !TARGET_VSX)
4364 {
4365 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
4366 error ("-mvsx-timode requires -mvsx");
4367 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4368 }
4369
4370 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4371 {
4372 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4373 error ("-mhard-dfp requires -mhard-float");
4374 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4375 }
4376
4377 /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
4378 and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
4379 set the individual option. */
4380 if (TARGET_UPPER_REGS > 0)
4381 {
4382 if (TARGET_VSX
4383 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4384 {
4385 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
4386 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4387 }
4388 if (TARGET_VSX
4389 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4390 {
4391 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
4392 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4393 }
4394 if (TARGET_P8_VECTOR
4395 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4396 {
4397 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
4398 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4399 }
4400 }
4401 else if (TARGET_UPPER_REGS == 0)
4402 {
4403 if (TARGET_VSX
4404 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
4405 {
4406 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4407 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
4408 }
4409 if (TARGET_VSX
4410 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
4411 {
4412 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4413 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
4414 }
4415 if (TARGET_P8_VECTOR
4416 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
4417 {
4418 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4419 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
4420 }
4421 }
4422
4423 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
4424 {
4425 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4426 error ("-mupper-regs-df requires -mvsx");
4427 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
4428 }
4429
4430 if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
4431 {
4432 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
4433 error ("-mupper-regs-di requires -mvsx");
4434 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
4435 }
4436
4437 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
4438 {
4439 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4440 error ("-mupper-regs-sf requires -mpower8-vector");
4441 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
4442 }
4443
4444 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4445 silently turn off quad memory mode. */
4446 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4447 {
4448 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4449 warning (0, N_("-mquad-memory requires 64-bit mode"));
4450
4451 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4452 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4453
4454 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4455 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4456 }
4457
4458 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4459 the words are reversed, but atomic operations can still be done by
4460 swapping the words. */
4461 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4462 {
4463 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4464 warning (0, N_("-mquad-memory is not available in little endian mode"));
4465
4466 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4467 }
4468
4469 /* Assume if the user asked for normal quad memory instructions, they want
4470 the atomic versions as well, unless they explicitly told us not to use quad
4471 word atomic instructions. */
4472 if (TARGET_QUAD_MEMORY
4473 && !TARGET_QUAD_MEMORY_ATOMIC
4474 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4475 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4476
4477 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4478 generating power8 instructions. */
4479 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4480 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4481 & OPTION_MASK_P8_FUSION);
4482
4483 /* Setting additional fusion flags turns on base fusion. */
4484 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4485 {
4486 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4487 {
4488 if (TARGET_P8_FUSION_SIGN)
4489 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4490
4491 if (TARGET_TOC_FUSION)
4492 error ("-mtoc-fusion requires -mpower8-fusion");
4493
4494 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4495 }
4496 else
4497 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4498 }
4499
4500 /* Power9 fusion is a superset of power8 fusion. */
4501 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4502 {
4503 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4504 {
4505 /* We prefer to not mention undocumented options in
4506 error messages. However, if users have managed to select
4507 power9-fusion without selecting power8-fusion, they
4508 already know about undocumented flags. */
4509 error ("-mpower9-fusion requires -mpower8-fusion");
4510 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4511 }
4512 else
4513 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4514 }
4515
4516 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4517 generating power9 instructions. */
4518 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4519 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4520 & OPTION_MASK_P9_FUSION);
4521
4522 /* Power8 does not fuse sign extended loads with the addis. If we are
4523 optimizing at high levels for speed, convert a sign extended load into a
4524 zero extending load, and an explicit sign extension. */
4525 if (TARGET_P8_FUSION
4526 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4527 && optimize_function_for_speed_p (cfun)
4528 && optimize >= 3)
4529 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
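/* Illustrative sketch: rather than the unfusable pair

     addis 9,2,sym@toc@ha
     lha   3,sym@toc@l(9)

   emit a zero-extending load that power8 can fuse with the addis, plus
   an explicit sign extension:

     addis 9,2,sym@toc@ha
     lhz   3,sym@toc@l(9)
     extsh 3,3  */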
4530
4531 /* TOC fusion requires 64-bit and medium/large code model. */
4532 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4533 {
4534 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4535 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4536 warning (0, N_("-mtoc-fusion requires 64-bit"));
4537 }
4538
4539 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4540 {
4541 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4542 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4543 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4544 }
4545
4546 /* Turn on -mtoc-fusion by default if using p8-fusion and a 64-bit
4547 medium/large code model. */
4548 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4549 && (TARGET_CMODEL != CMODEL_SMALL)
4550 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4551 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4552
4553 /* ISA 3.0 vector instructions include ISA 2.07. */
4554 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4555 {
4556 /* We prefer to not mention undocumented options in
4557 error messages. However, if users have managed to select
4558 power9-vector without selecting power8-vector, they
4559 already know about undocumented flags. */
4560 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4561 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4562 error ("-mpower9-vector requires -mpower8-vector");
4563 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4564 {
4565 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4566 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4567 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4568 }
4569 else
4570 {
4571 /* OPTION_MASK_P9_VECTOR is explicit and
4572 OPTION_MASK_P8_VECTOR is not explicit. */
4573 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4574 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4575 }
4576 }
4577
4578 /* -mpower9-dform turns on both -mpower9-dform-scalar and
4579 -mpower9-dform-vector. */
4580 if (TARGET_P9_DFORM_BOTH > 0)
4581 {
4582 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4583 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR;
4584
4585 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4586 rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR;
4587 }
4588 else if (TARGET_P9_DFORM_BOTH == 0)
4589 {
4590 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR))
4591 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR;
4592
4593 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR))
4594 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4595 }
4596
4597 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4598 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR)
4599 {
4600 /* We prefer to not mention undocumented options in
4601 error messages. However, if users have managed to select
4602 power9-dform without selecting power9-vector, they
4603 already know about undocumented flags. */
4604 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4605 && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR
4606 | OPTION_MASK_P9_DFORM_VECTOR)))
4607 error ("-mpower9-dform requires -mpower9-vector");
4608 else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4609 {
4610 rs6000_isa_flags &=
4611 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4612 rs6000_isa_flags_explicit |=
4613 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4614 }
4615 else
4616 {
4617 /* We know that OPTION_MASK_P9_VECTOR is not explicit and
4618 OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR
4619 may be explicit. */
4620 rs6000_isa_flags |= OPTION_MASK_P9_VECTOR;
4621 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4622 }
4623 }
4624
4625 if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
4626 && !TARGET_DIRECT_MOVE)
4627 {
4628 /* We prefer to not mention undocumented options in
4629 error messages. However, if users have managed to select
4630 power9-dform without selecting direct-move, they
4631 already know about undocumented flags. */
4632 if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4633 && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
4634 (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
4635 (TARGET_P9_DFORM_BOTH == 1)))
4636 error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
4637 " require -mdirect-move");
4638 else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
4639 {
4640 rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
4641 rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
4642 }
4643 else
4644 {
4645 rs6000_isa_flags &=
4646 ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4647 rs6000_isa_flags_explicit |=
4648 (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
4649 }
4650 }
4651
4652 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
4653 {
4654 /* We prefer to not mention undocumented options in
4655 error messages. However, if users have managed to select
4656 power9-dform without selecting upper-regs-df, they
4657 already know about undocumented flags. */
4658 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4659 error ("-mpower9-dform requires -mupper-regs-df");
4660 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4661 }
4662
4663 if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
4664 {
4665 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4666 error ("-mpower9-dform requires -mupper-regs-sf");
4667 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
4668 }
4669
4670 /* Enable LRA by default. */
4671 if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
4672 rs6000_isa_flags |= OPTION_MASK_LRA;
4673
4674 /* There have been bugs with -mvsx-timode that don't show up with -mlra,
4675 but do show up with -mno-lra. Given that -mlra will become the default
4676 once PR 69847 is fixed, turn off the options with problems by default if
4677 -mno-lra was used, and warn if the user explicitly asked for the option.
4678
4679 Enable -mpower9-dform-vector by default if LRA and the other power9
4680 options are enabled. Enable -mvsx-timode by default if LRA and VSX are. */
4681 if (!TARGET_LRA)
4682 {
4683 if (TARGET_VSX_TIMODE)
4684 {
4685 if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
4686 warning (0, "-mvsx-timode might need -mlra");
4687
4688 else
4689 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
4690 }
4691 }
4692
4693 else
4694 {
4695 if (TARGET_VSX && !TARGET_VSX_TIMODE
4696 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
4697 rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
4698 }
4699
4700 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4701 support. If we only have ISA 2.06 support and the user did not specify
4702 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4703 but we don't enable the full vectorization support. */
4704 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4705 TARGET_ALLOW_MOVMISALIGN = 1;
4706
4707 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4708 {
4709 if (TARGET_ALLOW_MOVMISALIGN > 0
4710 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4711 error ("-mallow-movmisalign requires -mvsx");
4712
4713 TARGET_ALLOW_MOVMISALIGN = 0;
4714 }
4715
4716 /* Determine when unaligned vector accesses are permitted, and when
4717 they are preferred over masked Altivec loads. Note that if
4718 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4719 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4720 not true. */
4721 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4722 {
4723 if (!TARGET_VSX)
4724 {
4725 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4726 error ("-mefficient-unaligned-vsx requires -mvsx");
4727
4728 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4729 }
4730
4731 else if (!TARGET_ALLOW_MOVMISALIGN)
4732 {
4733 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4734 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4735
4736 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4737 }
4738 }
4739
4740 /* Check whether we should allow small integers into VSX registers. We
4741 require direct move to prevent the register allocator from having to move
4742 variables through memory. SImode can be used on ISA 2.07,
4743 while HImode and QImode require ISA 3.0. */
4744 if (TARGET_VSX_SMALL_INTEGER
4745 && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
4746 {
4747 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
4748 error ("-mvsx-small-integer requires -mpower8-vector, "
4749 "-mupper-regs-di, and -mdirect-move");
4750
4751 rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
4752 }
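/* For example, an explicit "-mvsx-small-integer" is only honored when
   direct moves, power8 vector support, and DImode in the upper
   registers are all available (as they are with -mcpu=power8 and
   later); otherwise the error above is issued, and an implicit setting
   is silently dropped.  */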
4753
4754 /* Set long double size before the IEEE 128-bit tests. */
4755 if (!global_options_set.x_rs6000_long_double_type_size)
4756 {
4757 if (main_target_opt != NULL
4758 && (main_target_opt->x_rs6000_long_double_type_size
4759 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4760 error ("target attribute or pragma changes long double size");
4761 else
4762 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4763 }
4764
4765 /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin
4766 explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not
4767 pick up this default. */
4768 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4769 if (!global_options_set.x_rs6000_ieeequad)
4770 rs6000_ieeequad = 1;
4771 #endif
4772
4773 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4774 systems, but don't enable the __float128 keyword. */
4775 if (TARGET_VSX && TARGET_LONG_DOUBLE_128
4776 && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD)
4777 && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0))
4778 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4779
4780 /* IEEE 128-bit floating point requires VSX support. */
4781 if (!TARGET_VSX)
4782 {
4783 if (TARGET_FLOAT128_KEYWORD)
4784 {
4785 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4786 error ("-mfloat128 requires VSX support");
4787
4788 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4789 | OPTION_MASK_FLOAT128_KEYWORD
4790 | OPTION_MASK_FLOAT128_HW);
4791 }
4792
4793 else if (TARGET_FLOAT128_TYPE)
4794 {
4795 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0)
4796 error ("-mfloat128-type requires VSX support");
4797
4798 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4799 | OPTION_MASK_FLOAT128_KEYWORD
4800 | OPTION_MASK_FLOAT128_HW);
4801 }
4802 }
4803
4804 /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE
4805 128-bit floating point support to be enabled. */
4806 if (!TARGET_FLOAT128_TYPE)
4807 {
4808 if (TARGET_FLOAT128_KEYWORD)
4809 {
4810 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4811 {
4812 error ("-mfloat128 requires -mfloat128-type");
4813 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4814 | OPTION_MASK_FLOAT128_KEYWORD
4815 | OPTION_MASK_FLOAT128_HW);
4816 }
4817 else
4818 rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE;
4819 }
4820
4821 if (TARGET_FLOAT128_HW)
4822 {
4823 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4824 {
4825 error ("-mfloat128-hardware requires -mfloat128-type");
4826 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4827 }
4828 else
4829 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE
4830 | OPTION_MASK_FLOAT128_KEYWORD
4831 | OPTION_MASK_FLOAT128_HW);
4832 }
4833 }
4834
4835 /* If we have -mfloat128-type and full ISA 3.0 support, enable
4836 -mfloat128-hardware by default. However, don't enable the __float128
4837 keyword. If the user explicitly turned on -mfloat128-hardware, enable the
4838 -mfloat128 option as well if it was not already set. */
4839 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW
4840 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4841 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4842 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4843
4844 if (TARGET_FLOAT128_HW
4845 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4846 {
4847 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4848 error ("-mfloat128-hardware requires full ISA 3.0 support");
4849
4850 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4851 }
4852
4853 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4854 {
4855 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4856 error ("-mfloat128-hardware requires -m64");
4857
4858 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4859 }
4860
4861 if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD
4862 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0
4863 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4864 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4865
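/* Editor's summary of the fix-ups above (a reading aid, not authoritative):
   -mfloat128 implies -mfloat128-type; -mfloat128-hardware additionally
   requires -mfloat128-type, full ISA 3.0 support (ISA_3_0_MASKS_IEEE), and
   -m64; and an explicit -mfloat128-hardware also turns on -mfloat128 when
   the keyword was not explicitly disabled.  */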
4866 /* Print the options after updating the defaults. */
4867 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4868 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4869
4870 /* E500mc does "better" if we inline more aggressively. Respect the
4871 user's opinion, though. */
4872 if (rs6000_block_move_inline_limit == 0
4873 && (rs6000_cpu == PROCESSOR_PPCE500MC
4874 || rs6000_cpu == PROCESSOR_PPCE500MC64
4875 || rs6000_cpu == PROCESSOR_PPCE5500
4876 || rs6000_cpu == PROCESSOR_PPCE6500))
4877 rs6000_block_move_inline_limit = 128;
4878
4879 /* store_one_arg depends on expand_block_move to handle at least the
4880 size of reg_parm_stack_space. */
4881 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4882 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4883
4884 if (global_init_p)
4885 {
4886 /* If the appropriate debug option is enabled, replace the target hooks
4887 with debug versions that call the real version and then print
4888 debugging information. */
4889 if (TARGET_DEBUG_COST)
4890 {
4891 targetm.rtx_costs = rs6000_debug_rtx_costs;
4892 targetm.address_cost = rs6000_debug_address_cost;
4893 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4894 }
4895
4896 if (TARGET_DEBUG_ADDR)
4897 {
4898 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4899 targetm.legitimize_address = rs6000_debug_legitimize_address;
4900 rs6000_secondary_reload_class_ptr
4901 = rs6000_debug_secondary_reload_class;
4902 rs6000_secondary_memory_needed_ptr
4903 = rs6000_debug_secondary_memory_needed;
4904 rs6000_cannot_change_mode_class_ptr
4905 = rs6000_debug_cannot_change_mode_class;
4906 rs6000_preferred_reload_class_ptr
4907 = rs6000_debug_preferred_reload_class;
4908 rs6000_legitimize_reload_address_ptr
4909 = rs6000_debug_legitimize_reload_address;
4910 rs6000_mode_dependent_address_ptr
4911 = rs6000_debug_mode_dependent_address;
4912 }
4913
4914 if (rs6000_veclibabi_name)
4915 {
4916 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4917 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4918 else
4919 {
4920 error ("unknown vectorization library ABI type (%s) for "
4921 "-mveclibabi= switch", rs6000_veclibabi_name);
4922 ret = false;
4923 }
4924 }
4925 }
4926
4927 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4928 target attribute or pragma which automatically enables both options,
4929 unless the altivec ABI was set. This is set by default for 64-bit, but
4930 not for 32-bit. */
4931 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4932 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4933 | OPTION_MASK_FLOAT128_TYPE
4934 | OPTION_MASK_FLOAT128_KEYWORD)
4935 & ~rs6000_isa_flags_explicit);
4936
4937 /* Enable Altivec ABI for AIX -maltivec. */
4938 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4939 {
4940 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4941 error ("target attribute or pragma changes AltiVec ABI");
4942 else
4943 rs6000_altivec_abi = 1;
4944 }
4945
4946 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4947 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4948 be explicitly overridden in either case. */
4949 if (TARGET_ELF)
4950 {
4951 if (!global_options_set.x_rs6000_altivec_abi
4952 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4953 {
4954 if (main_target_opt != NULL
4955 && !main_target_opt->x_rs6000_altivec_abi)
4956 error ("target attribute or pragma changes AltiVec ABI");
4957 else
4958 rs6000_altivec_abi = 1;
4959 }
4960 }
4961
4962 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4963 So far, the only darwin64 targets are also Mach-O. */
4964 if (TARGET_MACHO
4965 && DEFAULT_ABI == ABI_DARWIN
4966 && TARGET_64BIT)
4967 {
4968 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4969 error ("target attribute or pragma changes darwin64 ABI");
4970 else
4971 {
4972 rs6000_darwin64_abi = 1;
4973 /* Default to natural alignment, for better performance. */
4974 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4975 }
4976 }
4977
4978 /* Place FP constants in the constant pool instead of TOC
4979 if section anchors enabled. */
4980 if (flag_section_anchors
4981 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4982 TARGET_NO_FP_IN_TOC = 1;
4983
4984 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4985 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4986
4987 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4988 SUBTARGET_OVERRIDE_OPTIONS;
4989 #endif
4990 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4991 SUBSUBTARGET_OVERRIDE_OPTIONS;
4992 #endif
4993 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4994 SUB3TARGET_OVERRIDE_OPTIONS;
4995 #endif
4996
4997 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4998 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4999
5000 /* For the E500 family of cores, reset the single/double FP flags to let us
5001 check that they remain constant across attributes or pragmas. Also,
5002 clear a possible request for string instructions, which are not supported
5003 on these cores and which we might have silently enabled above for -Os.
5004
5005 For other families, clear ISEL in case it was set implicitly. */
5007
5008 switch (rs6000_cpu)
5009 {
5010 case PROCESSOR_PPC8540:
5011 case PROCESSOR_PPC8548:
5012 case PROCESSOR_PPCE500MC:
5013 case PROCESSOR_PPCE500MC64:
5014 case PROCESSOR_PPCE5500:
5015 case PROCESSOR_PPCE6500:
5016
5017 rs6000_single_float = 0;
5018 rs6000_double_float = 0;
5019
5020 rs6000_isa_flags &= ~OPTION_MASK_STRING;
5021
5022 break;
5023
5024 default:
5025
5026 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
5027 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
5028
5029 break;
5030 }
5031
5032 if (main_target_opt)
5033 {
5034 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
5035 error ("target attribute or pragma changes single precision floating "
5036 "point");
5037 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
5038 error ("target attribute or pragma changes double precision floating "
5039 "point");
5040 }
5041
5042 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
5043 && rs6000_cpu != PROCESSOR_POWER5
5044 && rs6000_cpu != PROCESSOR_POWER6
5045 && rs6000_cpu != PROCESSOR_POWER7
5046 && rs6000_cpu != PROCESSOR_POWER8
5047 && rs6000_cpu != PROCESSOR_POWER9
5048 && rs6000_cpu != PROCESSOR_PPCA2
5049 && rs6000_cpu != PROCESSOR_CELL
5050 && rs6000_cpu != PROCESSOR_PPC476);
5051 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
5052 || rs6000_cpu == PROCESSOR_POWER5
5053 || rs6000_cpu == PROCESSOR_POWER7
5054 || rs6000_cpu == PROCESSOR_POWER8);
5055 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
5056 || rs6000_cpu == PROCESSOR_POWER5
5057 || rs6000_cpu == PROCESSOR_POWER6
5058 || rs6000_cpu == PROCESSOR_POWER7
5059 || rs6000_cpu == PROCESSOR_POWER8
5060 || rs6000_cpu == PROCESSOR_POWER9
5061 || rs6000_cpu == PROCESSOR_PPCE500MC
5062 || rs6000_cpu == PROCESSOR_PPCE500MC64
5063 || rs6000_cpu == PROCESSOR_PPCE5500
5064 || rs6000_cpu == PROCESSOR_PPCE6500);
5065
5066 /* Allow debug switches to override the above settings. These are set to -1
5067 in rs6000.opt to indicate the user hasn't directly set the switch. */
5068 if (TARGET_ALWAYS_HINT >= 0)
5069 rs6000_always_hint = TARGET_ALWAYS_HINT;
5070
5071 if (TARGET_SCHED_GROUPS >= 0)
5072 rs6000_sched_groups = TARGET_SCHED_GROUPS;
5073
5074 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
5075 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
5076
5077 rs6000_sched_restricted_insns_priority
5078 = (rs6000_sched_groups ? 1 : 0);
5079
5080 /* Handle -msched-costly-dep option. */
5081 rs6000_sched_costly_dep
5082 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
5083
5084 if (rs6000_sched_costly_dep_str)
5085 {
5086 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
5087 rs6000_sched_costly_dep = no_dep_costly;
5088 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
5089 rs6000_sched_costly_dep = all_deps_costly;
5090 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
5091 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
5092 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
5093 rs6000_sched_costly_dep = store_to_load_dep_costly;
5094 else
5095 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
5096 atoi (rs6000_sched_costly_dep_str));
5097 }
5098
5099 /* Handle -minsert-sched-nops option. */
5100 rs6000_sched_insert_nops
5101 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
5102
5103 if (rs6000_sched_insert_nops_str)
5104 {
5105 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
5106 rs6000_sched_insert_nops = sched_finish_none;
5107 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
5108 rs6000_sched_insert_nops = sched_finish_pad_groups;
5109 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
5110 rs6000_sched_insert_nops = sched_finish_regroup_exact;
5111 else
5112 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
5113 atoi (rs6000_sched_insert_nops_str));
5114 }
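/* Editor's note on the two option parsers above: -msched-costly-dep accepts
   "no", "all", "true_store_to_load", "store_to_load", or a bare number that
   atoi converts and the code casts to the dependence-cost enum; likewise
   -minsert-sched-nops accepts "no", "pad", "regroup_exact", or a bare number
   cast to the nop-insertion enum.  */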
5115
5116 /* Handle the stack protector options. */
5117 if (!global_options_set.x_rs6000_stack_protector_guard)
5118 #ifdef TARGET_THREAD_SSP_OFFSET
5119 rs6000_stack_protector_guard = SSP_TLS;
5120 #else
5121 rs6000_stack_protector_guard = SSP_GLOBAL;
5122 #endif
5123
5124 #ifdef TARGET_THREAD_SSP_OFFSET
5125 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
5126 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
5127 #endif
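/* Editor's note: in the PowerPC ELF ABIs the TLS thread pointer lives in
   r13 for 64-bit code and r2 for 32-bit code, which is why those registers
   are the defaults here; a TLS guard is then loaded at a fixed offset from
   that register.  */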
5128
5129 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
5130 {
5131 char *endp;
5132 const char *str = rs6000_stack_protector_guard_offset_str;
5133
5134 errno = 0;
5135 long offset = strtol (str, &endp, 0);
5136 if (!*str || *endp || errno)
5137 error ("%qs is not a valid number "
5138 "in -mstack-protector-guard-offset=", str);
5139
5140 if (!IN_RANGE (offset, -0x8000, 0x7fff)
5141 || (TARGET_64BIT && (offset & 3)))
5142 error ("%qs is not a valid offset "
5143 "in -mstack-protector-guard-offset=", str);
5144
5145 rs6000_stack_protector_guard_offset = offset;
5146 }
5147
5148 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
5149 {
5150 const char *str = rs6000_stack_protector_guard_reg_str;
5151 int reg = decode_reg_name (str);
5152
5153 if (!IN_RANGE (reg, 1, 31))
5154 error ("%qs is not a valid base register "
5155 "in -mstack-protector-guard-reg=", str);
5156
5157 rs6000_stack_protector_guard_reg = reg;
5158 }
5159
5160 if (rs6000_stack_protector_guard == SSP_TLS
5161 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
5162 error ("-mstack-protector-guard=tls needs a valid base register");
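/* A hypothetical invocation exercising the checks above (editor's sketch;
   the offset value is illustrative only):

     gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
         -mstack-protector-guard-offset=0x7010 foo.c

   The offset must fit in a signed 16-bit displacement and, for -m64, be
   word-aligned; the register must decode to one of r1..r31.  */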
5163
5164 if (global_init_p)
5165 {
5166 #ifdef TARGET_REGNAMES
5167 /* If the user desires alternate register names, copy in the
5168 alternate names now. */
5169 if (TARGET_REGNAMES)
5170 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
5171 #endif
5172
5173 /* Set aix_struct_return last, after the ABI is determined.
5174 If -maix-struct-return or -msvr4-struct-return was explicitly
5175 used, don't override with the ABI default. */
5176 if (!global_options_set.x_aix_struct_return)
5177 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
5178
5179 #if 0
5180 /* IBM XL compiler defaults to unsigned bitfields. */
5181 if (TARGET_XL_COMPAT)
5182 flag_signed_bitfields = 0;
5183 #endif
5184
5185 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
5186 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
5187
5188 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
5189
5190 /* We can only guarantee the availability of DI pseudo-ops when
5191 assembling for 64-bit targets. */
5192 if (!TARGET_64BIT)
5193 {
5194 targetm.asm_out.aligned_op.di = NULL;
5195 targetm.asm_out.unaligned_op.di = NULL;
5196 }
5197
5198
5199 /* Set branch target alignment, if not optimizing for size. */
5200 if (!optimize_size)
5201 {
5202 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
5203 8-byte aligned to avoid misprediction by the branch predictor. */
5204 if (rs6000_cpu == PROCESSOR_TITAN
5205 || rs6000_cpu == PROCESSOR_CELL)
5206 {
5207 if (align_functions <= 0)
5208 align_functions = 8;
5209 if (align_jumps <= 0)
5210 align_jumps = 8;
5211 if (align_loops <= 0)
5212 align_loops = 8;
5213 }
5214 if (rs6000_align_branch_targets)
5215 {
5216 if (align_functions <= 0)
5217 align_functions = 16;
5218 if (align_jumps <= 0)
5219 align_jumps = 16;
5220 if (align_loops <= 0)
5221 {
5222 can_override_loop_align = 1;
5223 align_loops = 16;
5224 }
5225 }
5226 if (align_jumps_max_skip <= 0)
5227 align_jumps_max_skip = 15;
5228 if (align_loops_max_skip <= 0)
5229 align_loops_max_skip = 15;
5230 }
5231
5232 /* Arrange to save and restore machine status around nested functions. */
5233 init_machine_status = rs6000_init_machine_status;
5234
5235 /* We should always be splitting complex arguments, but we can't break
5236 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5237 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5238 targetm.calls.split_complex_arg = NULL;
5239
5240 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5241 if (DEFAULT_ABI == ABI_AIX)
5242 targetm.calls.custom_function_descriptors = 0;
5243 }
5244
5245 /* Initialize rs6000_cost with the appropriate target costs. */
5246 if (optimize_size)
5247 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5248 else
5249 switch (rs6000_cpu)
5250 {
5251 case PROCESSOR_RS64A:
5252 rs6000_cost = &rs64a_cost;
5253 break;
5254
5255 case PROCESSOR_MPCCORE:
5256 rs6000_cost = &mpccore_cost;
5257 break;
5258
5259 case PROCESSOR_PPC403:
5260 rs6000_cost = &ppc403_cost;
5261 break;
5262
5263 case PROCESSOR_PPC405:
5264 rs6000_cost = &ppc405_cost;
5265 break;
5266
5267 case PROCESSOR_PPC440:
5268 rs6000_cost = &ppc440_cost;
5269 break;
5270
5271 case PROCESSOR_PPC476:
5272 rs6000_cost = &ppc476_cost;
5273 break;
5274
5275 case PROCESSOR_PPC601:
5276 rs6000_cost = &ppc601_cost;
5277 break;
5278
5279 case PROCESSOR_PPC603:
5280 rs6000_cost = &ppc603_cost;
5281 break;
5282
5283 case PROCESSOR_PPC604:
5284 rs6000_cost = &ppc604_cost;
5285 break;
5286
5287 case PROCESSOR_PPC604e:
5288 rs6000_cost = &ppc604e_cost;
5289 break;
5290
5291 case PROCESSOR_PPC620:
5292 rs6000_cost = &ppc620_cost;
5293 break;
5294
5295 case PROCESSOR_PPC630:
5296 rs6000_cost = &ppc630_cost;
5297 break;
5298
5299 case PROCESSOR_CELL:
5300 rs6000_cost = &ppccell_cost;
5301 break;
5302
5303 case PROCESSOR_PPC750:
5304 case PROCESSOR_PPC7400:
5305 rs6000_cost = &ppc750_cost;
5306 break;
5307
5308 case PROCESSOR_PPC7450:
5309 rs6000_cost = &ppc7450_cost;
5310 break;
5311
5312 case PROCESSOR_PPC8540:
5313 case PROCESSOR_PPC8548:
5314 rs6000_cost = &ppc8540_cost;
5315 break;
5316
5317 case PROCESSOR_PPCE300C2:
5318 case PROCESSOR_PPCE300C3:
5319 rs6000_cost = &ppce300c2c3_cost;
5320 break;
5321
5322 case PROCESSOR_PPCE500MC:
5323 rs6000_cost = &ppce500mc_cost;
5324 break;
5325
5326 case PROCESSOR_PPCE500MC64:
5327 rs6000_cost = &ppce500mc64_cost;
5328 break;
5329
5330 case PROCESSOR_PPCE5500:
5331 rs6000_cost = &ppce5500_cost;
5332 break;
5333
5334 case PROCESSOR_PPCE6500:
5335 rs6000_cost = &ppce6500_cost;
5336 break;
5337
5338 case PROCESSOR_TITAN:
5339 rs6000_cost = &titan_cost;
5340 break;
5341
5342 case PROCESSOR_POWER4:
5343 case PROCESSOR_POWER5:
5344 rs6000_cost = &power4_cost;
5345 break;
5346
5347 case PROCESSOR_POWER6:
5348 rs6000_cost = &power6_cost;
5349 break;
5350
5351 case PROCESSOR_POWER7:
5352 rs6000_cost = &power7_cost;
5353 break;
5354
5355 case PROCESSOR_POWER8:
5356 rs6000_cost = &power8_cost;
5357 break;
5358
5359 case PROCESSOR_POWER9:
5360 rs6000_cost = &power9_cost;
5361 break;
5362
5363 case PROCESSOR_PPCA2:
5364 rs6000_cost = &ppca2_cost;
5365 break;
5366
5367 default:
5368 gcc_unreachable ();
5369 }
5370
5371 if (global_init_p)
5372 {
5373 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5374 rs6000_cost->simultaneous_prefetches,
5375 global_options.x_param_values,
5376 global_options_set.x_param_values);
5377 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5378 global_options.x_param_values,
5379 global_options_set.x_param_values);
5380 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5381 rs6000_cost->cache_line_size,
5382 global_options.x_param_values,
5383 global_options_set.x_param_values);
5384 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5385 global_options.x_param_values,
5386 global_options_set.x_param_values);
5387
5388 /* Increase loop peeling limits based on performance analysis. */
5389 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5390 global_options.x_param_values,
5391 global_options_set.x_param_values);
5392 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5393 global_options.x_param_values,
5394 global_options_set.x_param_values);
5395
5396 /* Use the 'model' -fsched-pressure algorithm by default. */
5397 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5398 SCHED_PRESSURE_MODEL,
5399 global_options.x_param_values,
5400 global_options_set.x_param_values);
5401
5402 /* If using typedef char *va_list, signal that
5403 __builtin_va_start (&ap, 0) can be optimized to
5404 ap = __builtin_next_arg (0). */
5405 if (DEFAULT_ABI != ABI_V4)
5406 targetm.expand_builtin_va_start = NULL;
5407 }
5408
5409 /* Set up single/double float flags.
5410 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5411 then set both flags. */
5412 if (TARGET_HARD_FLOAT && rs6000_single_float == 0 && rs6000_double_float == 0)
5413 rs6000_single_float = rs6000_double_float = 1;
5414
5415 /* If not explicitly specified via option, decide whether to generate indexed
5416 load/store instructions. A value of -1 indicates that the
5417 initial value of this variable has not been overwritten. During
5418 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5419 if (TARGET_AVOID_XFORM == -1)
5420 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5421 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5422 need indexed accesses and the type used is the scalar type of the element
5423 being loaded or stored. */
5424 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
5425 && !TARGET_ALTIVEC);
5426
5427 /* Set the -mrecip options. */
5428 if (rs6000_recip_name)
5429 {
5430 char *p = ASTRDUP (rs6000_recip_name);
5431 char *q;
5432 unsigned int mask, i;
5433 bool invert;
5434
5435 while ((q = strtok (p, ",")) != NULL)
5436 {
5437 p = NULL;
5438 if (*q == '!')
5439 {
5440 invert = true;
5441 q++;
5442 }
5443 else
5444 invert = false;
5445
5446 if (!strcmp (q, "default"))
5447 mask = ((TARGET_RECIP_PRECISION)
5448 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5449 else
5450 {
5451 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5452 if (!strcmp (q, recip_options[i].string))
5453 {
5454 mask = recip_options[i].mask;
5455 break;
5456 }
5457
5458 if (i == ARRAY_SIZE (recip_options))
5459 {
5460 error ("unknown option for -mrecip=%s", q);
5461 invert = false;
5462 mask = 0;
5463 ret = false;
5464 }
5465 }
5466
5467 if (invert)
5468 rs6000_recip_control &= ~mask;
5469 else
5470 rs6000_recip_control |= mask;
5471 }
5472 }
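#if 0
/* Simplified standalone model of the -mrecip= parsing above (editor's
   sketch, never compiled; the "div"/"rsqrt" names and the mask bits are
   illustrative, not the real RECIP_* values).  Each comma-separated token
   may start with '!' to clear the named bits instead of setting them.  */
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[] = "div,!rsqrt";
  unsigned control = 0;
  char *p = buf, *q;

  while ((q = strtok (p, ",")) != NULL)
    {
      int invert = 0;
      unsigned mask = 0;

      p = NULL;			/* subsequent strtok calls continue BUF */
      if (*q == '!')
	{
	  invert = 1;
	  q++;
	}
      if (!strcmp (q, "div"))
	mask = 1;		/* illustrative bit */
      else if (!strcmp (q, "rsqrt"))
	mask = 2;		/* illustrative bit */

      if (invert)
	control &= ~mask;
      else
	control |= mask;
    }
  printf ("control = %u\n", control);	/* prints 1: div set, rsqrt clear */
  return 0;
}
#endif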
5473
5474 /* Set the builtin mask of the various options used that could affect which
5475 builtins were used. In the past we used target_flags, but we've run out
5476 of bits, and some options like PAIRED are no longer in target_flags. */
5477 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5478 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5479 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5480 rs6000_builtin_mask);
5481
5482 /* Initialize all of the registers. */
5483 rs6000_init_hard_regno_mode_ok (global_init_p);
5484
5485 /* Save the initial options in case the user uses function-specific options. */
5486 if (global_init_p)
5487 target_option_default_node = target_option_current_node
5488 = build_target_option_node (&global_options);
5489
5490 /* If not explicitly specified via option, decide whether to generate the
5491 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
5492 if (TARGET_LINK_STACK == -1)
5493 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
5494
5495 return ret;
5496 }
5497
5498 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5499 define the target cpu type. */
5500
5501 static void
5502 rs6000_option_override (void)
5503 {
5504 (void) rs6000_option_override_internal (true);
5505 }
5506
5507 \f
5508 /* Implement targetm.vectorize.builtin_mask_for_load. */
5509 static tree
5510 rs6000_builtin_mask_for_load (void)
5511 {
5512 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5513 if ((TARGET_ALTIVEC && !TARGET_VSX)
5514 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5515 return altivec_builtin_mask_for_load;
5516 else
5517 return 0;
5518 }
5519
5520 /* Implement LOOP_ALIGN. */
5521 int
5522 rs6000_loop_align (rtx label)
5523 {
5524 basic_block bb;
5525 int ninsns;
5526
5527 /* Don't override loop alignment if -falign-loops was specified. */
5528 if (!can_override_loop_align)
5529 return align_loops_log;
5530
5531 bb = BLOCK_FOR_INSN (label);
5532 ninsns = num_loop_insns (bb->loop_father);
5533
5534 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
5535 if (ninsns > 4 && ninsns <= 8
5536 && (rs6000_cpu == PROCESSOR_POWER4
5537 || rs6000_cpu == PROCESSOR_POWER5
5538 || rs6000_cpu == PROCESSOR_POWER6
5539 || rs6000_cpu == PROCESSOR_POWER7
5540 || rs6000_cpu == PROCESSOR_POWER8
5541 || rs6000_cpu == PROCESSOR_POWER9))
5542 return 5;
5543 else
5544 return align_loops_log;
5545 }
5546
5547 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5548 static int
5549 rs6000_loop_align_max_skip (rtx_insn *label)
5550 {
5551 return (1 << rs6000_loop_align (label)) - 1;
5552 }
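/* Editor's note: for example, if rs6000_loop_align returns 5 (a 32-byte
   boundary), the maximum padding allowed is (1 << 5) - 1 == 31 bytes.  */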
5553
5554 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5555 after applying N iterations. This routine does not determine
5556 how many iterations are required to reach the desired alignment. */
5557
5558 static bool
5559 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5560 {
5561 if (is_packed)
5562 return false;
5563
5564 if (TARGET_32BIT)
5565 {
5566 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5567 return true;
5568
5569 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5570 return true;
5571
5572 return false;
5573 }
5574 else
5575 {
5576 if (TARGET_MACHO)
5577 return false;
5578
5579 /* Assume that all other types are naturally aligned. CHECKME! */
5580 return true;
5581 }
5582 }
5583
5584 /* Return true if the vector misalignment factor is supported by the
5585 target. */
5586 static bool
5587 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5588 const_tree type,
5589 int misalignment,
5590 bool is_packed)
5591 {
5592 if (TARGET_VSX)
5593 {
5594 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5595 return true;
5596
5597 /* Return false if the movmisalign pattern is not supported for this mode. */
5598 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5599 return false;
5600
5601 if (misalignment == -1)
5602 {
5603 /* Misalignment factor is unknown at compile time but we know
5604 it's word aligned. */
5605 if (rs6000_vector_alignment_reachable (type, is_packed))
5606 {
5607 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5608
5609 if (element_size == 64 || element_size == 32)
5610 return true;
5611 }
5612
5613 return false;
5614 }
5615
5616 /* VSX supports word-aligned vectors. */
5617 if (misalignment % 4 == 0)
5618 return true;
5619 }
5620 return false;
5621 }
5622
5623 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5624 static int
5625 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5626 tree vectype, int misalign)
5627 {
5628 unsigned elements;
5629 tree elem_type;
5630
5631 switch (type_of_cost)
5632 {
5633 case scalar_stmt:
5634 case scalar_load:
5635 case scalar_store:
5636 case vector_stmt:
5637 case vector_load:
5638 case vector_store:
5639 case vec_to_scalar:
5640 case scalar_to_vec:
5641 case cond_branch_not_taken:
5642 return 1;
5643
5644 case vec_perm:
5645 if (TARGET_VSX)
5646 return 3;
5647 else
5648 return 1;
5649
5650 case vec_promote_demote:
5651 if (TARGET_VSX)
5652 return 4;
5653 else
5654 return 1;
5655
5656 case cond_branch_taken:
5657 return 3;
5658
5659 case unaligned_load:
5660 if (TARGET_P9_VECTOR)
5661 return 3;
5662
5663 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5664 return 1;
5665
5666 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5667 {
5668 elements = TYPE_VECTOR_SUBPARTS (vectype);
5669 if (elements == 2)
5670 /* Double word aligned. */
5671 return 2;
5672
5673 if (elements == 4)
5674 {
5675 switch (misalign)
5676 {
5677 case 8:
5678 /* Double word aligned. */
5679 return 2;
5680
5681 case -1:
5682 /* Unknown misalignment. */
5683 case 4:
5684 case 12:
5685 /* Word aligned. */
5686 return 22;
5687
5688 default:
5689 gcc_unreachable ();
5690 }
5691 }
5692 }
5693
5694 if (TARGET_ALTIVEC)
5695 /* Misaligned loads are not supported. */
5696 gcc_unreachable ();
5697
5698 return 2;
5699
5700 case unaligned_store:
5701 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5702 return 1;
5703
5704 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5705 {
5706 elements = TYPE_VECTOR_SUBPARTS (vectype);
5707 if (elements == 2)
5708 /* Double word aligned. */
5709 return 2;
5710
5711 if (elements == 4)
5712 {
5713 switch (misalign)
5714 {
5715 case 8:
5716 /* Double word aligned. */
5717 return 2;
5718
5719 case -1:
5720 /* Unknown misalignment. */
5721 case 4:
5722 case 12:
5723 /* Word aligned. */
5724 return 23;
5725
5726 default:
5727 gcc_unreachable ();
5728 }
5729 }
5730 }
5731
5732 if (TARGET_ALTIVEC)
5733 /* Misaligned stores are not supported. */
5734 gcc_unreachable ();
5735
5736 return 2;
5737
5738 case vec_construct:
5739 /* This is a rough approximation assuming non-constant elements
5740 constructed into a vector via element insertion. FIXME:
5741 vec_construct is not granular enough for uniformly good
5742 decisions. If the initialization is a splat, this is
5743 cheaper than we estimate. Improve this someday. */
5744 elem_type = TREE_TYPE (vectype);
5745 /* 32-bit floats loaded into registers are stored as double
5746 precision, so we need 2 permutes, 2 converts, and 1 merge
5747 to construct a vector of short floats from them. */
5748 if (SCALAR_FLOAT_TYPE_P (elem_type)
5749 && TYPE_PRECISION (elem_type) == 32)
5750 return 5;
5751 /* On POWER9, integer vector types are built up in GPRs and then
5752 use a direct move (2 cycles). For POWER8 this is even worse,
5753 as we need two direct moves and a merge, and the direct moves
5754 are five cycles. */
5755 else if (INTEGRAL_TYPE_P (elem_type))
5756 {
5757 if (TARGET_P9_VECTOR)
5758 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5759 else
5760 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
5761 }
5762 else
5763 /* V2DFmode doesn't need a direct move. */
5764 return 2;
5765
5766 default:
5767 gcc_unreachable ();
5768 }
5769 }
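/* Worked example (editor's note): a V4SF unaligned_load with misalign == 4
   costs 22 on a VSX target that allows movmisalign but lacks efficient
   unaligned accesses; the same load costs 3 on a P9 vector target and 1
   when TARGET_EFFICIENT_UNALIGNED_VSX is set.  */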
5770
5771 /* Implement targetm.vectorize.preferred_simd_mode. */
5772
5773 static machine_mode
5774 rs6000_preferred_simd_mode (machine_mode mode)
5775 {
5776 if (TARGET_VSX)
5777 switch (mode)
5778 {
5779 case DFmode:
5780 return V2DFmode;
5781 default:;
5782 }
5783 if (TARGET_ALTIVEC || TARGET_VSX)
5784 switch (mode)
5785 {
5786 case SFmode:
5787 return V4SFmode;
5788 case TImode:
5789 return V1TImode;
5790 case DImode:
5791 return V2DImode;
5792 case SImode:
5793 return V4SImode;
5794 case HImode:
5795 return V8HImode;
5796 case QImode:
5797 return V16QImode;
5798 default:;
5799 }
5800 if (TARGET_PAIRED_FLOAT
5801 && mode == SFmode)
5802 return V2SFmode;
5803 return word_mode;
5804 }
5805
5806 typedef struct _rs6000_cost_data
5807 {
5808 struct loop *loop_info;
5809 unsigned cost[3];
5810 } rs6000_cost_data;
5811
5812 /* Test for likely overcommitment of vector hardware resources. If a
5813 loop iteration is relatively large, and too large a percentage of
5814 instructions in the loop are vectorized, the cost model may not
5815 adequately reflect delays from unavailable vector resources.
5816 Penalize the loop body cost for this case. */
5817
5818 static void
5819 rs6000_density_test (rs6000_cost_data *data)
5820 {
5821 const int DENSITY_PCT_THRESHOLD = 85;
5822 const int DENSITY_SIZE_THRESHOLD = 70;
5823 const int DENSITY_PENALTY = 10;
5824 struct loop *loop = data->loop_info;
5825 basic_block *bbs = get_loop_body (loop);
5826 int nbbs = loop->num_nodes;
5827 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5828 int i, density_pct;
5829
5830 for (i = 0; i < nbbs; i++)
5831 {
5832 basic_block bb = bbs[i];
5833 gimple_stmt_iterator gsi;
5834
5835 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5836 {
5837 gimple *stmt = gsi_stmt (gsi);
5838 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5839
5840 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5841 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5842 not_vec_cost++;
5843 }
5844 }
5845
5846 free (bbs);
5847 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5848
5849 if (density_pct > DENSITY_PCT_THRESHOLD
5850 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5851 {
5852 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_NOTE, vect_location,
5855 "density %d%%, cost %d exceeds threshold, penalizing "
5856 "loop body cost by %d%%", density_pct,
5857 vec_cost + not_vec_cost, DENSITY_PENALTY);
5858 }
5859 }
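/* Worked example (editor's note): with vec_cost == 90 and not_vec_cost
   == 10, density_pct is 90 and the loop size is 100, so both thresholds
   are exceeded and the body cost becomes 90 * 110 / 100 == 99.  */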
5860
5861 /* Implement targetm.vectorize.init_cost. */
5862
5863 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5864 instruction is needed by the vectorization. */
5865 static bool rs6000_vect_nonmem;
5866
5867 static void *
5868 rs6000_init_cost (struct loop *loop_info)
5869 {
5870 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5871 data->loop_info = loop_info;
5872 data->cost[vect_prologue] = 0;
5873 data->cost[vect_body] = 0;
5874 data->cost[vect_epilogue] = 0;
5875 rs6000_vect_nonmem = false;
5876 return data;
5877 }
5878
5879 /* Implement targetm.vectorize.add_stmt_cost. */
5880
5881 static unsigned
5882 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5883 struct _stmt_vec_info *stmt_info, int misalign,
5884 enum vect_cost_model_location where)
5885 {
5886 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5887 unsigned retval = 0;
5888
5889 if (flag_vect_cost_model)
5890 {
5891 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5892 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5893 misalign);
5894 /* Statements in an inner loop relative to the loop being
5895 vectorized are weighted more heavily. The value here is
5896 arbitrary and could potentially be improved with analysis. */
5897 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5898 count *= 50; /* FIXME. */
5899
5900 retval = (unsigned) (count * stmt_cost);
5901 cost_data->cost[where] += retval;
5902
5903 /* Check whether we're doing something other than just a copy loop.
5904 Not all such loops may be profitably vectorized; see
5905 rs6000_finish_cost. */
5906 if ((kind == vec_to_scalar || kind == vec_perm
5907 || kind == vec_promote_demote || kind == vec_construct
5908 || kind == scalar_to_vec)
5909 || (where == vect_body && kind == vector_stmt))
5910 rs6000_vect_nonmem = true;
5911 }
5912
5913 return retval;
5914 }
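/* Worked example (editor's note): two vector_stmt statements (cost 1 each)
   in an inner loop relative to the loop being vectorized have their count
   weighted by 50, so they contribute 2 * 50 * 1 == 100 to the body cost.  */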
5915
5916 /* Implement targetm.vectorize.finish_cost. */
5917
5918 static void
5919 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5920 unsigned *body_cost, unsigned *epilogue_cost)
5921 {
5922 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5923
5924 if (cost_data->loop_info)
5925 rs6000_density_test (cost_data);
5926
5927 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5928 that require versioning for any reason. The vectorization is at
5929 best a wash inside the loop, and the versioning checks make
5930 profitability highly unlikely and potentially quite harmful. */
5931 if (cost_data->loop_info)
5932 {
5933 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5934 if (!rs6000_vect_nonmem
5935 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5936 && LOOP_REQUIRES_VERSIONING (vec_info))
5937 cost_data->cost[vect_body] += 10000;
5938 }
5939
5940 *prologue_cost = cost_data->cost[vect_prologue];
5941 *body_cost = cost_data->cost[vect_body];
5942 *epilogue_cost = cost_data->cost[vect_epilogue];
5943 }
5944
5945 /* Implement targetm.vectorize.destroy_cost_data. */
5946
5947 static void
5948 rs6000_destroy_cost_data (void *data)
5949 {
5950 free (data);
5951 }
5952
5953 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5954 library with vectorized intrinsics. */
5955
5956 static tree
5957 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5958 tree type_in)
5959 {
5960 char name[32];
5961 const char *suffix = NULL;
5962 tree fntype, new_fndecl, bdecl = NULL_TREE;
5963 int n_args = 1;
5964 const char *bname;
5965 machine_mode el_mode, in_mode;
5966 int n, in_n;
5967
5968 /* Libmass is suitable for unsafe math only, as it does not correctly
5969 support parts of IEEE (such as denormals) at the required precision.
5970 Only support it if we have VSX to use the simd d2 or f4 functions.
5971 XXX: Add variable length support. */
5972 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5973 return NULL_TREE;
5974
5975 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5976 n = TYPE_VECTOR_SUBPARTS (type_out);
5977 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5978 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5979 if (el_mode != in_mode
5980 || n != in_n)
5981 return NULL_TREE;
5982
5983 switch (fn)
5984 {
5985 CASE_CFN_ATAN2:
5986 CASE_CFN_HYPOT:
5987 CASE_CFN_POW:
5988 n_args = 2;
5989 gcc_fallthrough ();
5990
5991 CASE_CFN_ACOS:
5992 CASE_CFN_ACOSH:
5993 CASE_CFN_ASIN:
5994 CASE_CFN_ASINH:
5995 CASE_CFN_ATAN:
5996 CASE_CFN_ATANH:
5997 CASE_CFN_CBRT:
5998 CASE_CFN_COS:
5999 CASE_CFN_COSH:
6000 CASE_CFN_ERF:
6001 CASE_CFN_ERFC:
6002 CASE_CFN_EXP2:
6003 CASE_CFN_EXP:
6004 CASE_CFN_EXPM1:
6005 CASE_CFN_LGAMMA:
6006 CASE_CFN_LOG10:
6007 CASE_CFN_LOG1P:
6008 CASE_CFN_LOG2:
6009 CASE_CFN_LOG:
6010 CASE_CFN_SIN:
6011 CASE_CFN_SINH:
6012 CASE_CFN_SQRT:
6013 CASE_CFN_TAN:
6014 CASE_CFN_TANH:
6015 if (el_mode == DFmode && n == 2)
6016 {
6017 bdecl = mathfn_built_in (double_type_node, fn);
6018 suffix = "d2"; /* pow -> powd2 */
6019 }
6020 else if (el_mode == SFmode && n == 4)
6021 {
6022 bdecl = mathfn_built_in (float_type_node, fn);
6023 suffix = "4"; /* powf -> powf4 */
6024 }
6025 else
6026 return NULL_TREE;
6027 if (!bdecl)
6028 return NULL_TREE;
6029 break;
6030
6031 default:
6032 return NULL_TREE;
6033 }
6034
6035 gcc_assert (suffix != NULL);
6036 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
6037 if (!bname)
6038 return NULL_TREE;
6039
6040 strcpy (name, bname + sizeof ("__builtin_") - 1);
6041 strcat (name, suffix);
6042
6043 if (n_args == 1)
6044 fntype = build_function_type_list (type_out, type_in, NULL);
6045 else if (n_args == 2)
6046 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
6047 else
6048 gcc_unreachable ();
6049
6050 /* Build a function declaration for the vectorized function. */
6051 new_fndecl = build_decl (BUILTINS_LOCATION,
6052 FUNCTION_DECL, get_identifier (name), fntype);
6053 TREE_PUBLIC (new_fndecl) = 1;
6054 DECL_EXTERNAL (new_fndecl) = 1;
6055 DECL_IS_NOVOPS (new_fndecl) = 1;
6056 TREE_READONLY (new_fndecl) = 1;
6057
6058 return new_fndecl;
6059 }
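/* For example (editor's note): a V2DF pow is mapped from "__builtin_pow"
   to the MASS routine "powd2" -- the "__builtin_" prefix is stripped and
   the "d2" suffix appended -- with a two-argument (type_in, type_in) ->
   type_out function type since n_args == 2.  */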
6060
6061 /* Returns a function decl for a vectorized version of the builtin function
6062 with builtin function code FN and the result vector type TYPE, or NULL_TREE
6063 if it is not available. */
6064
6065 static tree
6066 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
6067 tree type_in)
6068 {
6069 machine_mode in_mode, out_mode;
6070 int in_n, out_n;
6071
6072 if (TARGET_DEBUG_BUILTIN)
6073 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
6074 combined_fn_name (combined_fn (fn)),
6075 GET_MODE_NAME (TYPE_MODE (type_out)),
6076 GET_MODE_NAME (TYPE_MODE (type_in)));
6077
6078 if (TREE_CODE (type_out) != VECTOR_TYPE
6079 || TREE_CODE (type_in) != VECTOR_TYPE
6080 || !TARGET_VECTORIZE_BUILTINS)
6081 return NULL_TREE;
6082
6083 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6084 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6085 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6086 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6087
6088 switch (fn)
6089 {
6090 CASE_CFN_COPYSIGN:
6091 if (VECTOR_UNIT_VSX_P (V2DFmode)
6092 && out_mode == DFmode && out_n == 2
6093 && in_mode == DFmode && in_n == 2)
6094 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
6095 if (VECTOR_UNIT_VSX_P (V4SFmode)
6096 && out_mode == SFmode && out_n == 4
6097 && in_mode == SFmode && in_n == 4)
6098 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
6099 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6100 && out_mode == SFmode && out_n == 4
6101 && in_mode == SFmode && in_n == 4)
6102 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
6103 break;
6104 CASE_CFN_CEIL:
6105 if (VECTOR_UNIT_VSX_P (V2DFmode)
6106 && out_mode == DFmode && out_n == 2
6107 && in_mode == DFmode && in_n == 2)
6108 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
6109 if (VECTOR_UNIT_VSX_P (V4SFmode)
6110 && out_mode == SFmode && out_n == 4
6111 && in_mode == SFmode && in_n == 4)
6112 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
6113 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6114 && out_mode == SFmode && out_n == 4
6115 && in_mode == SFmode && in_n == 4)
6116 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
6117 break;
6118 CASE_CFN_FLOOR:
6119 if (VECTOR_UNIT_VSX_P (V2DFmode)
6120 && out_mode == DFmode && out_n == 2
6121 && in_mode == DFmode && in_n == 2)
6122 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
6123 if (VECTOR_UNIT_VSX_P (V4SFmode)
6124 && out_mode == SFmode && out_n == 4
6125 && in_mode == SFmode && in_n == 4)
6126 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
6127 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6128 && out_mode == SFmode && out_n == 4
6129 && in_mode == SFmode && in_n == 4)
6130 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
6131 break;
6132 CASE_CFN_FMA:
6133 if (VECTOR_UNIT_VSX_P (V2DFmode)
6134 && out_mode == DFmode && out_n == 2
6135 && in_mode == DFmode && in_n == 2)
6136 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
6137 if (VECTOR_UNIT_VSX_P (V4SFmode)
6138 && out_mode == SFmode && out_n == 4
6139 && in_mode == SFmode && in_n == 4)
6140 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
6141 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6142 && out_mode == SFmode && out_n == 4
6143 && in_mode == SFmode && in_n == 4)
6144 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
6145 break;
6146 CASE_CFN_TRUNC:
6147 if (VECTOR_UNIT_VSX_P (V2DFmode)
6148 && out_mode == DFmode && out_n == 2
6149 && in_mode == DFmode && in_n == 2)
6150 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
6151 if (VECTOR_UNIT_VSX_P (V4SFmode)
6152 && out_mode == SFmode && out_n == 4
6153 && in_mode == SFmode && in_n == 4)
6154 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
6155 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
6156 && out_mode == SFmode && out_n == 4
6157 && in_mode == SFmode && in_n == 4)
6158 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
6159 break;
6160 CASE_CFN_NEARBYINT:
6161 if (VECTOR_UNIT_VSX_P (V2DFmode)
6162 && flag_unsafe_math_optimizations
6163 && out_mode == DFmode && out_n == 2
6164 && in_mode == DFmode && in_n == 2)
6165 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
6166 if (VECTOR_UNIT_VSX_P (V4SFmode)
6167 && flag_unsafe_math_optimizations
6168 && out_mode == SFmode && out_n == 4
6169 && in_mode == SFmode && in_n == 4)
6170 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
6171 break;
6172 CASE_CFN_RINT:
6173 if (VECTOR_UNIT_VSX_P (V2DFmode)
6174 && !flag_trapping_math
6175 && out_mode == DFmode && out_n == 2
6176 && in_mode == DFmode && in_n == 2)
6177 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
6178 if (VECTOR_UNIT_VSX_P (V4SFmode)
6179 && !flag_trapping_math
6180 && out_mode == SFmode && out_n == 4
6181 && in_mode == SFmode && in_n == 4)
6182 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
6183 break;
6184 default:
6185 break;
6186 }
6187
6188 /* Generate calls to libmass if appropriate. */
6189 if (rs6000_veclib_handler)
6190 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
6191
6192 return NULL_TREE;
6193 }
6194
6195 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
6196
6197 static tree
6198 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
6199 tree type_in)
6200 {
6201 machine_mode in_mode, out_mode;
6202 int in_n, out_n;
6203
6204 if (TARGET_DEBUG_BUILTIN)
6205 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6206 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6207 GET_MODE_NAME (TYPE_MODE (type_out)),
6208 GET_MODE_NAME (TYPE_MODE (type_in)));
6209
6210 if (TREE_CODE (type_out) != VECTOR_TYPE
6211 || TREE_CODE (type_in) != VECTOR_TYPE
6212 || !TARGET_VECTORIZE_BUILTINS)
6213 return NULL_TREE;
6214
6215 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6216 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6217 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6218 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6219
6220 enum rs6000_builtins fn
6221 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6222 switch (fn)
6223 {
6224 case RS6000_BUILTIN_RSQRTF:
6225 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6226 && out_mode == SFmode && out_n == 4
6227 && in_mode == SFmode && in_n == 4)
6228 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6229 break;
6230 case RS6000_BUILTIN_RSQRT:
6231 if (VECTOR_UNIT_VSX_P (V2DFmode)
6232 && out_mode == DFmode && out_n == 2
6233 && in_mode == DFmode && in_n == 2)
6234 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6235 break;
6236 case RS6000_BUILTIN_RECIPF:
6237 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6238 && out_mode == SFmode && out_n == 4
6239 && in_mode == SFmode && in_n == 4)
6240 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6241 break;
6242 case RS6000_BUILTIN_RECIP:
6243 if (VECTOR_UNIT_VSX_P (V2DFmode)
6244 && out_mode == DFmode && out_n == 2
6245 && in_mode == DFmode && in_n == 2)
6246 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6247 break;
6248 default:
6249 break;
6250 }
6251 return NULL_TREE;
6252 }
6253 \f
6254 /* Default CPU string for rs6000*_file_start functions. */
6255 static const char *rs6000_default_cpu;
6256
6257 /* Do anything needed at the start of the asm file. */
6258
6259 static void
6260 rs6000_file_start (void)
6261 {
6262 char buffer[80];
6263 const char *start = buffer;
6264 FILE *file = asm_out_file;
6265
6266 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6267
6268 default_file_start ();
6269
6270 if (flag_verbose_asm)
6271 {
6272 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6273
6274 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6275 {
6276 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6277 start = "";
6278 }
6279
6280 if (global_options_set.x_rs6000_cpu_index)
6281 {
6282 fprintf (file, "%s -mcpu=%s", start,
6283 processor_target_table[rs6000_cpu_index].name);
6284 start = "";
6285 }
6286
6287 if (global_options_set.x_rs6000_tune_index)
6288 {
6289 fprintf (file, "%s -mtune=%s", start,
6290 processor_target_table[rs6000_tune_index].name);
6291 start = "";
6292 }
6293
6294 if (PPC405_ERRATUM77)
6295 {
6296 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6297 start = "";
6298 }
6299
6300 #ifdef USING_ELFOS_H
6301 switch (rs6000_sdata)
6302 {
6303 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6304 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6305 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6306 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6307 }
6308
6309 if (rs6000_sdata && g_switch_value)
6310 {
6311 fprintf (file, "%s -G %d", start,
6312 g_switch_value);
6313 start = "";
6314 }
6315 #endif
6316
6317 if (*start == '\0')
6318 putc ('\n', file);
6319 }
6320
6321 #ifdef USING_ELFOS_H
6322 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6323 && !global_options_set.x_rs6000_cpu_index)
6324 {
6325 fputs ("\t.machine ", asm_out_file);
6326 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6327 fputs ("power9\n", asm_out_file);
6328 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6329 fputs ("power8\n", asm_out_file);
6330 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6331 fputs ("power7\n", asm_out_file);
6332 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6333 fputs ("power6\n", asm_out_file);
6334 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6335 fputs ("power5\n", asm_out_file);
6336 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6337 fputs ("power4\n", asm_out_file);
6338 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6339 fputs ("ppc64\n", asm_out_file);
6340 else
6341 fputs ("ppc\n", asm_out_file);
6342 }
6343 #endif
6344
6345 if (DEFAULT_ABI == ABI_ELFv2)
6346 fprintf (file, "\t.abiversion 2\n");
6347 }
6348
6349 \f
6350 /* Return nonzero if this function is known to have a null epilogue. */
6351
6352 int
6353 direct_return (void)
6354 {
6355 if (reload_completed)
6356 {
6357 rs6000_stack_t *info = rs6000_stack_info ();
6358
6359 if (info->first_gp_reg_save == 32
6360 && info->first_fp_reg_save == 64
6361 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6362 && ! info->lr_save_p
6363 && ! info->cr_save_p
6364 && info->vrsave_size == 0
6365 && ! info->push_p)
6366 return 1;
6367 }
6368
6369 return 0;
6370 }
6371
6372 /* Return the number of instructions it takes to form a constant in an
6373 integer register. */
6374
6375 int
6376 num_insns_constant_wide (HOST_WIDE_INT value)
6377 {
6378 /* signed constant loadable with addi */
6379 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6380 return 1;
6381
6382 /* constant loadable with addis */
6383 else if ((value & 0xffff) == 0
6384 && (value >> 31 == -1 || value >> 31 == 0))
6385 return 1;
6386
6387 else if (TARGET_POWERPC64)
6388 {
6389 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6390 HOST_WIDE_INT high = value >> 31;
6391
6392 if (high == 0 || high == -1)
6393 return 2;
6394
6395 high >>= 1;
6396
6397 if (low == 0)
6398 return num_insns_constant_wide (high) + 1;
6399 else if (high == 0)
6400 return num_insns_constant_wide (low) + 1;
6401 else
6402 return (num_insns_constant_wide (high)
6403 + num_insns_constant_wide (low) + 1);
6404 }
6405
6406 else
6407 return 2;
6408 }
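#if 0
/* Editor's sketch of a hypothetical check (never compiled): on a 64-bit
   target, 0x123456789ABCDEF0 splits into high == 0x12345678 (2 insns:
   lis + ori) and a sign-extended low 32 bits (2 insns), plus one
   shift/merge insn, for 5 insns total.  */
static void
num_insns_constant_wide_example (void)
{
  gcc_assert (num_insns_constant_wide (HOST_WIDE_INT_C (0x123456789ABCDEF0))
	      == 5);
}
#endif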
6409
6410 int
6411 num_insns_constant (rtx op, machine_mode mode)
6412 {
6413 HOST_WIDE_INT low, high;
6414
6415 switch (GET_CODE (op))
6416 {
6417 case CONST_INT:
6418 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6419 && rs6000_is_valid_and_mask (op, mode))
6420 return 2;
6421 else
6422 return num_insns_constant_wide (INTVAL (op));
6423
6424 case CONST_WIDE_INT:
6425 {
6426 int i;
6427 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6428 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6429 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6430 return ins;
6431 }
6432
6433 case CONST_DOUBLE:
6434 if (mode == SFmode || mode == SDmode)
6435 {
6436 long l;
6437
6438 if (DECIMAL_FLOAT_MODE_P (mode))
6439 REAL_VALUE_TO_TARGET_DECIMAL32
6440 (*CONST_DOUBLE_REAL_VALUE (op), l);
6441 else
6442 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6443 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6444 }
6445
6446 long l[2];
6447 if (DECIMAL_FLOAT_MODE_P (mode))
6448 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6449 else
6450 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6451 high = l[WORDS_BIG_ENDIAN == 0];
6452 low = l[WORDS_BIG_ENDIAN != 0];
6453
6454 if (TARGET_32BIT)
6455 return (num_insns_constant_wide (low)
6456 + num_insns_constant_wide (high));
6457 else
6458 {
6459 if ((high == 0 && low >= 0)
6460 || (high == -1 && low < 0))
6461 return num_insns_constant_wide (low);
6462
6463 else if (rs6000_is_valid_and_mask (op, mode))
6464 return 2;
6465
6466 else if (low == 0)
6467 return num_insns_constant_wide (high) + 1;
6468
6469 else
6470 return (num_insns_constant_wide (high)
6471 + num_insns_constant_wide (low) + 1);
6472 }
6473
6474 default:
6475 gcc_unreachable ();
6476 }
6477 }
6478
6479 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6480 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6481 corresponding element of the vector, but for V4SFmode and V2SFmode,
6482 the corresponding "float" is interpreted as an SImode integer. */
6483
6484 HOST_WIDE_INT
6485 const_vector_elt_as_int (rtx op, unsigned int elt)
6486 {
6487 rtx tmp;
6488
6489 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6490 gcc_assert (GET_MODE (op) != V2DImode
6491 && GET_MODE (op) != V2DFmode);
6492
6493 tmp = CONST_VECTOR_ELT (op, elt);
6494 if (GET_MODE (op) == V4SFmode
6495 || GET_MODE (op) == V2SFmode)
6496 tmp = gen_lowpart (SImode, tmp);
6497 return INTVAL (tmp);
6498 }
6499
6500 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6501 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6502 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6503 all items are set to the same value and contain COPIES replicas of the
6504 vsplt's operand; if STEP > 1, one in every STEP elements is set to the
6505 vsplt's operand and the others are set to the value of the operand's msb. */
6506
6507 static bool
6508 vspltis_constant (rtx op, unsigned step, unsigned copies)
6509 {
6510 machine_mode mode = GET_MODE (op);
6511 machine_mode inner = GET_MODE_INNER (mode);
6512
6513 unsigned i;
6514 unsigned nunits;
6515 unsigned bitsize;
6516 unsigned mask;
6517
6518 HOST_WIDE_INT val;
6519 HOST_WIDE_INT splat_val;
6520 HOST_WIDE_INT msb_val;
6521
6522 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6523 return false;
6524
6525 nunits = GET_MODE_NUNITS (mode);
6526 bitsize = GET_MODE_BITSIZE (inner);
6527 mask = GET_MODE_MASK (inner);
6528
6529 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6530 splat_val = val;
6531 msb_val = val >= 0 ? 0 : -1;
6532
6533 /* Construct the value to be splatted, if possible. If not, return 0. */
6534 for (i = 2; i <= copies; i *= 2)
6535 {
6536 HOST_WIDE_INT small_val;
6537 bitsize /= 2;
6538 small_val = splat_val >> bitsize;
6539 mask >>= bitsize;
6540 if (splat_val != ((HOST_WIDE_INT)
6541 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6542 | (small_val & mask)))
6543 return false;
6544 splat_val = small_val;
6545 }
6546
6547 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6548 if (EASY_VECTOR_15 (splat_val))
6549 ;
6550
6551 /* Also check if we can splat, and then add the result to itself. Do so if
6552 the value is positive, or if the splat instruction is using OP's mode;
6553 for splat_val < 0, the splat and the add should use the same mode. */
6554 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6555 && (splat_val >= 0 || (step == 1 && copies == 1)))
6556 ;
6557
6558 /* Also check if we are loading up the most significant bit, which can be
6559 done by loading up -1 and shifting the value left by -1. */
6560 else if (EASY_VECTOR_MSB (splat_val, inner))
6561 ;
6562
6563 else
6564 return false;
6565
6566 /* Check if VAL is present in every STEP-th element, and the
6567 other elements are filled with its most significant bit. */
6568 for (i = 1; i < nunits; ++i)
6569 {
6570 HOST_WIDE_INT desired_val;
6571 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6572 if ((i & (step - 1)) == 0)
6573 desired_val = val;
6574 else
6575 desired_val = msb_val;
6576
6577 if (desired_val != const_vector_elt_as_int (op, elt))
6578 return false;
6579 }
6580
6581 return true;
6582 }
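/* For example (editor's note): a V8HImode vector with every element equal
   to 0x0101 is not directly splattable (257 > 15), but with COPIES == 2
   the halfword folds to the byte value 1, so vspltisb 1 generates it.  */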
6583
6584 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6585 instruction, filling in the bottom elements with 0 or -1.
6586
6587 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6588 for the number of zeroes to shift in, or negative for the number of 0xff
6589 bytes to shift in.
6590
6591 OP is a CONST_VECTOR. */
6592
6593 int
6594 vspltis_shifted (rtx op)
6595 {
6596 machine_mode mode = GET_MODE (op);
6597 machine_mode inner = GET_MODE_INNER (mode);
6598
6599 unsigned i, j;
6600 unsigned nunits;
6601 unsigned mask;
6602
6603 HOST_WIDE_INT val;
6604
6605 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6606 return 0;
6607
6608 /* We need to create pseudo registers to do the shift, so don't recognize
6609 shift vector constants after reload. */
6610 if (!can_create_pseudo_p ())
6611 return 0;
6612
6613 nunits = GET_MODE_NUNITS (mode);
6614 mask = GET_MODE_MASK (inner);
6615
6616 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6617
6618 /* Check if the value can really be the operand of a vspltis[bhw]. */
6619 if (EASY_VECTOR_15 (val))
6620 ;
6621
6622 /* Also check if we are loading up the most significant bit, which can be
6623 done by loading up -1 and shifting the value left by -1. */
6624 else if (EASY_VECTOR_MSB (val, inner))
6625 ;
6626
6627 else
6628 return 0;
6629
6630 /* Check if VAL is present in every element until we find elements
6631 that are 0 or all 1 bits. */
6632 for (i = 1; i < nunits; ++i)
6633 {
6634 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6635 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6636
6637 /* If the value isn't the splat value, check for the remaining elements
6638 being 0/-1. */
6639 if (val != elt_val)
6640 {
6641 if (elt_val == 0)
6642 {
6643 for (j = i+1; j < nunits; ++j)
6644 {
6645 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6646 if (const_vector_elt_as_int (op, elt2) != 0)
6647 return 0;
6648 }
6649
6650 return (nunits - i) * GET_MODE_SIZE (inner);
6651 }
6652
6653 else if ((elt_val & mask) == mask)
6654 {
6655 for (j = i+1; j < nunits; ++j)
6656 {
6657 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6658 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6659 return 0;
6660 }
6661
6662 return -((nunits - i) * GET_MODE_SIZE (inner));
6663 }
6664
6665 else
6666 return 0;
6667 }
6668 }
6669
6670 /* If all elements are equal, we don't need to do VSLDOI. */
6671 return 0;
6672 }
6673
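/* Example return values for the walk above, for V16QImode on a big-endian
   target: { 5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0 } returns 4 (splat the 5s, then
   VSLDOI shifts in 4 zero bytes), { 5,...,5,-1,-1,-1,-1 } returns -4 (shift
   in 4 0xff bytes), and a vector whose trailing elements are neither 0 nor
   all 1 bits returns 0.  */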
6674
6675 /* Return true if OP is of the given MODE and can be synthesized
6676 with a vspltisb, vspltish or vspltisw. */
6677
6678 bool
6679 easy_altivec_constant (rtx op, machine_mode mode)
6680 {
6681 unsigned step, copies;
6682
6683 if (mode == VOIDmode)
6684 mode = GET_MODE (op);
6685 else if (mode != GET_MODE (op))
6686 return false;
6687
6688 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6689 constants. */
6690 if (mode == V2DFmode)
6691 return zero_constant (op, mode);
6692
6693 else if (mode == V2DImode)
6694 {
6695 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6696 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6697 return false;
6698
6699 if (zero_constant (op, mode))
6700 return true;
6701
6702 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6703 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6704 return true;
6705
6706 return false;
6707 }
6708
6709 /* V1TImode is a special container for TImode. Ignore for now. */
6710 else if (mode == V1TImode)
6711 return false;
6712
6713 /* Start with a vspltisw. */
6714 step = GET_MODE_NUNITS (mode) / 4;
6715 copies = 1;
6716
6717 if (vspltis_constant (op, step, copies))
6718 return true;
6719
6720 /* Then try with a vspltish. */
6721 if (step == 1)
6722 copies <<= 1;
6723 else
6724 step >>= 1;
6725
6726 if (vspltis_constant (op, step, copies))
6727 return true;
6728
6729 /* And finally a vspltisb. */
6730 if (step == 1)
6731 copies <<= 1;
6732 else
6733 step >>= 1;
6734
6735 if (vspltis_constant (op, step, copies))
6736 return true;
6737
6738 if (vspltis_shifted (op) != 0)
6739 return true;
6740
6741 return false;
6742 }
6743
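/* The retry ladder above walks the three splat instructions from widest to
   narrowest element.  For V16QImode that means (STEP, COPIES) = (4, 1) for
   vspltisw, (2, 1) for vspltish and (1, 1) for vspltisb; for V4SImode it is
   (1, 1), (1, 2) and (1, 4), since each 32-bit element must then contain two
   or four copies of the narrower splat value.  */
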
6744 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6745 result is OP. Abort if it is not possible. */
6746
6747 rtx
6748 gen_easy_altivec_constant (rtx op)
6749 {
6750 machine_mode mode = GET_MODE (op);
6751 int nunits = GET_MODE_NUNITS (mode);
6752 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6753 unsigned step = nunits / 4;
6754 unsigned copies = 1;
6755
6756 /* Start with a vspltisw. */
6757 if (vspltis_constant (op, step, copies))
6758 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6759
6760 /* Then try with a vspltish. */
6761 if (step == 1)
6762 copies <<= 1;
6763 else
6764 step >>= 1;
6765
6766 if (vspltis_constant (op, step, copies))
6767 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6768
6769 /* And finally a vspltisb. */
6770 if (step == 1)
6771 copies <<= 1;
6772 else
6773 step >>= 1;
6774
6775 if (vspltis_constant (op, step, copies))
6776 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6777
6778 gcc_unreachable ();
6779 }
6780
6781 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6782 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6783
6784 Return the number of instructions needed (1 or 2) in the location pointed
6785 to by NUM_INSNS_PTR.
6786
6787 Return the constant to be splatted via CONSTANT_PTR. */
6788
6789 bool
6790 xxspltib_constant_p (rtx op,
6791 machine_mode mode,
6792 int *num_insns_ptr,
6793 int *constant_ptr)
6794 {
6795 size_t nunits = GET_MODE_NUNITS (mode);
6796 size_t i;
6797 HOST_WIDE_INT value;
6798 rtx element;
6799
6800 /* Set the returned values to out-of-bounds values. */
6801 *num_insns_ptr = -1;
6802 *constant_ptr = 256;
6803
6804 if (!TARGET_P9_VECTOR)
6805 return false;
6806
6807 if (mode == VOIDmode)
6808 mode = GET_MODE (op);
6809
6810 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6811 return false;
6812
6813 /* Handle (vec_duplicate <constant>). */
6814 if (GET_CODE (op) == VEC_DUPLICATE)
6815 {
6816 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6817 && mode != V2DImode)
6818 return false;
6819
6820 element = XEXP (op, 0);
6821 if (!CONST_INT_P (element))
6822 return false;
6823
6824 value = INTVAL (element);
6825 if (!IN_RANGE (value, -128, 127))
6826 return false;
6827 }
6828
6829 /* Handle (const_vector [...]). */
6830 else if (GET_CODE (op) == CONST_VECTOR)
6831 {
6832 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6833 && mode != V2DImode)
6834 return false;
6835
6836 element = CONST_VECTOR_ELT (op, 0);
6837 if (!CONST_INT_P (element))
6838 return false;
6839
6840 value = INTVAL (element);
6841 if (!IN_RANGE (value, -128, 127))
6842 return false;
6843
6844 for (i = 1; i < nunits; i++)
6845 {
6846 element = CONST_VECTOR_ELT (op, i);
6847 if (!CONST_INT_P (element))
6848 return false;
6849
6850 if (value != INTVAL (element))
6851 return false;
6852 }
6853 }
6854
6855 /* Handle integer constants being loaded into the upper part of the VSX
6856 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6857 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6858 else if (CONST_INT_P (op))
6859 {
6860 if (!SCALAR_INT_MODE_P (mode))
6861 return false;
6862
6863 value = INTVAL (op);
6864 if (!IN_RANGE (value, -128, 127))
6865 return false;
6866
6867 if (!IN_RANGE (value, -1, 0))
6868 {
6869 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6870 return false;
6871
6872 if (EASY_VECTOR_15 (value))
6873 return false;
6874 }
6875 }
6876
6877 else
6878 return false;
6879
6880 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6881 sign extend. Special case 0/-1 to allow getting any VSX register instead
6882 of an Altivec register. */
6883 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6884 && EASY_VECTOR_15 (value))
6885 return false;
6886
6887 /* Return # of instructions and the constant byte for XXSPLTIB. */
6888 if (mode == V16QImode)
6889 *num_insns_ptr = 1;
6890
6891 else if (IN_RANGE (value, -1, 0))
6892 *num_insns_ptr = 1;
6893
6894 else
6895 *num_insns_ptr = 2;
6896
6897 *constant_ptr = (int) value;
6898 return true;
6899 }
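
/* Sample results of the checks above: a V16QImode vector of sixteen 10s
   gives *NUM_INSNS_PTR = 1 (a lone xxspltib), a V4SImode vector of four 100s
   gives *NUM_INSNS_PTR = 2 (the splatted byte must be sign extended with
   vextsb2w), and a V4SImode vector of four 10s returns false, because
   EASY_VECTOR_15 prefers a single vspltisw there.  */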
6900
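/* Output the assembly template for moving easy vector constant OPERANDS[1]
   into vector register OPERANDS[0], returning "#" when the move must instead
   be split into multiple instructions.  */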
6901 const char *
6902 output_vec_const_move (rtx *operands)
6903 {
6904 int shift;
6905 machine_mode mode;
6906 rtx dest, vec;
6907
6908 dest = operands[0];
6909 vec = operands[1];
6910 mode = GET_MODE (dest);
6911
6912 if (TARGET_VSX)
6913 {
6914 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6915 int xxspltib_value = 256;
6916 int num_insns = -1;
6917
6918 if (zero_constant (vec, mode))
6919 {
6920 if (TARGET_P9_VECTOR)
6921 return "xxspltib %x0,0";
6922
6923 else if (dest_vmx_p)
6924 return "vspltisw %0,0";
6925
6926 else
6927 return "xxlxor %x0,%x0,%x0";
6928 }
6929
6930 if (all_ones_constant (vec, mode))
6931 {
6932 if (TARGET_P9_VECTOR)
6933 return "xxspltib %x0,255";
6934
6935 else if (dest_vmx_p)
6936 return "vspltisw %0,-1";
6937
6938 else if (TARGET_P8_VECTOR)
6939 return "xxlorc %x0,%x0,%x0";
6940
6941 else
6942 gcc_unreachable ();
6943 }
6944
6945 if (TARGET_P9_VECTOR
6946 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6947 {
6948 if (num_insns == 1)
6949 {
6950 operands[2] = GEN_INT (xxspltib_value & 0xff);
6951 return "xxspltib %x0,%2";
6952 }
6953
6954 return "#";
6955 }
6956 }
6957
6958 if (TARGET_ALTIVEC)
6959 {
6960 rtx splat_vec;
6961
6962 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6963 if (zero_constant (vec, mode))
6964 return "vspltisw %0,0";
6965
6966 if (all_ones_constant (vec, mode))
6967 return "vspltisw %0,-1";
6968
6969 /* Do we need to construct a value using VSLDOI? */
6970 shift = vspltis_shifted (vec);
6971 if (shift != 0)
6972 return "#";
6973
6974 splat_vec = gen_easy_altivec_constant (vec);
6975 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6976 operands[1] = XEXP (splat_vec, 0);
6977 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6978 return "#";
6979
6980 switch (GET_MODE (splat_vec))
6981 {
6982 case V4SImode:
6983 return "vspltisw %0,%1";
6984
6985 case V8HImode:
6986 return "vspltish %0,%1";
6987
6988 case V16QImode:
6989 return "vspltisb %0,%1";
6990
6991 default:
6992 gcc_unreachable ();
6993 }
6994 }
6995
6996 gcc_unreachable ();
6997 }
6998
6999 /* Initialize the paired-float vector TARGET to VALS. */
7000
7001 void
7002 paired_expand_vector_init (rtx target, rtx vals)
7003 {
7004 machine_mode mode = GET_MODE (target);
7005 int n_elts = GET_MODE_NUNITS (mode);
7006 int n_var = 0;
7007 rtx x, new_rtx, tmp, constant_op, op1, op2;
7008 int i;
7009
7010 for (i = 0; i < n_elts; ++i)
7011 {
7012 x = XVECEXP (vals, 0, i);
7013 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7014 ++n_var;
7015 }
7016 if (n_var == 0)
7017 {
7018 /* Load from constant pool. */
7019 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
7020 return;
7021 }
7022
7023 if (n_var == 2)
7024 {
7025 /* The vector is initialized only with non-constants. */
7026 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
7027 XVECEXP (vals, 0, 1));
7028
7029 emit_move_insn (target, new_rtx);
7030 return;
7031 }
7032
7033 /* One field is non-constant and the other one is a constant. Load the
7034 constant from the constant pool and use the ps_merge instruction to
7035 construct the whole vector. */
7036 op1 = XVECEXP (vals, 0, 0);
7037 op2 = XVECEXP (vals, 0, 1);
7038
7039 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
7040
7041 tmp = gen_reg_rtx (GET_MODE (constant_op));
7042 emit_move_insn (tmp, constant_op);
7043
7044 if (CONSTANT_P (op1))
7045 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
7046 else
7047 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
7048
7049 emit_move_insn (target, new_rtx);
7050 }
7051
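/* Expand a paired-float (V2SF) vector move; simply emit the move of
   OPERANDS[1] into OPERANDS[0].  */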
7052 void
7053 paired_expand_vector_move (rtx operands[])
7054 {
7055 rtx op0 = operands[0], op1 = operands[1];
7056
7057 emit_move_insn (op0, op1);
7058 }
7059
7060 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
7061 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
7062 operands for the relation operation COND. This is a recursive
7063 function. */
7064
7065 static void
7066 paired_emit_vector_compare (enum rtx_code rcode,
7067 rtx dest, rtx op0, rtx op1,
7068 rtx cc_op0, rtx cc_op1)
7069 {
7070 rtx tmp = gen_reg_rtx (V2SFmode);
7071 rtx tmp1, max, min;
7072
7073 gcc_assert (TARGET_PAIRED_FLOAT);
7074 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
7075
7076 switch (rcode)
7077 {
7078 case LT:
7079 case LTU:
7080 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7081 return;
7082 case GE:
7083 case GEU:
7084 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7085 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
7086 return;
7087 case LE:
7088 case LEU:
7089 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
7090 return;
7091 case GT:
7092 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7093 return;
7094 case EQ:
7095 tmp1 = gen_reg_rtx (V2SFmode);
7096 max = gen_reg_rtx (V2SFmode);
7097 min = gen_reg_rtx (V2SFmode);
7099
7100 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
7101 emit_insn (gen_selv2sf4
7102 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7103 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
7104 emit_insn (gen_selv2sf4
7105 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
7106 emit_insn (gen_subv2sf3 (tmp1, min, max));
7107 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
7108 return;
7109 case NE:
7110 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
7111 return;
7112 case UNLE:
7113 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
7114 return;
7115 case UNLT:
7116 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
7117 return;
7118 case UNGE:
7119 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
7120 return;
7121 case UNGT:
7122 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
7123 return;
7124 default:
7125 gcc_unreachable ();
7126 }
7127
7128 return;
7129 }
7130
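/* Every comparison code above is eventually rewritten in terms of the GE
   case, which needs only a paired subtract plus a ps_sel.  Tracing GT as an
   example: GT swaps the select operands to become LE, and LE swaps the
   compare operands to become GE, so "dest = (a > b) ? t : f" is emitted as
   "tmp = b - a; dest = (tmp >= 0) ? f : t".  */
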
7131 /* Emit vector conditional expression.
7132 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
7133 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
7134
7135 int
7136 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
7137 rtx cond, rtx cc_op0, rtx cc_op1)
7138 {
7139 enum rtx_code rcode = GET_CODE (cond);
7140
7141 if (!TARGET_PAIRED_FLOAT)
7142 return 0;
7143
7144 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
7145
7146 return 1;
7147 }
7148
7149 /* Initialize vector TARGET to VALS. */
7150
7151 void
7152 rs6000_expand_vector_init (rtx target, rtx vals)
7153 {
7154 machine_mode mode = GET_MODE (target);
7155 machine_mode inner_mode = GET_MODE_INNER (mode);
7156 int n_elts = GET_MODE_NUNITS (mode);
7157 int n_var = 0, one_var = -1;
7158 bool all_same = true, all_const_zero = true;
7159 rtx x, mem;
7160 int i;
7161
7162 for (i = 0; i < n_elts; ++i)
7163 {
7164 x = XVECEXP (vals, 0, i);
7165 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
7166 ++n_var, one_var = i;
7167 else if (x != CONST0_RTX (inner_mode))
7168 all_const_zero = false;
7169
7170 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7171 all_same = false;
7172 }
7173
7174 if (n_var == 0)
7175 {
7176 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7177 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
7178 if ((int_vector_p || TARGET_VSX) && all_const_zero)
7179 {
7180 /* Zero register. */
7181 emit_move_insn (target, CONST0_RTX (mode));
7182 return;
7183 }
7184 else if (int_vector_p && easy_vector_constant (const_vec, mode))
7185 {
7186 /* Splat immediate. */
7187 emit_insn (gen_rtx_SET (target, const_vec));
7188 return;
7189 }
7190 else
7191 {
7192 /* Load from constant pool. */
7193 emit_move_insn (target, const_vec);
7194 return;
7195 }
7196 }
7197
7198 /* Double word values on VSX can use xxpermdi or lxvdsx. */
7199 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
7200 {
7201 rtx op[2];
7202 size_t i;
7203 size_t num_elements = all_same ? 1 : 2;
7204 for (i = 0; i < num_elements; i++)
7205 {
7206 op[i] = XVECEXP (vals, 0, i);
7207 /* Just in case there is a SUBREG with a smaller mode, do a
7208 conversion. */
7209 if (GET_MODE (op[i]) != inner_mode)
7210 {
7211 rtx tmp = gen_reg_rtx (inner_mode);
7212 convert_move (tmp, op[i], 0);
7213 op[i] = tmp;
7214 }
7215 /* Allow load with splat double word. */
7216 else if (MEM_P (op[i]))
7217 {
7218 if (!all_same)
7219 op[i] = force_reg (inner_mode, op[i]);
7220 }
7221 else if (!REG_P (op[i]))
7222 op[i] = force_reg (inner_mode, op[i]);
7223 }
7224
7225 if (all_same)
7226 {
7227 if (mode == V2DFmode)
7228 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7229 else
7230 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7231 }
7232 else
7233 {
7234 if (mode == V2DFmode)
7235 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7236 else
7237 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7238 }
7239 return;
7240 }
7241
7242 /* Special case initializing vector int if we are on 64-bit systems with
7243 direct move or we have the ISA 3.0 instructions. */
7244 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7245 && TARGET_DIRECT_MOVE_64BIT)
7246 {
7247 if (all_same)
7248 {
7249 rtx element0 = XVECEXP (vals, 0, 0);
7250 if (MEM_P (element0))
7251 element0 = rs6000_address_for_fpconvert (element0);
7252 else
7253 element0 = force_reg (SImode, element0);
7254
7255 if (TARGET_P9_VECTOR)
7256 emit_insn (gen_vsx_splat_v4si (target, element0));
7257 else
7258 {
7259 rtx tmp = gen_reg_rtx (DImode);
7260 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7261 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7262 }
7263 return;
7264 }
7265 else
7266 {
7267 rtx elements[4];
7268 size_t i;
7269
7270 for (i = 0; i < 4; i++)
7271 {
7272 elements[i] = XVECEXP (vals, 0, i);
7273 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7274 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7275 }
7276
7277 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7278 elements[2], elements[3]));
7279 return;
7280 }
7281 }
7282
7283 /* With single precision floating point on VSX, we know that internally
7284 single precision is actually represented as a double, so either make two
7285 V2DF vectors and convert those vectors to single precision, or do one
7286 conversion and splat the result to the other elements. */
7287 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7288 {
7289 if (all_same)
7290 {
7291 rtx element0 = XVECEXP (vals, 0, 0);
7292
7293 if (TARGET_P9_VECTOR)
7294 {
7295 if (MEM_P (element0))
7296 element0 = rs6000_address_for_fpconvert (element0);
7297
7298 emit_insn (gen_vsx_splat_v4sf (target, element0));
7299 }
7300
7301 else
7302 {
7303 rtx freg = gen_reg_rtx (V4SFmode);
7304 rtx sreg = force_reg (SFmode, element0);
7305 rtx cvt = (TARGET_XSCVDPSPN
7306 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7307 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7308
7309 emit_insn (cvt);
7310 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7311 const0_rtx));
7312 }
7313 }
7314 else
7315 {
7316 rtx dbl_even = gen_reg_rtx (V2DFmode);
7317 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7318 rtx flt_even = gen_reg_rtx (V4SFmode);
7319 rtx flt_odd = gen_reg_rtx (V4SFmode);
7320 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7321 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7322 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7323 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7324
7325 /* Use VMRGEW if we can instead of doing a permute. */
7326 if (TARGET_P8_VECTOR)
7327 {
7328 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7329 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7330 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7331 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7332 if (BYTES_BIG_ENDIAN)
7333 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7334 else
7335 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7336 }
7337 else
7338 {
7339 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7340 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7341 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7342 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7343 rs6000_expand_extract_even (target, flt_even, flt_odd);
7344 }
7345 }
7346 return;
7347 }
7348
7349 /* Special case initializing vector short/char that are splats if we are on
7350 64-bit systems with direct move. */
7351 if (all_same && TARGET_DIRECT_MOVE_64BIT
7352 && (mode == V16QImode || mode == V8HImode))
7353 {
7354 rtx op0 = XVECEXP (vals, 0, 0);
7355 rtx di_tmp = gen_reg_rtx (DImode);
7356
7357 if (!REG_P (op0))
7358 op0 = force_reg (GET_MODE_INNER (mode), op0);
7359
7360 if (mode == V16QImode)
7361 {
7362 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7363 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7364 return;
7365 }
7366
7367 if (mode == V8HImode)
7368 {
7369 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7370 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7371 return;
7372 }
7373 }
7374
7375 /* Store value to stack temp. Load vector element. Splat. However, splat
7376 of 64-bit items is not supported on Altivec. */
7377 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7378 {
7379 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7380 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7381 XVECEXP (vals, 0, 0));
7382 x = gen_rtx_UNSPEC (VOIDmode,
7383 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7384 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7385 gen_rtvec (2,
7386 gen_rtx_SET (target, mem),
7387 x)));
7388 x = gen_rtx_VEC_SELECT (inner_mode, target,
7389 gen_rtx_PARALLEL (VOIDmode,
7390 gen_rtvec (1, const0_rtx)));
7391 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7392 return;
7393 }
7394
7395 /* One field is non-constant. Load constant then overwrite
7396 varying field. */
7397 if (n_var == 1)
7398 {
7399 rtx copy = copy_rtx (vals);
7400
7401 /* Load constant part of vector, substitute neighboring value for
7402 varying element. */
7403 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7404 rs6000_expand_vector_init (target, copy);
7405
7406 /* Insert variable. */
7407 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7408 return;
7409 }
7410
7411 /* Construct the vector in memory one field at a time
7412 and load the whole vector. */
7413 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7414 for (i = 0; i < n_elts; i++)
7415 emit_move_insn (adjust_address_nv (mem, inner_mode,
7416 i * GET_MODE_SIZE (inner_mode)),
7417 XVECEXP (vals, 0, i));
7418 emit_move_insn (target, mem);
7419 }
7420
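/* For instance, a V4SFmode initializer whose four elements are identical
   takes the all_same path above: without the ISA 3.0 splat it converts the
   scalar once (xscvdpspn when available, else xscvdpsp) and then splats
   word 0 to the remaining elements with xxspltw.  */
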
7421 /* Set field ELT of TARGET to VAL. */
7422
7423 void
7424 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7425 {
7426 machine_mode mode = GET_MODE (target);
7427 machine_mode inner_mode = GET_MODE_INNER (mode);
7428 rtx reg = gen_reg_rtx (mode);
7429 rtx mask, mem, x;
7430 int width = GET_MODE_SIZE (inner_mode);
7431 int i;
7432
7433 val = force_reg (GET_MODE (val), val);
7434
7435 if (VECTOR_MEM_VSX_P (mode))
7436 {
7437 rtx insn = NULL_RTX;
7438 rtx elt_rtx = GEN_INT (elt);
7439
7440 if (mode == V2DFmode)
7441 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7442
7443 else if (mode == V2DImode)
7444 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7445
7446 else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
7447 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
7448 {
7449 if (mode == V4SImode)
7450 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7451 else if (mode == V8HImode)
7452 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7453 else if (mode == V16QImode)
7454 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7455 else if (mode == V4SFmode)
7456 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7457 }
7458
7459 if (insn)
7460 {
7461 emit_insn (insn);
7462 return;
7463 }
7464 }
7465
7466 /* Simplify setting single element vectors like V1TImode. */
7467 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7468 {
7469 emit_move_insn (target, gen_lowpart (mode, val));
7470 return;
7471 }
7472
7473 /* Load single variable value. */
7474 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7475 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7476 x = gen_rtx_UNSPEC (VOIDmode,
7477 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7478 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7479 gen_rtvec (2,
7480 gen_rtx_SET (reg, mem),
7481 x)));
7482
7483 /* Linear sequence. */
7484 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7485 for (i = 0; i < 16; ++i)
7486 XVECEXP (mask, 0, i) = GEN_INT (i);
7487
7488 /* Set permute mask to insert element into target. */
7489 for (i = 0; i < width; ++i)
7490 XVECEXP (mask, 0, elt*width + i)
7491 = GEN_INT (i + 0x10);
7492 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7493
7494 if (BYTES_BIG_ENDIAN)
7495 x = gen_rtx_UNSPEC (mode,
7496 gen_rtvec (3, target, reg,
7497 force_reg (V16QImode, x)),
7498 UNSPEC_VPERM);
7499 else
7500 {
7501 if (TARGET_P9_VECTOR)
7502 x = gen_rtx_UNSPEC (mode,
7503 gen_rtvec (3, target, reg,
7504 force_reg (V16QImode, x)),
7505 UNSPEC_VPERMR);
7506 else
7507 {
7508 /* Invert selector. We prefer to generate VNAND on P8 so
7509 that future fusion opportunities can kick in, but must
7510 generate VNOR elsewhere. */
7511 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7512 rtx iorx = (TARGET_P8_VECTOR
7513 ? gen_rtx_IOR (V16QImode, notx, notx)
7514 : gen_rtx_AND (V16QImode, notx, notx));
7515 rtx tmp = gen_reg_rtx (V16QImode);
7516 emit_insn (gen_rtx_SET (tmp, iorx));
7517
7518 /* Permute with operands reversed and adjusted selector. */
7519 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7520 UNSPEC_VPERM);
7521 }
7522 }
7523
7524 emit_insn (gen_rtx_SET (target, x));
7525 }
7526
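/* As an example of the mask built above: setting element 2 of a V4SImode
   vector on a big-endian target produces the byte selector
   { 0,1,...,7, 16,17,18,19, 12,13,14,15 }, so the permute copies bytes 8-11
   from the register holding the splatted new value and every other byte
   from TARGET.  */
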
7527 /* Extract field ELT from VEC into TARGET. */
7528
7529 void
7530 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7531 {
7532 machine_mode mode = GET_MODE (vec);
7533 machine_mode inner_mode = GET_MODE_INNER (mode);
7534 rtx mem;
7535
7536 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7537 {
7538 switch (mode)
7539 {
7540 default:
7541 break;
7542 case V1TImode:
7543 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7544 emit_move_insn (target, gen_lowpart (TImode, vec));
7545 break;
7546 case V2DFmode:
7547 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7548 return;
7549 case V2DImode:
7550 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7551 return;
7552 case V4SFmode:
7553 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7554 return;
7555 case V16QImode:
7556 if (TARGET_DIRECT_MOVE_64BIT)
7557 {
7558 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7559 return;
7560 }
7561 else
7562 break;
7563 case V8HImode:
7564 if (TARGET_DIRECT_MOVE_64BIT)
7565 {
7566 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7567 return;
7568 }
7569 else
7570 break;
7571 case V4SImode:
7572 if (TARGET_DIRECT_MOVE_64BIT)
7573 {
7574 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7575 return;
7576 }
7577 break;
7578 }
7579 }
7580 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7581 && TARGET_DIRECT_MOVE_64BIT)
7582 {
7583 if (GET_MODE (elt) != DImode)
7584 {
7585 rtx tmp = gen_reg_rtx (DImode);
7586 convert_move (tmp, elt, 0);
7587 elt = tmp;
7588 }
7589 else if (!REG_P (elt))
7590 elt = force_reg (DImode, elt);
7591
7592 switch (mode)
7593 {
7594 case V2DFmode:
7595 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7596 return;
7597
7598 case V2DImode:
7599 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7600 return;
7601
7602 case V4SFmode:
7603 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7604 return;
7605
7606 case V4SImode:
7607 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7608 return;
7609
7610 case V8HImode:
7611 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7612 return;
7613
7614 case V16QImode:
7615 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7616 return;
7617
7618 default:
7619 gcc_unreachable ();
7620 }
7621 }
7622
7623 gcc_assert (CONST_INT_P (elt));
7624
7625 /* Allocate mode-sized buffer. */
7626 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7627
7628 emit_move_insn (mem, vec);
7629
7630 /* Add offset to field within buffer matching vector element. */
7631 mem = adjust_address_nv (mem, inner_mode,
7632 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7633
7634 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7635 }
7636
7637 /* Helper function to return the register number of an RTX. */
7638 static inline int
7639 regno_or_subregno (rtx op)
7640 {
7641 if (REG_P (op))
7642 return REGNO (op);
7643 else if (SUBREG_P (op))
7644 return subreg_regno (op);
7645 else
7646 gcc_unreachable ();
7647 }
7648
7649 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7650 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7651 temporary (BASE_TMP) to fix up the address. Return the new memory address
7652 that is valid for reads or writes to a given register (SCALAR_REG). */
7653
7654 rtx
7655 rs6000_adjust_vec_address (rtx scalar_reg,
7656 rtx mem,
7657 rtx element,
7658 rtx base_tmp,
7659 machine_mode scalar_mode)
7660 {
7661 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7662 rtx addr = XEXP (mem, 0);
7663 rtx element_offset;
7664 rtx new_addr;
7665 bool valid_addr_p;
7666
7667 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7668 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7669
7670 /* Calculate what we need to add to the address to get the element
7671 address. */
7672 if (CONST_INT_P (element))
7673 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7674 else
7675 {
7676 int byte_shift = exact_log2 (scalar_size);
7677 gcc_assert (byte_shift >= 0);
7678
7679 if (byte_shift == 0)
7680 element_offset = element;
7681
7682 else
7683 {
7684 if (TARGET_POWERPC64)
7685 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7686 else
7687 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7688
7689 element_offset = base_tmp;
7690 }
7691 }
7692
7693 /* Create the new address pointing to the element within the vector. If we
7694 are adding 0, we don't have to change the address. */
7695 if (element_offset == const0_rtx)
7696 new_addr = addr;
7697
7698 /* A simple indirect address can be converted into a reg + offset
7699 address. */
7700 else if (REG_P (addr) || SUBREG_P (addr))
7701 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7702
7703 /* Optimize D-FORM addresses with a constant offset and a constant element
7704 number, folding the element offset directly into the address. */
7705 else if (GET_CODE (addr) == PLUS)
7706 {
7707 rtx op0 = XEXP (addr, 0);
7708 rtx op1 = XEXP (addr, 1);
7709 rtx insn;
7710
7711 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7712 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7713 {
7714 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7715 rtx offset_rtx = GEN_INT (offset);
7716
7717 if (IN_RANGE (offset, -32768, 32767)
7718 && (scalar_size < 8 || (offset & 0x3) == 0))
7719 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7720 else
7721 {
7722 emit_move_insn (base_tmp, offset_rtx);
7723 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7724 }
7725 }
7726 else
7727 {
7728 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7729 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7730
7731 /* Note, ADDI requires the register being added to be a base
7732 register. If the register was R0, load it up into the temporary
7733 and do the add. */
7734 if (op1_reg_p
7735 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7736 {
7737 insn = gen_add3_insn (base_tmp, op1, element_offset);
7738 gcc_assert (insn != NULL_RTX);
7739 emit_insn (insn);
7740 }
7741
7742 else if (ele_reg_p
7743 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7744 {
7745 insn = gen_add3_insn (base_tmp, element_offset, op1);
7746 gcc_assert (insn != NULL_RTX);
7747 emit_insn (insn);
7748 }
7749
7750 else
7751 {
7752 emit_move_insn (base_tmp, op1);
7753 emit_insn (gen_add2_insn (base_tmp, element_offset));
7754 }
7755
7756 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7757 }
7758 }
7759
7760 else
7761 {
7762 emit_move_insn (base_tmp, addr);
7763 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7764 }
7765
7766 /* If we have a PLUS, we need to see whether the particular register class
7767 allows for D-FORM or X-FORM addressing. */
7768 if (GET_CODE (new_addr) == PLUS)
7769 {
7770 rtx op1 = XEXP (new_addr, 1);
7771 addr_mask_type addr_mask;
7772 int scalar_regno = regno_or_subregno (scalar_reg);
7773
7774 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7775 if (INT_REGNO_P (scalar_regno))
7776 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7777
7778 else if (FP_REGNO_P (scalar_regno))
7779 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7780
7781 else if (ALTIVEC_REGNO_P (scalar_regno))
7782 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7783
7784 else
7785 gcc_unreachable ();
7786
7787 if (REG_P (op1) || SUBREG_P (op1))
7788 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7789 else
7790 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7791 }
7792
7793 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7794 valid_addr_p = true;
7795
7796 else
7797 valid_addr_p = false;
7798
7799 if (!valid_addr_p)
7800 {
7801 emit_move_insn (base_tmp, new_addr);
7802 new_addr = base_tmp;
7803 }
7804
7805 return change_address (mem, scalar_mode, new_addr);
7806 }
7807
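/* A short numeric trace of the folding above: a V2DImode MEM at r3+16 with
   constant ELEMENT = 1 and DImode scalars gives ELEMENT_OFFSET = 8; the two
   constants fold to r3+24, which is in D-form range and word aligned, so the
   returned MEM is simply a DImode access at r3+24.  */
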
7808 /* Split a variable vec_extract operation into the component instructions. */
7809
7810 void
7811 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7812 rtx tmp_altivec)
7813 {
7814 machine_mode mode = GET_MODE (src);
7815 machine_mode scalar_mode = GET_MODE (dest);
7816 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7817 int byte_shift = exact_log2 (scalar_size);
7818
7819 gcc_assert (byte_shift >= 0);
7820
7821 /* If we are given a memory address, optimize to load just the element. We
7822 don't have to adjust the vector element number on little endian
7823 systems. */
7824 if (MEM_P (src))
7825 {
7826 gcc_assert (REG_P (tmp_gpr));
7827 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7828 tmp_gpr, scalar_mode));
7829 return;
7830 }
7831
7832 else if (REG_P (src) || SUBREG_P (src))
7833 {
7834 int bit_shift = byte_shift + 3;
7835 rtx element2;
7836 int dest_regno = regno_or_subregno (dest);
7837 int src_regno = regno_or_subregno (src);
7838 int element_regno = regno_or_subregno (element);
7839
7840 gcc_assert (REG_P (tmp_gpr));
7841
7842 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7843 a general purpose register. */
7844 if (TARGET_P9_VECTOR
7845 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7846 && INT_REGNO_P (dest_regno)
7847 && ALTIVEC_REGNO_P (src_regno)
7848 && INT_REGNO_P (element_regno))
7849 {
7850 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7851 rtx element_si = gen_rtx_REG (SImode, element_regno);
7852
7853 if (mode == V16QImode)
7854 emit_insn (VECTOR_ELT_ORDER_BIG
7855 ? gen_vextublx (dest_si, element_si, src)
7856 : gen_vextubrx (dest_si, element_si, src));
7857
7858 else if (mode == V8HImode)
7859 {
7860 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7861 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7862 emit_insn (VECTOR_ELT_ORDER_BIG
7863 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7864 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7865 }
7866
7867
7868 else
7869 {
7870 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7871 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7872 emit_insn (VECTOR_ELT_ORDER_BIG
7873 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7874 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7875 }
7876
7877 return;
7878 }
7879
7880
7881 gcc_assert (REG_P (tmp_altivec));
7882
7883 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7884 an XOR, otherwise we need to subtract. The shift amount is chosen so that
7885 VSLO will shift the element into the upper position (adding 3 to convert a
7886 byte shift into a bit shift). */
7887 if (scalar_size == 8)
7888 {
7889 if (!VECTOR_ELT_ORDER_BIG)
7890 {
7891 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7892 element2 = tmp_gpr;
7893 }
7894 else
7895 element2 = element;
7896
7897 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7898 bit. */
7899 emit_insn (gen_rtx_SET (tmp_gpr,
7900 gen_rtx_AND (DImode,
7901 gen_rtx_ASHIFT (DImode,
7902 element2,
7903 GEN_INT (6)),
7904 GEN_INT (64))));
7905 }
7906 else
7907 {
7908 if (!VECTOR_ELT_ORDER_BIG)
7909 {
7910 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7911
7912 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7913 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7914 element2 = tmp_gpr;
7915 }
7916 else
7917 element2 = element;
7918
7919 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7920 }
7921
7922 /* Get the value into the lower byte of the Altivec register where VSLO
7923 expects it. */
7924 if (TARGET_P9_VECTOR)
7925 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7926 else if (can_create_pseudo_p ())
7927 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7928 else
7929 {
7930 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7931 emit_move_insn (tmp_di, tmp_gpr);
7932 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7933 }
7934
7935 /* Do the VSLO to get the value into the final location. */
7936 switch (mode)
7937 {
7938 case V2DFmode:
7939 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7940 return;
7941
7942 case V2DImode:
7943 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7944 return;
7945
7946 case V4SFmode:
7947 {
7948 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7949 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7950 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7951 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7952 tmp_altivec));
7953
7954 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7955 return;
7956 }
7957
7958 case V4SImode:
7959 case V8HImode:
7960 case V16QImode:
7961 {
7962 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7963 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7964 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7965 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7966 tmp_altivec));
7967 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7968 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7969 GEN_INT (64 - (8 * scalar_size))));
7970 return;
7971 }
7972
7973 default:
7974 gcc_unreachable ();
7975 }
7976
7977 return;
7978 }
7979 else
7980 gcc_unreachable ();
7981 }
7982
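/* To illustrate the little-endian adjustment above for V4SImode
   (SCALAR_SIZE = 4, BIT_SHIFT = 5): element E becomes 3 - (E & 3) and is
   then shifted left by 5, i.e. multiplied by 32, giving the bit count VSLO
   needs to slide that word into the high end of the register (E = 0 gives
   96, E = 3 gives 0).  */
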
7983 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7984 two SImode values. */
7985
7986 static void
7987 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7988 {
7989 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7990
7991 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7992 {
7993 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7994 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7995
7996 emit_move_insn (dest, GEN_INT (const1 | const2));
7997 return;
7998 }
7999
8000 /* Put si1 into upper 32-bits of dest. */
8001 if (CONST_INT_P (si1))
8002 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
8003 else
8004 {
8005 /* Generate RLDIC. */
8006 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
8007 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
8008 rtx mask_rtx = GEN_INT (mask_32bit << 32);
8009 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
8010 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
8011 emit_insn (gen_rtx_SET (dest, and_rtx));
8012 }
8013
8014 /* Put si2 into the temporary. */
8015 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
8016 if (CONST_INT_P (si2))
8017 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
8018 else
8019 emit_insn (gen_zero_extendsidi2 (tmp, si2));
8020
8021 /* Combine the two parts. */
8022 emit_insn (gen_iordi3 (dest, dest, tmp));
8023 return;
8024 }
8025
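#if 0
/* Illustrative sketch, compiled out: the constant path above amounts to
   this host-side arithmetic.  */
static unsigned long long
combine_si_pair (unsigned long long si1, unsigned long long si2)
{
  unsigned long long hi = (si1 & 0xffffffffULL) << 32;  /* upper word */
  unsigned long long lo = si2 & 0xffffffffULL;          /* lower word */
  return hi | lo;   /* e.g. combine_si_pair (5, 6) == 0x500000006 */
}
#endif
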
8026 /* Split a V4SI initialization. */
8027
8028 void
8029 rs6000_split_v4si_init (rtx operands[])
8030 {
8031 rtx dest = operands[0];
8032
8033 /* Destination is a GPR, build up the two DImode parts in place. */
8034 if (REG_P (dest) || SUBREG_P (dest))
8035 {
8036 int d_regno = regno_or_subregno (dest);
8037 rtx scalar1 = operands[1];
8038 rtx scalar2 = operands[2];
8039 rtx scalar3 = operands[3];
8040 rtx scalar4 = operands[4];
8041 rtx tmp1 = operands[5];
8042 rtx tmp2 = operands[6];
8043
8044 /* Even though we only need one temporary (plus the destination, which
8045 has an early clobber constraint), try to use two temporaries, one for
8046 each double word created. That way the 2nd insn scheduling pass can
8047 rearrange things so the two parts are done in parallel. */
8048 if (BYTES_BIG_ENDIAN)
8049 {
8050 rtx di_lo = gen_rtx_REG (DImode, d_regno);
8051 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
8052 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
8053 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
8054 }
8055 else
8056 {
8057 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
8058 rtx di_hi = gen_rtx_REG (DImode, d_regno);
8059 gcc_assert (!VECTOR_ELT_ORDER_BIG);
8060 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
8061 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
8062 }
8063 return;
8064 }
8065
8066 else
8067 gcc_unreachable ();
8068 }
8069
8070 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
8071 selects whether the ABI-mandated alignment, the optional alignment,
8072 or both are applied. */
8073
8074 unsigned int
8075 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8076 {
8077 if (how != align_opt)
8078 {
8079 if (TREE_CODE (type) == VECTOR_TYPE)
8080 {
8081 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))
8082 {
8083 if (align < 64)
8084 align = 64;
8085 }
8086 else if (align < 128)
8087 align = 128;
8088 }
8089 }
8090
8091 if (how != align_abi)
8092 {
8093 if (TREE_CODE (type) == ARRAY_TYPE
8094 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8095 {
8096 if (align < BITS_PER_WORD)
8097 align = BITS_PER_WORD;
8098 }
8099 }
8100
8101 return align;
8102 }
8103
8104 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
8105
8106 bool
8107 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
8108 {
8109 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
8110 {
8111 if (computed != 128)
8112 {
8113 static bool warned;
8114 if (!warned && warn_psabi)
8115 {
8116 warned = true;
8117 inform (input_location,
8118 "the layout of aggregates containing vectors with"
8119 " %d-byte alignment has changed in GCC 5",
8120 computed / BITS_PER_UNIT);
8121 }
8122 }
8123 /* In current GCC there is no special case. */
8124 return false;
8125 }
8126
8127 return false;
8128 }
8129
8130 /* AIX increases natural record alignment to doubleword if the first
8131 field is an FP double while the FP fields remain word aligned. */
8132
8133 unsigned int
8134 rs6000_special_round_type_align (tree type, unsigned int computed,
8135 unsigned int specified)
8136 {
8137 unsigned int align = MAX (computed, specified);
8138 tree field = TYPE_FIELDS (type);
8139
8140 /* Skip all non-field decls. */
8141 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8142 field = DECL_CHAIN (field);
8143
8144 if (field != NULL && field != type)
8145 {
8146 type = TREE_TYPE (field);
8147 while (TREE_CODE (type) == ARRAY_TYPE)
8148 type = TREE_TYPE (type);
8149
8150 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
8151 align = MAX (align, 64);
8152 }
8153
8154 return align;
8155 }
8156
8157 /* Darwin increases record alignment to the natural alignment of
8158 the first field. */
8159
8160 unsigned int
8161 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8162 unsigned int specified)
8163 {
8164 unsigned int align = MAX (computed, specified);
8165
8166 if (TYPE_PACKED (type))
8167 return align;
8168
8169 /* Find the first field, looking down into aggregates. */
8170 do {
8171 tree field = TYPE_FIELDS (type);
8172 /* Skip all non-field decls. */
8173 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
8174 field = DECL_CHAIN (field);
8175 if (! field)
8176 break;
8177 /* A packed field does not contribute any extra alignment. */
8178 if (DECL_PACKED (field))
8179 return align;
8180 type = TREE_TYPE (field);
8181 while (TREE_CODE (type) == ARRAY_TYPE)
8182 type = TREE_TYPE (type);
8183 } while (AGGREGATE_TYPE_P (type));
8184
8185 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8186 align = MAX (align, TYPE_ALIGN (type));
8187
8188 return align;
8189 }
8190
8191 /* Return 1 for an operand in small memory on V.4/eabi. */
8192
8193 int
8194 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8195 machine_mode mode ATTRIBUTE_UNUSED)
8196 {
8197 #if TARGET_ELF
8198 rtx sym_ref;
8199
8200 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8201 return 0;
8202
8203 if (DEFAULT_ABI != ABI_V4)
8204 return 0;
8205
8206 if (GET_CODE (op) == SYMBOL_REF)
8207 sym_ref = op;
8208
8209 else if (GET_CODE (op) != CONST
8210 || GET_CODE (XEXP (op, 0)) != PLUS
8211 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8212 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8213 return 0;
8214
8215 else
8216 {
8217 rtx sum = XEXP (op, 0);
8218 HOST_WIDE_INT summand;
8219
8220 /* We have to be careful here, because it is the referenced address
8221 that must be within 32k of _SDA_BASE_, not just the symbol. */
8222 summand = INTVAL (XEXP (sum, 1));
8223 if (summand < 0 || summand > g_switch_value)
8224 return 0;
8225
8226 sym_ref = XEXP (sum, 0);
8227 }
8228
8229 return SYMBOL_REF_SMALL_P (sym_ref);
8230 #else
8231 return 0;
8232 #endif
8233 }
8234
8235 /* Return true if either operand is a general purpose register. */
8236
8237 bool
8238 gpr_or_gpr_p (rtx op0, rtx op1)
8239 {
8240 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8241 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8242 }
8243
8244 /* Return true if this is a move direct operation between GPR registers and
8245 floating point/VSX registers. */
8246
8247 bool
8248 direct_move_p (rtx op0, rtx op1)
8249 {
8250 int regno0, regno1;
8251
8252 if (!REG_P (op0) || !REG_P (op1))
8253 return false;
8254
8255 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8256 return false;
8257
8258 regno0 = REGNO (op0);
8259 regno1 = REGNO (op1);
8260 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8261 return false;
8262
8263 if (INT_REGNO_P (regno0))
8264 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8265
8266 else if (INT_REGNO_P (regno1))
8267 {
8268 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8269 return true;
8270
8271 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8272 return true;
8273 }
8274
8275 return false;
8276 }
8277
8278 /* Return true if the OFFSET is valid for the quad address instructions that
8279 use d-form (register + offset) addressing. */
8280
8281 static inline bool
8282 quad_address_offset_p (HOST_WIDE_INT offset)
8283 {
8284 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
8285 }
8286
8287 /* Return true if ADDR is an acceptable address for a quad memory
8288 operation of mode MODE (either LQ/STQ for general purpose registers, or
8289 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the base
8290 register must be strictly valid, i.e. a hard register after reload. */
8292
8293 bool
8294 quad_address_p (rtx addr, machine_mode mode, bool strict)
8295 {
8296 rtx op0, op1;
8297
8298 if (GET_MODE_SIZE (mode) != 16)
8299 return false;
8300
8301 if (legitimate_indirect_address_p (addr, strict))
8302 return true;
8303
8304 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8305 return false;
8306
8307 if (GET_CODE (addr) != PLUS)
8308 return false;
8309
8310 op0 = XEXP (addr, 0);
8311 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8312 return false;
8313
8314 op1 = XEXP (addr, 1);
8315 if (!CONST_INT_P (op1))
8316 return false;
8317
8318 return quad_address_offset_p (INTVAL (op1));
8319 }
8320
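/* Example offsets for the test above: 0, 16 and -32768 are all accepted,
   50 is rejected (not a multiple of 16), and 32768 is rejected (outside the
   signed 16-bit displacement range).  */
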
8321 /* Return true if this is a load or store quad operation. This function does
8322 not handle the atomic quad memory instructions. */
8323
8324 bool
8325 quad_load_store_p (rtx op0, rtx op1)
8326 {
8327 bool ret;
8328
8329 if (!TARGET_QUAD_MEMORY)
8330 ret = false;
8331
8332 else if (REG_P (op0) && MEM_P (op1))
8333 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8334 && quad_memory_operand (op1, GET_MODE (op1))
8335 && !reg_overlap_mentioned_p (op0, op1));
8336
8337 else if (MEM_P (op0) && REG_P (op1))
8338 ret = (quad_memory_operand (op0, GET_MODE (op0))
8339 && quad_int_reg_operand (op1, GET_MODE (op1)));
8340
8341 else
8342 ret = false;
8343
8344 if (TARGET_DEBUG_ADDR)
8345 {
8346 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8347 ret ? "true" : "false");
8348 debug_rtx (gen_rtx_SET (op0, op1));
8349 }
8350
8351 return ret;
8352 }
8353
8354 /* Given an address, return a constant offset term if one exists. */
8355
8356 static rtx
8357 address_offset (rtx op)
8358 {
8359 if (GET_CODE (op) == PRE_INC
8360 || GET_CODE (op) == PRE_DEC)
8361 op = XEXP (op, 0);
8362 else if (GET_CODE (op) == PRE_MODIFY
8363 || GET_CODE (op) == LO_SUM)
8364 op = XEXP (op, 1);
8365
8366 if (GET_CODE (op) == CONST)
8367 op = XEXP (op, 0);
8368
8369 if (GET_CODE (op) == PLUS)
8370 op = XEXP (op, 1);
8371
8372 if (CONST_INT_P (op))
8373 return op;
8374
8375 return NULL_RTX;
8376 }
8377
8378 /* Return true if the MEM operand is a memory operand suitable for use
8379 with a (full width, possibly multiple) gpr load/store. On
8380 powerpc64 this means the offset must be divisible by 4.
8381 Implements 'Y' constraint.
8382
8383 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8384 a constraint function we know the operand has satisfied a suitable
8385 memory predicate. Also accept some odd rtl generated by reload
8386 (see rs6000_legitimize_reload_address for various forms). It is
8387 important that reload rtl be accepted by appropriate constraints
8388 but not by the operand predicate.
8389
8390 Offsetting a lo_sum should not be allowed, except where we know by
8391 alignment that a 32k boundary is not crossed, but see the ???
8392 comment in rs6000_legitimize_reload_address. Note that by
8393 "offsetting" here we mean a further offset to access parts of the
8394 MEM. It's fine to have a lo_sum where the inner address is offset
8395 from a sym, since the same sym+offset will appear in the high part
8396 of the address calculation. */
8397
8398 bool
8399 mem_operand_gpr (rtx op, machine_mode mode)
8400 {
8401 unsigned HOST_WIDE_INT offset;
8402 int extra;
8403 rtx addr = XEXP (op, 0);
8404
8405 op = address_offset (addr);
8406 if (op == NULL_RTX)
8407 return true;
8408
8409 offset = INTVAL (op);
8410 if (TARGET_POWERPC64 && (offset & 3) != 0)
8411 return false;
8412
8413 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8414 if (extra < 0)
8415 extra = 0;
8416
8417 if (GET_CODE (addr) == LO_SUM)
8418 /* For lo_sum addresses, we must allow any offset except one that
8419 causes a wrap, so test only the low 16 bits. */
8420 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8421
8422 return offset + 0x8000 < 0x10000u - extra;
8423 }
8424
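/* The EXTRA adjustment above keeps the last word of a multi-register access
   addressable: for TImode on powerpc64, EXTRA = 8, so offset 32752 is
   accepted while 32760 is rejected, since 32760 + 8 would overflow the
   16-bit signed displacement of the second doubleword load.  */
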
8425 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8426 enforce an offset divisible by 4 even for 32-bit. */
8427
8428 bool
8429 mem_operand_ds_form (rtx op, machine_mode mode)
8430 {
8431 unsigned HOST_WIDE_INT offset;
8432 int extra;
8433 rtx addr = XEXP (op, 0);
8434
8435 if (!offsettable_address_p (false, mode, addr))
8436 return false;
8437
8438 op = address_offset (addr);
8439 if (op == NULL_RTX)
8440 return true;
8441
8442 offset = INTVAL (op);
8443 if ((offset & 3) != 0)
8444 return false;
8445
8446 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8447 if (extra < 0)
8448 extra = 0;
8449
8450 if (GET_CODE (addr) == LO_SUM)
8451 /* For lo_sum addresses, we must allow any offset except one that
8452 causes a wrap, so test only the low 16 bits. */
8453 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8454
8455 return offset + 0x8000 < 0x10000u - extra;
8456 }
8457 \f
8458 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8459
8460 static bool
8461 reg_offset_addressing_ok_p (machine_mode mode)
8462 {
8463 switch (mode)
8464 {
8465 case V16QImode:
8466 case V8HImode:
8467 case V4SFmode:
8468 case V4SImode:
8469 case V2DFmode:
8470 case V2DImode:
8471 case V1TImode:
8472 case TImode:
8473 case TFmode:
8474 case KFmode:
8475 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8476 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8477 a vector mode, if we want to use the VSX registers to move it around,
8478 we need to restrict ourselves to reg+reg addressing. Similarly for
8479 IEEE 128-bit floating point that is passed in a single vector
8480 register. */
8481 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8482 return mode_supports_vsx_dform_quad (mode);
8483 break;
8484
8485 case V2SImode:
8486 case V2SFmode:
8487 /* Paired vector modes. Only reg+reg addressing is valid. */
8488 if (TARGET_PAIRED_FLOAT)
8489 return false;
8490 break;
8491
8492 case SDmode:
8493 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8494 addressing for the LFIWZX and STFIWX instructions. */
8495 if (TARGET_NO_SDMODE_STACK)
8496 return false;
8497 break;
8498
8499 default:
8500 break;
8501 }
8502
8503 return true;
8504 }
8505
8506 static bool
8507 virtual_stack_registers_memory_p (rtx op)
8508 {
8509 int regnum;
8510
8511 if (GET_CODE (op) == REG)
8512 regnum = REGNO (op);
8513
8514 else if (GET_CODE (op) == PLUS
8515 && GET_CODE (XEXP (op, 0)) == REG
8516 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8517 regnum = REGNO (XEXP (op, 0));
8518
8519 else
8520 return false;
8521
8522 return (regnum >= FIRST_VIRTUAL_REGISTER
8523 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8524 }
8525
8526 /* Return true if a MODE-sized memory access to OP plus OFFSET
8527 is known to not straddle a 32k boundary. This function is used
8528 to determine whether -mcmodel=medium code can use TOC pointer
8529 relative addressing for OP. This means the alignment of the TOC
8530 pointer must also be taken into account, and unfortunately that is
8531 only 8 bytes. */
8532
8533 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8534 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8535 #endif
8536
8537 static bool
8538 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8539 machine_mode mode)
8540 {
8541 tree decl;
8542 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8543
8544 if (GET_CODE (op) != SYMBOL_REF)
8545 return false;
8546
8547 /* ISA 3.0 vector d-form addressing is restricted; don't allow
8548 SYMBOL_REF. */
8549 if (mode_supports_vsx_dform_quad (mode))
8550 return false;
8551
8552 dsize = GET_MODE_SIZE (mode);
8553 decl = SYMBOL_REF_DECL (op);
8554 if (!decl)
8555 {
8556 if (dsize == 0)
8557 return false;
8558
8559 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8560 replacing memory addresses with an anchor plus offset. We
8561 could find the decl by rummaging around in the block->objects
8562 VEC for the given offset but that seems like too much work. */
8563 dalign = BITS_PER_UNIT;
8564 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8565 && SYMBOL_REF_ANCHOR_P (op)
8566 && SYMBOL_REF_BLOCK (op) != NULL)
8567 {
8568 struct object_block *block = SYMBOL_REF_BLOCK (op);
8569
8570 dalign = block->alignment;
8571 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8572 }
8573 else if (CONSTANT_POOL_ADDRESS_P (op))
8574 {
8575 /* It would be nice to have get_pool_align().. */
8576 machine_mode cmode = get_pool_mode (op);
8577
8578 dalign = GET_MODE_ALIGNMENT (cmode);
8579 }
8580 }
8581 else if (DECL_P (decl))
8582 {
8583 dalign = DECL_ALIGN (decl);
8584
8585 if (dsize == 0)
8586 {
8587 /* Allow BLKmode when the entire object is known to not
8588 cross a 32k boundary. */
8589 if (!DECL_SIZE_UNIT (decl))
8590 return false;
8591
8592 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8593 return false;
8594
8595 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8596 if (dsize > 32768)
8597 return false;
8598
8599 dalign /= BITS_PER_UNIT;
8600 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8601 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8602 return dalign >= dsize;
8603 }
8604 }
8605 else
8606 gcc_unreachable ();
8607
8608 /* Find how many bits of the alignment we know for this access. */
8609 dalign /= BITS_PER_UNIT;
8610 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8611 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8612 mask = dalign - 1;
8613 lsb = offset & -offset;
8614 mask &= lsb - 1;
8615 dalign = mask + 1;
8616
8617 return dalign >= dsize;
8618 }
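
/* A trace of the bit fiddling above: for a doubleword-aligned decl and
   OFFSET = 12, MASK = 7, LSB = 12 & -12 = 4, and MASK &= LSB - 1 leaves
   DALIGN = 4; the access is only known to be 4-byte aligned, so it passes
   for DSIZE <= 4 but fails for a doubleword access.  */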
8619
8620 static bool
8621 constant_pool_expr_p (rtx op)
8622 {
8623 rtx base, offset;
8624
8625 split_const (op, &base, &offset);
8626 return (GET_CODE (base) == SYMBOL_REF
8627 && CONSTANT_POOL_ADDRESS_P (base)
8628 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8629 }
8630
8631 /* These are only used to pass through from print_operand/print_operand_address
8632 to rs6000_output_addr_const_extra over the intervening function
8633 output_addr_const which is not target code. */
8634 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8635
8636 /* Return true if OP is a toc pointer relative address (the output
8637 of create_TOC_reference). If STRICT, do not match non-split
8638 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8639 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8640 TOCREL_OFFSET_RET respectively. */
8641
8642 bool
8643 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8644 const_rtx *tocrel_offset_ret)
8645 {
8646 if (!TARGET_TOC)
8647 return false;
8648
8649 if (TARGET_CMODEL != CMODEL_SMALL)
8650 {
8651 /* When strict, ensure we have everything tidy. */
8652 if (strict
8653 && !(GET_CODE (op) == LO_SUM
8654 && REG_P (XEXP (op, 0))
8655 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8656 return false;
8657
8658 /* When not strict, allow non-split TOC addresses and also allow
8659 (lo_sum (high ..)) TOC addresses created during reload. */
8660 if (GET_CODE (op) == LO_SUM)
8661 op = XEXP (op, 1);
8662 }
8663
8664 const_rtx tocrel_base = op;
8665 const_rtx tocrel_offset = const0_rtx;
8666
8667 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8668 {
8669 tocrel_base = XEXP (op, 0);
8670 tocrel_offset = XEXP (op, 1);
8671 }
8672
8673 if (tocrel_base_ret)
8674 *tocrel_base_ret = tocrel_base;
8675 if (tocrel_offset_ret)
8676 *tocrel_offset_ret = tocrel_offset;
8677
8678 return (GET_CODE (tocrel_base) == UNSPEC
8679 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8680 }
8681
8682 /* Return true if X is a constant pool address, and also for cmodel=medium
8683 if X is a toc-relative address known to be offsettable within MODE. */
8684
8685 bool
8686 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8687 bool strict)
8688 {
8689 const_rtx tocrel_base, tocrel_offset;
8690 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8691 && (TARGET_CMODEL != CMODEL_MEDIUM
8692 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8693 || mode == QImode
8694 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8695 INTVAL (tocrel_offset), mode)));
8696 }
8697
8698 static bool
8699 legitimate_small_data_p (machine_mode mode, rtx x)
8700 {
8701 return (DEFAULT_ABI == ABI_V4
8702 && !flag_pic && !TARGET_TOC
8703 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8704 && small_data_operand (x, mode));
8705 }
8706
8707 bool
8708 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8709 bool strict, bool worst_case)
8710 {
8711 unsigned HOST_WIDE_INT offset;
8712 unsigned int extra;
8713
8714 if (GET_CODE (x) != PLUS)
8715 return false;
8716 if (!REG_P (XEXP (x, 0)))
8717 return false;
8718 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8719 return false;
8720 if (mode_supports_vsx_dform_quad (mode))
8721 return quad_address_p (x, mode, strict);
8722 if (!reg_offset_addressing_ok_p (mode))
8723 return virtual_stack_registers_memory_p (x);
8724 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8725 return true;
8726 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8727 return false;
8728
8729 offset = INTVAL (XEXP (x, 1));
8730 extra = 0;
8731 switch (mode)
8732 {
8733 case V2SImode:
8734 case V2SFmode:
8735 /* Paired single modes: offset addressing isn't valid. */
8736 return false;
8737
8738 case DFmode:
8739 case DDmode:
8740 case DImode:
8741 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8742 addressing. */
8743 if (VECTOR_MEM_VSX_P (mode))
8744 return false;
8745
8746 if (!worst_case)
8747 break;
8748 if (!TARGET_POWERPC64)
8749 extra = 4;
8750 else if (offset & 3)
8751 return false;
8752 break;
8753
8754 case TFmode:
8755 case IFmode:
8756 case KFmode:
8757 case TDmode:
8758 case TImode:
8759 case PTImode:
8760 extra = 8;
8761 if (!worst_case)
8762 break;
8763 if (!TARGET_POWERPC64)
8764 extra = 12;
8765 else if (offset & 3)
8766 return false;
8767 break;
8768
8769 default:
8770 break;
8771 }
8772
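/* Fold the signed range check into a single unsigned comparison:
   adding 0x8000 maps the valid displacement range
   [-0x8000, 0x8000 - extra) onto [0, 0x10000 - extra). */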
8773 offset += 0x8000;
8774 return offset < 0x10000 - extra;
8775 }
8776
8777 bool
8778 legitimate_indexed_address_p (rtx x, int strict)
8779 {
8780 rtx op0, op1;
8781
8782 if (GET_CODE (x) != PLUS)
8783 return false;
8784
8785 op0 = XEXP (x, 0);
8786 op1 = XEXP (x, 1);
8787
8788 /* Recognize the rtl generated by reload, which we know will later be
8789 replaced with proper base and index regs. */
8790 if (!strict
8791 && reload_in_progress
8792 && (REG_P (op0) || GET_CODE (op0) == PLUS)
8793 && REG_P (op1))
8794 return true;
8795
8796 return (REG_P (op0) && REG_P (op1)
8797 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8798 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8799 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8800 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8801 }
8802
8803 bool
8804 avoiding_indexed_address_p (machine_mode mode)
8805 {
8806 /* Avoid indexed addressing for modes that have non-indexed
8807 load/store instruction forms. */
8808 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8809 }
8810
8811 bool
8812 legitimate_indirect_address_p (rtx x, int strict)
8813 {
8814 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8815 }
8816
8817 bool
8818 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8819 {
8820 if (!TARGET_MACHO || !flag_pic
8821 || mode != SImode || GET_CODE (x) != MEM)
8822 return false;
8823 x = XEXP (x, 0);
8824
8825 if (GET_CODE (x) != LO_SUM)
8826 return false;
8827 if (GET_CODE (XEXP (x, 0)) != REG)
8828 return false;
8829 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8830 return false;
8831 x = XEXP (x, 1);
8832
8833 return CONSTANT_P (x);
8834 }
8835
8836 static bool
8837 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8838 {
8839 if (GET_CODE (x) != LO_SUM)
8840 return false;
8841 if (GET_CODE (XEXP (x, 0)) != REG)
8842 return false;
8843 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8844 return false;
8845 /* Quad word addresses are restricted and we can't use LO_SUM. */
8846 if (mode_supports_vsx_dform_quad (mode))
8847 return false;
8848 x = XEXP (x, 1);
8849
8850 if (TARGET_ELF || TARGET_MACHO)
8851 {
8852 bool large_toc_ok;
8853
8854 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8855 return false;
8856 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8857 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8858 recognizes some LO_SUM addresses as valid although this
8859 function says the opposite. In most cases LRA can generate
8860 correct code for address reloads through its various
8861 transformations; only some LO_SUM cases defeat it. So we need
8862 code here, analogous to that in rs6000_legitimize_reload_address
8863 for LO_SUM, saying that some addresses are still valid. */
8864 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8865 && small_toc_ref (x, VOIDmode));
8866 if (TARGET_TOC && ! large_toc_ok)
8867 return false;
8868 if (GET_MODE_NUNITS (mode) != 1)
8869 return false;
8870 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8871 && !(/* ??? Assume floating point reg based on mode? */
8872 TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
8873 && (mode == DFmode || mode == DDmode)))
8874 return false;
8875
8876 return CONSTANT_P (x) || large_toc_ok;
8877 }
8878
8879 return false;
8880 }
8881
8882
8883 /* Try machine-dependent ways of modifying an illegitimate address
8884 to be legitimate. If we find one, return the new, valid address.
8885 This is used from only one place: `memory_address' in explow.c.
8886
8887 OLDX is the address as it was before break_out_memory_refs was
8888 called. In some cases it is useful to look at this to decide what
8889 needs to be done.
8890
8891 It is always safe for this function to do nothing. It exists to
8892 recognize opportunities to optimize the output.
8893
8894 On RS/6000, first check for the sum of a register with a constant
8895 integer that is out of range. If so, generate code to add the
8896 high part of the constant (its low-order 16 bits cleared) to the
8897 register and force this result into another register (this can be
8898 done with `cau'). Then generate an address of REG+(CONST&0xffff),
8899 allowing for the possibility of bit 16 being a one.
8900
8901 Then check for the sum of a register and something not constant, try to
8902 load the other things into a register and return the sum. */
8903
8904 static rtx
8905 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8906 machine_mode mode)
8907 {
8908 unsigned int extra;
8909
8910 if (!reg_offset_addressing_ok_p (mode)
8911 || mode_supports_vsx_dform_quad (mode))
8912 {
8913 if (virtual_stack_registers_memory_p (x))
8914 return x;
8915
8916 /* In theory we should not be seeing addresses of the form reg+0,
8917 but just in case it is generated, optimize it away. */
8918 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8919 return force_reg (Pmode, XEXP (x, 0));
8920
8921 /* For TImode with load/store quad, restrict addresses to just a single
8922 pointer, so it works with both GPRs and VSX registers. */
8923 /* Make sure both operands are registers. */
8924 else if (GET_CODE (x) == PLUS
8925 && (mode != TImode || !TARGET_VSX_TIMODE))
8926 return gen_rtx_PLUS (Pmode,
8927 force_reg (Pmode, XEXP (x, 0)),
8928 force_reg (Pmode, XEXP (x, 1)));
8929 else
8930 return force_reg (Pmode, x);
8931 }
8932 if (GET_CODE (x) == SYMBOL_REF)
8933 {
8934 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8935 if (model != 0)
8936 return rs6000_legitimize_tls_address (x, model);
8937 }
8938
8939 extra = 0;
8940 switch (mode)
8941 {
8942 case TFmode:
8943 case TDmode:
8944 case TImode:
8945 case PTImode:
8946 case IFmode:
8947 case KFmode:
8948 /* As in legitimate_offset_address_p we do not assume
8949 worst-case. The mode here is just a hint as to the registers
8950 used. A TImode is usually in gprs, but may actually be in
8951 fprs. Leave worst-case scenario for reload to handle via
8952 insn constraints. PTImode is only GPRs. */
8953 extra = 8;
8954 break;
8955 default:
8956 break;
8957 }
8958
8959 if (GET_CODE (x) == PLUS
8960 && GET_CODE (XEXP (x, 0)) == REG
8961 && GET_CODE (XEXP (x, 1)) == CONST_INT
8962 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8963 >= 0x10000 - extra)
8964 && !PAIRED_VECTOR_MODE (mode))
8965 {
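/* Split the displacement into a high part for addis and a
   sign-extended low part for the memory insn. For example,
   reg + 0x12345 gives low_int = 0x2345 and high_int = 0x10000,
   i.e. addis tmp,reg,1 followed by an access at tmp+0x2345. */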
8966 HOST_WIDE_INT high_int, low_int;
8967 rtx sum;
8968 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8969 if (low_int >= 0x8000 - extra)
8970 low_int = 0;
8971 high_int = INTVAL (XEXP (x, 1)) - low_int;
8972 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8973 GEN_INT (high_int)), 0);
8974 return plus_constant (Pmode, sum, low_int);
8975 }
8976 else if (GET_CODE (x) == PLUS
8977 && GET_CODE (XEXP (x, 0)) == REG
8978 && GET_CODE (XEXP (x, 1)) != CONST_INT
8979 && GET_MODE_NUNITS (mode) == 1
8980 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8981 || (/* ??? Assume floating point reg based on mode? */
8982 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8983 && (mode == DFmode || mode == DDmode)))
8984 && !avoiding_indexed_address_p (mode))
8985 {
8986 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8987 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8988 }
8989 else if (PAIRED_VECTOR_MODE (mode))
8990 {
8991 if (mode == DImode)
8992 return x;
8993 /* We accept [reg + reg]. */
8994
8995 if (GET_CODE (x) == PLUS)
8996 {
8997 rtx op1 = XEXP (x, 0);
8998 rtx op2 = XEXP (x, 1);
8999 rtx y;
9000
9001 op1 = force_reg (Pmode, op1);
9002 op2 = force_reg (Pmode, op2);
9003
9004 /* We can't always do [reg + reg] for these, because [reg +
9005 reg + offset] is not a legitimate addressing mode. */
9006 y = gen_rtx_PLUS (Pmode, op1, op2);
9007
9008 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
9009 return force_reg (Pmode, y);
9010 else
9011 return y;
9012 }
9013
9014 return force_reg (Pmode, x);
9015 }
9016 else if ((TARGET_ELF
9017 #if TARGET_MACHO
9018 || !MACHO_DYNAMIC_NO_PIC_P
9019 #endif
9020 )
9021 && TARGET_32BIT
9022 && TARGET_NO_TOC
9023 && ! flag_pic
9024 && GET_CODE (x) != CONST_INT
9025 && GET_CODE (x) != CONST_WIDE_INT
9026 && GET_CODE (x) != CONST_DOUBLE
9027 && CONSTANT_P (x)
9028 && GET_MODE_NUNITS (mode) == 1
9029 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9030 || (/* ??? Assume floating point reg based on mode? */
9031 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
9032 && (mode == DFmode || mode == DDmode))))
9033 {
9034 rtx reg = gen_reg_rtx (Pmode);
9035 if (TARGET_ELF)
9036 emit_insn (gen_elf_high (reg, x));
9037 else
9038 emit_insn (gen_macho_high (reg, x));
9039 return gen_rtx_LO_SUM (Pmode, reg, x);
9040 }
9041 else if (TARGET_TOC
9042 && GET_CODE (x) == SYMBOL_REF
9043 && constant_pool_expr_p (x)
9044 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9045 return create_TOC_reference (x, NULL_RTX);
9046 else
9047 return x;
9048 }
9049
9050 /* Debug version of rs6000_legitimize_address. */
9051 static rtx
9052 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9053 {
9054 rtx ret;
9055 rtx_insn *insns;
9056
9057 start_sequence ();
9058 ret = rs6000_legitimize_address (x, oldx, mode);
9059 insns = get_insns ();
9060 end_sequence ();
9061
9062 if (ret != x)
9063 {
9064 fprintf (stderr,
9065 "\nrs6000_legitimize_address: mode %s, old code %s, "
9066 "new code %s, modified\n",
9067 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9068 GET_RTX_NAME (GET_CODE (ret)));
9069
9070 fprintf (stderr, "Original address:\n");
9071 debug_rtx (x);
9072
9073 fprintf (stderr, "oldx:\n");
9074 debug_rtx (oldx);
9075
9076 fprintf (stderr, "New address:\n");
9077 debug_rtx (ret);
9078
9079 if (insns)
9080 {
9081 fprintf (stderr, "Insns added:\n");
9082 debug_rtx_list (insns, 20);
9083 }
9084 }
9085 else
9086 {
9087 fprintf (stderr,
9088 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9089 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9090
9091 debug_rtx (x);
9092 }
9093
9094 if (insns)
9095 emit_insn (insns);
9096
9097 return ret;
9098 }
9099
9100 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9101 We need to emit DTP-relative relocations. */
9102
9103 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9104 static void
9105 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9106 {
9107 switch (size)
9108 {
9109 case 4:
9110 fputs ("\t.long\t", file);
9111 break;
9112 case 8:
9113 fputs (DOUBLE_INT_ASM_OP, file);
9114 break;
9115 default:
9116 gcc_unreachable ();
9117 }
9118 output_addr_const (file, x);
9119 if (TARGET_ELF)
9120 fputs ("@dtprel+0x8000", file);
9121 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
9122 {
9123 switch (SYMBOL_REF_TLS_MODEL (x))
9124 {
9125 case 0:
9126 break;
9127 case TLS_MODEL_LOCAL_EXEC:
9128 fputs ("@le", file);
9129 break;
9130 case TLS_MODEL_INITIAL_EXEC:
9131 fputs ("@ie", file);
9132 break;
9133 case TLS_MODEL_GLOBAL_DYNAMIC:
9134 case TLS_MODEL_LOCAL_DYNAMIC:
9135 fputs ("@m", file);
9136 break;
9137 default:
9138 gcc_unreachable ();
9139 }
9140 }
9141 }
9142
9143 /* Return true if X is a symbol that refers to real (rather than emulated)
9144 TLS. */
9145
9146 static bool
9147 rs6000_real_tls_symbol_ref_p (rtx x)
9148 {
9149 return (GET_CODE (x) == SYMBOL_REF
9150 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9151 }
9152
9153 /* In the name of slightly smaller debug output, and to cater to
9154 general assembler lossage, recognize various UNSPEC sequences
9155 and turn them back into a direct symbol reference. */
9156
9157 static rtx
9158 rs6000_delegitimize_address (rtx orig_x)
9159 {
9160 rtx x, y, offset;
9161
9162 orig_x = delegitimize_mem_from_attrs (orig_x);
9163 x = orig_x;
9164 if (MEM_P (x))
9165 x = XEXP (x, 0);
9166
9167 y = x;
9168 if (TARGET_CMODEL != CMODEL_SMALL
9169 && GET_CODE (y) == LO_SUM)
9170 y = XEXP (y, 1);
9171
9172 offset = NULL_RTX;
9173 if (GET_CODE (y) == PLUS
9174 && GET_MODE (y) == Pmode
9175 && CONST_INT_P (XEXP (y, 1)))
9176 {
9177 offset = XEXP (y, 1);
9178 y = XEXP (y, 0);
9179 }
9180
9181 if (GET_CODE (y) == UNSPEC
9182 && XINT (y, 1) == UNSPEC_TOCREL)
9183 {
9184 y = XVECEXP (y, 0, 0);
9185
9186 #ifdef HAVE_AS_TLS
9187 /* Do not associate thread-local symbols with the original
9188 constant pool symbol. */
9189 if (TARGET_XCOFF
9190 && GET_CODE (y) == SYMBOL_REF
9191 && CONSTANT_POOL_ADDRESS_P (y)
9192 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9193 return orig_x;
9194 #endif
9195
9196 if (offset != NULL_RTX)
9197 y = gen_rtx_PLUS (Pmode, y, offset);
9198 if (!MEM_P (orig_x))
9199 return y;
9200 else
9201 return replace_equiv_address_nv (orig_x, y);
9202 }
9203
9204 if (TARGET_MACHO
9205 && GET_CODE (orig_x) == LO_SUM
9206 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9207 {
9208 y = XEXP (XEXP (orig_x, 1), 0);
9209 if (GET_CODE (y) == UNSPEC
9210 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9211 return XVECEXP (y, 0, 0);
9212 }
9213
9214 return orig_x;
9215 }
9216
9217 /* Return true if X shouldn't be emitted into the debug info.
9218 The linker doesn't like .toc section references from
9219 .debug_* sections, so reject .toc section symbols. */
9220
9221 static bool
9222 rs6000_const_not_ok_for_debug_p (rtx x)
9223 {
9224 if (GET_CODE (x) == SYMBOL_REF
9225 && CONSTANT_POOL_ADDRESS_P (x))
9226 {
9227 rtx c = get_pool_constant (x);
9228 machine_mode cmode = get_pool_mode (x);
9229 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9230 return true;
9231 }
9232
9233 return false;
9234 }
9235
9236
9237 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9238
9239 static bool
9240 rs6000_legitimate_combined_insn (rtx_insn *insn)
9241 {
9242 int icode = INSN_CODE (insn);
9243
9244 /* Reject creating doloop insns. Combine should not be allowed
9245 to create these for a number of reasons:
9246 1) In a nested loop, if combine creates one of these in an
9247 outer loop and the register allocator happens to allocate ctr
9248 to the outer loop insn, then the inner loop can't use ctr.
9249 Inner loops ought to be more highly optimized.
9250 2) Combine often wants to create one of these from what was
9251 originally a three insn sequence, first combining the three
9252 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9253 allocated ctr, the splitter takes us back to the three insn
9254 sequence. It's better to stop combine at the two insn
9255 sequence.
9256 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9257 insns, the register allocator sometimes uses floating point
9258 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9259 jump insn and output reloads are not implemented for jumps,
9260 the ctrsi/ctrdi splitters need to handle all possible cases.
9261 That's a pain, and it gets to be seriously difficult when a
9262 splitter that runs after reload needs memory to transfer from
9263 a gpr to an fpr. See PR70098 and PR71763, which are not fixed
9264 for the difficult case. It's better to not create problems
9265 in the first place. */
9266 if (icode != CODE_FOR_nothing
9267 && (icode == CODE_FOR_ctrsi_internal1
9268 || icode == CODE_FOR_ctrdi_internal1
9269 || icode == CODE_FOR_ctrsi_internal2
9270 || icode == CODE_FOR_ctrdi_internal2
9271 || icode == CODE_FOR_ctrsi_internal3
9272 || icode == CODE_FOR_ctrdi_internal3
9273 || icode == CODE_FOR_ctrsi_internal4
9274 || icode == CODE_FOR_ctrdi_internal4))
9275 return false;
9276
9277 return true;
9278 }
9279
9280 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9281
9282 static GTY(()) rtx rs6000_tls_symbol;
9283 static rtx
9284 rs6000_tls_get_addr (void)
9285 {
9286 if (!rs6000_tls_symbol)
9287 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9288
9289 return rs6000_tls_symbol;
9290 }
9291
9292 /* Construct the SYMBOL_REF for TLS GOT references. */
9293
9294 static GTY(()) rtx rs6000_got_symbol;
9295 static rtx
9296 rs6000_got_sym (void)
9297 {
9298 if (!rs6000_got_symbol)
9299 {
9300 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9301 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9302 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9303 }
9304
9305 return rs6000_got_symbol;
9306 }
9307
9308 /* AIX Thread-Local Address support. */
9309
9310 static rtx
9311 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9312 {
9313 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9314 const char *name;
9315 char *tlsname;
9316
9317 name = XSTR (addr, 0);
9318 /* Append the TLS CSECT qualifier, unless the symbol is already
9319 qualified or will be placed in the TLS private data section. */
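/* E.g. a TREE_PUBLIC symbol "foo" becomes "foo[TL]", or "foo[UL]"
   when it is a BSS initializer. */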
9320 if (name[strlen (name) - 1] != ']'
9321 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9322 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9323 {
9324 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* "[TL]" + nul. */
9325 strcpy (tlsname, name);
9326 strcat (tlsname,
9327 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9328 tlsaddr = copy_rtx (addr);
9329 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9330 }
9331 else
9332 tlsaddr = addr;
9333
9334 /* Place addr into TOC constant pool. */
9335 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9336
9337 /* Output the TOC entry and create the MEM referencing the value. */
9338 if (constant_pool_expr_p (XEXP (sym, 0))
9339 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9340 {
9341 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9342 mem = gen_const_mem (Pmode, tocref);
9343 set_mem_alias_set (mem, get_TOC_alias_set ());
9344 }
9345 else
9346 return sym;
9347
9348 /* Use global-dynamic for local-dynamic. */
9349 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9350 || model == TLS_MODEL_LOCAL_DYNAMIC)
9351 {
9352 /* Create new TOC reference for @m symbol. */
9353 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9354 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" for "*LC". */
9355 strcpy (tlsname, "*LCM");
9356 strcat (tlsname, name + 3);
9357 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9358 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9359 tocref = create_TOC_reference (modaddr, NULL_RTX);
9360 rtx modmem = gen_const_mem (Pmode, tocref);
9361 set_mem_alias_set (modmem, get_TOC_alias_set ());
9362
9363 rtx modreg = gen_reg_rtx (Pmode);
9364 emit_insn (gen_rtx_SET (modreg, modmem));
9365
9366 tmpreg = gen_reg_rtx (Pmode);
9367 emit_insn (gen_rtx_SET (tmpreg, mem));
9368
9369 dest = gen_reg_rtx (Pmode);
9370 if (TARGET_32BIT)
9371 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9372 else
9373 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9374 return dest;
9375 }
9376 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9377 else if (TARGET_32BIT)
9378 {
9379 tlsreg = gen_reg_rtx (SImode);
9380 emit_insn (gen_tls_get_tpointer (tlsreg));
9381 }
9382 else
9383 tlsreg = gen_rtx_REG (DImode, 13);
9384
9385 /* Load the TOC value into temporary register. */
9386 tmpreg = gen_reg_rtx (Pmode);
9387 emit_insn (gen_rtx_SET (tmpreg, mem));
9388 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9389 gen_rtx_MINUS (Pmode, addr, tlsreg));
9390
9391 /* Add TOC symbol value to TLS pointer. */
9392 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9393
9394 return dest;
9395 }
9396
9397 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9398 this (thread-local) address. */
9399
9400 static rtx
9401 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9402 {
9403 rtx dest, insn;
9404
9405 if (TARGET_XCOFF)
9406 return rs6000_legitimize_tls_address_aix (addr, model);
9407
9408 dest = gen_reg_rtx (Pmode);
9409 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9410 {
9411 rtx tlsreg;
9412
9413 if (TARGET_64BIT)
9414 {
9415 tlsreg = gen_rtx_REG (Pmode, 13);
9416 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9417 }
9418 else
9419 {
9420 tlsreg = gen_rtx_REG (Pmode, 2);
9421 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9422 }
9423 emit_insn (insn);
9424 }
9425 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9426 {
9427 rtx tlsreg, tmp;
9428
9429 tmp = gen_reg_rtx (Pmode);
9430 if (TARGET_64BIT)
9431 {
9432 tlsreg = gen_rtx_REG (Pmode, 13);
9433 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9434 }
9435 else
9436 {
9437 tlsreg = gen_rtx_REG (Pmode, 2);
9438 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9439 }
9440 emit_insn (insn);
9441 if (TARGET_64BIT)
9442 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9443 else
9444 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9445 emit_insn (insn);
9446 }
9447 else
9448 {
9449 rtx r3, got, tga, tmp1, tmp2, call_insn;
9450
9451 /* We currently use relocations like @got@tlsgd for tls, which
9452 means the linker will handle allocation of tls entries, placing
9453 them in the .got section. So use a pointer to the .got section,
9454 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9455 or to secondary GOT sections used by 32-bit -fPIC. */
9456 if (TARGET_64BIT)
9457 got = gen_rtx_REG (Pmode, 2);
9458 else
9459 {
9460 if (flag_pic == 1)
9461 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9462 else
9463 {
9464 rtx gsym = rs6000_got_sym ();
9465 got = gen_reg_rtx (Pmode);
9466 if (flag_pic == 0)
9467 rs6000_emit_move (got, gsym, Pmode);
9468 else
9469 {
9470 rtx mem, lab;
9471
9472 tmp1 = gen_reg_rtx (Pmode);
9473 tmp2 = gen_reg_rtx (Pmode);
9474 mem = gen_const_mem (Pmode, tmp1);
9475 lab = gen_label_rtx ();
9476 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9477 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9478 if (TARGET_LINK_STACK)
9479 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9480 emit_move_insn (tmp2, mem);
9481 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9482 set_unique_reg_note (last, REG_EQUAL, gsym);
9483 }
9484 }
9485 }
9486
9487 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9488 {
9489 tga = rs6000_tls_get_addr ();
9490 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9491 1, const0_rtx, Pmode);
9492
9493 r3 = gen_rtx_REG (Pmode, 3);
9494 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9495 {
9496 if (TARGET_64BIT)
9497 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9498 else
9499 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9500 }
9501 else if (DEFAULT_ABI == ABI_V4)
9502 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9503 else
9504 gcc_unreachable ();
9505 call_insn = last_call_insn ();
9506 PATTERN (call_insn) = insn;
9507 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9508 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9509 pic_offset_table_rtx);
9510 }
9511 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9512 {
9513 tga = rs6000_tls_get_addr ();
9514 tmp1 = gen_reg_rtx (Pmode);
9515 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9516 1, const0_rtx, Pmode);
9517
9518 r3 = gen_rtx_REG (Pmode, 3);
9519 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9520 {
9521 if (TARGET_64BIT)
9522 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9523 else
9524 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9525 }
9526 else if (DEFAULT_ABI == ABI_V4)
9527 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9528 else
9529 gcc_unreachable ();
9530 call_insn = last_call_insn ();
9531 PATTERN (call_insn) = insn;
9532 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9533 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9534 pic_offset_table_rtx);
9535
9536 if (rs6000_tls_size == 16)
9537 {
9538 if (TARGET_64BIT)
9539 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9540 else
9541 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9542 }
9543 else if (rs6000_tls_size == 32)
9544 {
9545 tmp2 = gen_reg_rtx (Pmode);
9546 if (TARGET_64BIT)
9547 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9548 else
9549 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9550 emit_insn (insn);
9551 if (TARGET_64BIT)
9552 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9553 else
9554 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9555 }
9556 else
9557 {
9558 tmp2 = gen_reg_rtx (Pmode);
9559 if (TARGET_64BIT)
9560 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9561 else
9562 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9563 emit_insn (insn);
9564 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9565 }
9566 emit_insn (insn);
9567 }
9568 else
9569 {
9570 /* IE, or 64-bit offset LE. */
9571 tmp2 = gen_reg_rtx (Pmode);
9572 if (TARGET_64BIT)
9573 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9574 else
9575 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9576 emit_insn (insn);
9577 if (TARGET_64BIT)
9578 insn = gen_tls_tls_64 (dest, tmp2, addr);
9579 else
9580 insn = gen_tls_tls_32 (dest, tmp2, addr);
9581 emit_insn (insn);
9582 }
9583 }
9584
9585 return dest;
9586 }
9587
9588 /* Only create the global variable for the stack protect guard if we are using
9589 the global flavor of that guard. */
9590 static tree
9591 rs6000_init_stack_protect_guard (void)
9592 {
9593 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9594 return default_stack_protect_guard ();
9595
9596 return NULL_TREE;
9597 }
9598
9599 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9600
9601 static bool
9602 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9603 {
9604 if (GET_CODE (x) == HIGH
9605 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9606 return true;
9607
9608 /* A TLS symbol in the TOC cannot contain a sum. */
9609 if (GET_CODE (x) == CONST
9610 && GET_CODE (XEXP (x, 0)) == PLUS
9611 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9612 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9613 return true;
9614
9615 /* Do not place an ELF TLS symbol in the constant pool. */
9616 return TARGET_ELF && tls_referenced_p (x);
9617 }
9618
9619 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9620 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9621 can be addressed relative to the toc pointer. */
9622
9623 static bool
9624 use_toc_relative_ref (rtx sym, machine_mode mode)
9625 {
9626 return ((constant_pool_expr_p (sym)
9627 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9628 get_pool_mode (sym)))
9629 || (TARGET_CMODEL == CMODEL_MEDIUM
9630 && SYMBOL_REF_LOCAL_P (sym)
9631 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9632 }
9633
9634 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9635 replace the input X, or the original X if no replacement is called for.
9636 The output parameter *WIN is 1 if the calling macro should goto WIN,
9637 0 if it should not.
9638
9639 For RS/6000, we wish to handle large displacements off a base
9640 register by splitting the addend across an addis and the mem insn.
9641 This cuts the number of extra insns needed from 3 to 1.
9642
9643 On Darwin, we use this to generate code for floating point constants.
9644 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9645 The Darwin code is inside #if TARGET_MACHO because only then are the
9646 machopic_* functions defined. */
9647 static rtx
9648 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9649 int opnum, int type,
9650 int ind_levels ATTRIBUTE_UNUSED, int *win)
9651 {
9652 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9653 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9654
9655 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9656 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9657 if (reg_offset_p
9658 && opnum == 1
9659 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9660 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9661 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9662 && TARGET_P9_VECTOR)
9663 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9664 && TARGET_P9_VECTOR)))
9665 reg_offset_p = false;
9666
9667 /* We must recognize output that we have already generated ourselves. */
9668 if (GET_CODE (x) == PLUS
9669 && GET_CODE (XEXP (x, 0)) == PLUS
9670 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9671 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9672 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9673 {
9674 if (TARGET_DEBUG_ADDR)
9675 {
9676 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9677 debug_rtx (x);
9678 }
9679 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9680 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9681 opnum, (enum reload_type) type);
9682 *win = 1;
9683 return x;
9684 }
9685
9686 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9687 if (GET_CODE (x) == LO_SUM
9688 && GET_CODE (XEXP (x, 0)) == HIGH)
9689 {
9690 if (TARGET_DEBUG_ADDR)
9691 {
9692 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9693 debug_rtx (x);
9694 }
9695 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9696 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9697 opnum, (enum reload_type) type);
9698 *win = 1;
9699 return x;
9700 }
9701
9702 #if TARGET_MACHO
9703 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9704 && GET_CODE (x) == LO_SUM
9705 && GET_CODE (XEXP (x, 0)) == PLUS
9706 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9707 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9708 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9709 && machopic_operand_p (XEXP (x, 1)))
9710 {
9711 /* Result of a previous invocation of this function on a Darwin
9712 floating point constant. */
9713 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9714 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9715 opnum, (enum reload_type) type);
9716 *win = 1;
9717 return x;
9718 }
9719 #endif
9720
9721 if (TARGET_CMODEL != CMODEL_SMALL
9722 && reg_offset_p
9723 && !quad_offset_p
9724 && small_toc_ref (x, VOIDmode))
9725 {
9726 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9727 x = gen_rtx_LO_SUM (Pmode, hi, x);
9728 if (TARGET_DEBUG_ADDR)
9729 {
9730 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9731 debug_rtx (x);
9732 }
9733 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9734 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9735 opnum, (enum reload_type) type);
9736 *win = 1;
9737 return x;
9738 }
9739
9740 if (GET_CODE (x) == PLUS
9741 && REG_P (XEXP (x, 0))
9742 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9743 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9744 && CONST_INT_P (XEXP (x, 1))
9745 && reg_offset_p
9746 && !PAIRED_VECTOR_MODE (mode)
9747 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9748 {
9749 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9750 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9751 HOST_WIDE_INT high
9752 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
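/* For example, val = 0x12348000 yields low = -0x8000 and
   high = 0x12350000; high + low reconstructs val exactly unless
   the 32-bit sign-extended arithmetic overflowed, which the test
   below catches. */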
9753
9754 /* Check for 32-bit overflow or quad addresses with one of the
9755 four least significant bits set. */
9756 if (high + low != val
9757 || (quad_offset_p && (low & 0xf)))
9758 {
9759 *win = 0;
9760 return x;
9761 }
9762
9763 /* Reload the high part into a base reg; leave the low part
9764 in the mem directly. */
9765
9766 x = gen_rtx_PLUS (GET_MODE (x),
9767 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9768 GEN_INT (high)),
9769 GEN_INT (low));
9770
9771 if (TARGET_DEBUG_ADDR)
9772 {
9773 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9774 debug_rtx (x);
9775 }
9776 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9777 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9778 opnum, (enum reload_type) type);
9779 *win = 1;
9780 return x;
9781 }
9782
9783 if (GET_CODE (x) == SYMBOL_REF
9784 && reg_offset_p
9785 && !quad_offset_p
9786 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9787 && !PAIRED_VECTOR_MODE (mode)
9788 #if TARGET_MACHO
9789 && DEFAULT_ABI == ABI_DARWIN
9790 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9791 && machopic_symbol_defined_p (x)
9792 #else
9793 && DEFAULT_ABI == ABI_V4
9794 && !flag_pic
9795 #endif
9796 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9797 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9798 without fprs.
9799 ??? Assume floating point reg based on mode? This assumption is
9800 violated by e.g. the powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9801 where reload ends up doing a DFmode load of a constant from
9802 mem using two gprs. Unfortunately, at this point reload
9803 hasn't yet selected regs so poking around in reload data
9804 won't help and even if we could figure out the regs reliably,
9805 we'd still want to allow this transformation when the mem is
9806 naturally aligned. Since we say the address is good here, we
9807 can't disable offsets from LO_SUMs in mem_operand_gpr.
9808 FIXME: Allow offset from lo_sum for other modes too, when
9809 mem is sufficiently aligned.
9810
9811 Also disallow this if the type can go in VMX/Altivec registers, since
9812 those registers do not have d-form (reg+offset) address modes. */
9813 && !reg_addr[mode].scalar_in_vmx_p
9814 && mode != TFmode
9815 && mode != TDmode
9816 && mode != IFmode
9817 && mode != KFmode
9818 && (mode != TImode || !TARGET_VSX_TIMODE)
9819 && mode != PTImode
9820 && (mode != DImode || TARGET_POWERPC64)
9821 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9822 || (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)))
9823 {
9824 #if TARGET_MACHO
9825 if (flag_pic)
9826 {
9827 rtx offset = machopic_gen_offset (x);
9828 x = gen_rtx_LO_SUM (GET_MODE (x),
9829 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9830 gen_rtx_HIGH (Pmode, offset)), offset);
9831 }
9832 else
9833 #endif
9834 x = gen_rtx_LO_SUM (GET_MODE (x),
9835 gen_rtx_HIGH (Pmode, x), x);
9836
9837 if (TARGET_DEBUG_ADDR)
9838 {
9839 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9840 debug_rtx (x);
9841 }
9842 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9843 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9844 opnum, (enum reload_type) type);
9845 *win = 1;
9846 return x;
9847 }
9848
9849 /* Reload an offset address wrapped by an AND that represents the
9850 masking of the lower bits. Strip the outer AND and let reload
9851 convert the offset address into an indirect address. For VSX,
9852 force reload to create the address with an AND in a separate
9853 register, because we can't guarantee an altivec register will
9854 be used. */
9855 if (VECTOR_MEM_ALTIVEC_P (mode)
9856 && GET_CODE (x) == AND
9857 && GET_CODE (XEXP (x, 0)) == PLUS
9858 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9859 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9860 && GET_CODE (XEXP (x, 1)) == CONST_INT
9861 && INTVAL (XEXP (x, 1)) == -16)
9862 {
9863 x = XEXP (x, 0);
9864 *win = 1;
9865 return x;
9866 }
9867
9868 if (TARGET_TOC
9869 && reg_offset_p
9870 && !quad_offset_p
9871 && GET_CODE (x) == SYMBOL_REF
9872 && use_toc_relative_ref (x, mode))
9873 {
9874 x = create_TOC_reference (x, NULL_RTX);
9875 if (TARGET_CMODEL != CMODEL_SMALL)
9876 {
9877 if (TARGET_DEBUG_ADDR)
9878 {
9879 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9880 debug_rtx (x);
9881 }
9882 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9883 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9884 opnum, (enum reload_type) type);
9885 }
9886 *win = 1;
9887 return x;
9888 }
9889 *win = 0;
9890 return x;
9891 }
9892
9893 /* Debug version of rs6000_legitimize_reload_address. */
9894 static rtx
9895 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9896 int opnum, int type,
9897 int ind_levels, int *win)
9898 {
9899 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9900 ind_levels, win);
9901 fprintf (stderr,
9902 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9903 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9904 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9905 debug_rtx (x);
9906
9907 if (x == ret)
9908 fprintf (stderr, "Same address returned\n");
9909 else if (!ret)
9910 fprintf (stderr, "NULL returned\n");
9911 else
9912 {
9913 fprintf (stderr, "New address:\n");
9914 debug_rtx (ret);
9915 }
9916
9917 return ret;
9918 }
9919
9920 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9921 that is a valid memory address for an instruction.
9922 The MODE argument is the machine mode for the MEM expression
9923 that wants to use this address.
9924
9925 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9926 refers to a constant pool entry of an address (or the sum of it
9927 plus a constant), a short (16-bit signed) constant plus a register,
9928 the sum of two registers, or a register indirect, possibly with an
9929 auto-increment. For DFmode, DDmode and DImode with a constant plus
9930 register, we must ensure that both words are addressable, or on
9931 PowerPC64 that the offset is word aligned.
9932
9933 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9934 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9935 because adjacent memory cells are accessed by adding word-sized offsets
9936 during assembly output. */
9937 static bool
9938 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9939 {
9940 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9941 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9942
9943 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9944 if (VECTOR_MEM_ALTIVEC_P (mode)
9945 && GET_CODE (x) == AND
9946 && GET_CODE (XEXP (x, 1)) == CONST_INT
9947 && INTVAL (XEXP (x, 1)) == -16)
9948 x = XEXP (x, 0);
9949
9950 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9951 return 0;
9952 if (legitimate_indirect_address_p (x, reg_ok_strict))
9953 return 1;
9954 if (TARGET_UPDATE
9955 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9956 && mode_supports_pre_incdec_p (mode)
9957 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9958 return 1;
9959 /* Handle restricted vector d-form offsets in ISA 3.0. */
9960 if (quad_offset_p)
9961 {
9962 if (quad_address_p (x, mode, reg_ok_strict))
9963 return 1;
9964 }
9965 else if (virtual_stack_registers_memory_p (x))
9966 return 1;
9967
9968 else if (reg_offset_p)
9969 {
9970 if (legitimate_small_data_p (mode, x))
9971 return 1;
9972 if (legitimate_constant_pool_address_p (x, mode,
9973 reg_ok_strict || lra_in_progress))
9974 return 1;
9975 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9976 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9977 return 1;
9978 }
9979
9980 /* For TImode, if we have TImode in VSX registers, only allow register
9981 indirect addresses. This will allow the values to go in either GPRs
9982 or VSX registers without reloading. The vector types would tend to
9983 go into VSX registers, so we allow REG+REG, while TImode seems
9984 somewhat split, in that some uses are GPR based, and some VSX based. */
9985 /* FIXME: We could loosen this by changing the following to
9986 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
9987 but currently we cannot allow REG+REG addressing for TImode. See
9988 PR72827 for complete details on how this ends up hoodwinking DSE. */
9989 if (mode == TImode && TARGET_VSX_TIMODE)
9990 return 0;
9991 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
9992 if (! reg_ok_strict
9993 && reg_offset_p
9994 && GET_CODE (x) == PLUS
9995 && GET_CODE (XEXP (x, 0)) == REG
9996 && (XEXP (x, 0) == virtual_stack_vars_rtx
9997 || XEXP (x, 0) == arg_pointer_rtx)
9998 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9999 return 1;
10000 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
10001 return 1;
10002 if (!FLOAT128_2REG_P (mode)
10003 && ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10004 || TARGET_POWERPC64
10005 || (mode != DFmode && mode != DDmode))
10006 && (TARGET_POWERPC64 || mode != DImode)
10007 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
10008 && mode != PTImode
10009 && !avoiding_indexed_address_p (mode)
10010 && legitimate_indexed_address_p (x, reg_ok_strict))
10011 return 1;
10012 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
10013 && mode_supports_pre_modify_p (mode)
10014 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
10015 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
10016 reg_ok_strict, false)
10017 || (!avoiding_indexed_address_p (mode)
10018 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
10019 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
10020 return 1;
10021 if (reg_offset_p && !quad_offset_p
10022 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
10023 return 1;
10024 return 0;
10025 }
10026
10027 /* Debug version of rs6000_legitimate_address_p. */
10028 static bool
10029 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
10030 bool reg_ok_strict)
10031 {
10032 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
10033 fprintf (stderr,
10034 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10035 "strict = %d, reload = %s, code = %s\n",
10036 ret ? "true" : "false",
10037 GET_MODE_NAME (mode),
10038 reg_ok_strict,
10039 (reload_completed
10040 ? "after"
10041 : (reload_in_progress ? "progress" : "before")),
10042 GET_RTX_NAME (GET_CODE (x)));
10043 debug_rtx (x);
10044
10045 return ret;
10046 }
10047
10048 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10049
10050 static bool
10051 rs6000_mode_dependent_address_p (const_rtx addr,
10052 addr_space_t as ATTRIBUTE_UNUSED)
10053 {
10054 return rs6000_mode_dependent_address_ptr (addr);
10055 }
10056
10057 /* Go to LABEL if ADDR (a legitimate address expression)
10058 has an effect that depends on the machine mode it is used for.
10059
10060 On the RS/6000 this is true of all integral offsets (since AltiVec
10061 and VSX modes don't allow them) and of any pre-increment or decrement.
10062
10063 ??? Except that due to conceptual problems in offsettable_address_p
10064 we can't really report the problems of integral offsets. So leave
10065 this assuming that the adjustable offset must be valid for the
10066 sub-words of a TFmode operand, which is what we had before. */
10067
10068 static bool
10069 rs6000_mode_dependent_address (const_rtx addr)
10070 {
10071 switch (GET_CODE (addr))
10072 {
10073 case PLUS:
10074 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10075 is considered a legitimate address before reload, so there
10076 are no offset restrictions in that case. Note that this
10077 condition is safe in strict mode because any address involving
10078 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10079 been rejected as illegitimate. */
10080 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10081 && XEXP (addr, 0) != arg_pointer_rtx
10082 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
10083 {
10084 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10085 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
10086 }
10087 break;
10088
10089 case LO_SUM:
10090 /* Anything in the constant pool is sufficiently aligned that
10091 all bytes have the same high part address. */
10092 return !legitimate_constant_pool_address_p (addr, QImode, false);
10093
10094 /* Auto-increment cases are now treated generically in recog.c. */
10095 case PRE_MODIFY:
10096 return TARGET_UPDATE;
10097
10098 /* AND is only allowed in Altivec loads. */
10099 case AND:
10100 return true;
10101
10102 default:
10103 break;
10104 }
10105
10106 return false;
10107 }
10108
10109 /* Debug version of rs6000_mode_dependent_address. */
10110 static bool
10111 rs6000_debug_mode_dependent_address (const_rtx addr)
10112 {
10113 bool ret = rs6000_mode_dependent_address (addr);
10114
10115 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10116 ret ? "true" : "false");
10117 debug_rtx (addr);
10118
10119 return ret;
10120 }
10121
10122 /* Implement FIND_BASE_TERM. */
10123
10124 rtx
10125 rs6000_find_base_term (rtx op)
10126 {
10127 rtx base;
10128
10129 base = op;
10130 if (GET_CODE (base) == CONST)
10131 base = XEXP (base, 0);
10132 if (GET_CODE (base) == PLUS)
10133 base = XEXP (base, 0);
10134 if (GET_CODE (base) == UNSPEC)
10135 switch (XINT (base, 1))
10136 {
10137 case UNSPEC_TOCREL:
10138 case UNSPEC_MACHOPIC_OFFSET:
10139 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10140 for aliasing purposes. */
10141 return XVECEXP (base, 0, 0);
10142 }
10143
10144 return op;
10145 }
10146
10147 /* More elaborate version of recog's offsettable_memref_p predicate
10148 that works around the ??? note of rs6000_mode_dependent_address.
10149 In particular it accepts
10150
10151 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10152
10153 in 32-bit mode, which the recog predicate rejects. */
10154
10155 static bool
10156 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
10157 {
10158 bool worst_case;
10159
10160 if (!MEM_P (op))
10161 return false;
10162
10163 /* First mimic offsettable_memref_p. */
10164 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
10165 return true;
10166
10167 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10168 the latter predicate knows nothing about the mode of the memory
10169 reference and, therefore, assumes that it is the largest supported
10170 mode (TFmode). As a consequence, legitimate offsettable memory
10171 references are rejected. rs6000_legitimate_offset_address_p contains
10172 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10173 at least with a little bit of help here given that we know the
10174 actual registers used. */
10175 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10176 || GET_MODE_SIZE (reg_mode) == 4);
10177 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10178 true, worst_case);
10179 }
10180
10181 /* Determine the reassociation width to be used in reassociate_bb.
10182 This takes into account how many parallel operations we
10183 can actually do of a given type, and also the latency.
10184 P8:
10185 int add/sub 6/cycle
10186 mul 2/cycle
10187 vect add/sub/mul 2/cycle
10188 fp add/sub/mul 2/cycle
10189 dfp 1/cycle
10190 */
10191
10192 static int
10193 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10194 machine_mode mode)
10195 {
10196 switch (rs6000_cpu)
10197 {
10198 case PROCESSOR_POWER8:
10199 case PROCESSOR_POWER9:
10200 if (DECIMAL_FLOAT_MODE_P (mode))
10201 return 1;
10202 if (VECTOR_MODE_P (mode))
10203 return 4;
10204 if (INTEGRAL_MODE_P (mode))
10205 return opc == MULT_EXPR ? 4 : 6;
10206 if (FLOAT_MODE_P (mode))
10207 return 4;
10208 break;
10209 default:
10210 break;
10211 }
10212 return 1;
10213 }
10214
10215 /* Change register usage conditional on target flags. */
10216 static void
10217 rs6000_conditional_register_usage (void)
10218 {
10219 int i;
10220
10221 if (TARGET_DEBUG_TARGET)
10222 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10223
10224 /* Set MQ register fixed (already call_used) so that it will not be
10225 allocated. */
10226 fixed_regs[64] = 1;
10227
10228 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10229 if (TARGET_64BIT)
10230 fixed_regs[13] = call_used_regs[13]
10231 = call_really_used_regs[13] = 1;
10232
10233 /* Conditionally disable FPRs. */
10234 if (TARGET_SOFT_FLOAT)
10235 for (i = 32; i < 64; i++)
10236 fixed_regs[i] = call_used_regs[i]
10237 = call_really_used_regs[i] = 1;
10238
10239 /* The TOC register is not killed across calls in a way that is
10240 visible to the compiler. */
10241 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10242 call_really_used_regs[2] = 0;
10243
10244 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10245 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10246
10247 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10248 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10249 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10250 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10251
10252 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10253 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10254 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10255 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10256
10257 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10258 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10259 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10260
10261 if (!TARGET_ALTIVEC && !TARGET_VSX)
10262 {
10263 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10264 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10265 call_really_used_regs[VRSAVE_REGNO] = 1;
10266 }
10267
10268 if (TARGET_ALTIVEC || TARGET_VSX)
10269 global_regs[VSCR_REGNO] = 1;
10270
10271 if (TARGET_ALTIVEC_ABI)
10272 {
10273 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10274 call_used_regs[i] = call_really_used_regs[i] = 1;
10275
10276 /* AIX reserves VR20:31 in non-extended ABI mode. */
10277 if (TARGET_XCOFF)
10278 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10279 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10280 }
10281 }
10282
10283 \f
10284 /* Output insns to set DEST equal to the constant SOURCE as a series of
10285 lis, ori and shl instructions and return TRUE. */
10286
10287 bool
10288 rs6000_emit_set_const (rtx dest, rtx source)
10289 {
10290 machine_mode mode = GET_MODE (dest);
10291 rtx temp, set;
10292 rtx_insn *insn;
10293 HOST_WIDE_INT c;
10294
10295 gcc_checking_assert (CONST_INT_P (source));
10296 c = INTVAL (source);
10297 switch (mode)
10298 {
10299 case QImode:
10300 case HImode:
10301 emit_insn (gen_rtx_SET (dest, source));
10302 return true;
10303
10304 case SImode:
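/* Two insns: lis for the high 16 bits, then ori for the low 16.
   E.g. 0x12345678 becomes lis tmp,0x1234 followed by
   ori dest,tmp,0x5678. */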
10305 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10306
10307 emit_insn (gen_rtx_SET (copy_rtx (temp),
10308 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10309 emit_insn (gen_rtx_SET (dest,
10310 gen_rtx_IOR (SImode, copy_rtx (temp),
10311 GEN_INT (c & 0xffff))));
10312 break;
10313
10314 case DImode:
10315 if (!TARGET_POWERPC64)
10316 {
10317 rtx hi, lo;
10318
10319 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10320 DImode);
10321 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10322 DImode);
10323 emit_move_insn (hi, GEN_INT (c >> 32));
10324 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10325 emit_move_insn (lo, GEN_INT (c));
10326 }
10327 else
10328 rs6000_emit_set_long_const (dest, c);
10329 break;
10330
10331 default:
10332 gcc_unreachable ();
10333 }
10334
10335 insn = get_last_insn ();
10336 set = single_set (insn);
10337 if (! CONSTANT_P (SET_SRC (set)))
10338 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10339
10340 return true;
10341 }
10342
10343 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10344 Output insns to set DEST equal to the constant C as a series of
10345 lis, ori and shl instructions. */
10346
10347 static void
10348 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10349 {
10350 rtx temp;
10351 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10352
10353 ud1 = c & 0xffff;
10354 c = c >> 16;
10355 ud2 = c & 0xffff;
10356 c = c >> 16;
10357 ud3 = c & 0xffff;
10358 c = c >> 16;
10359 ud4 = c & 0xffff;
10360
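/* The worst case below takes five insns; e.g. 0x1234567890ABCDEF
   is built as lis tmp,0x1234; ori tmp,tmp,0x5678; sldi tmp,tmp,32;
   oris tmp,tmp,0x90AB; ori dest,tmp,0xCDEF. */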
10361 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10362 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10363 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10364
10365 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10366 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10367 {
10368 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10369
10370 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10371 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10372 if (ud1 != 0)
10373 emit_move_insn (dest,
10374 gen_rtx_IOR (DImode, copy_rtx (temp),
10375 GEN_INT (ud1)));
10376 }
10377 else if (ud3 == 0 && ud4 == 0)
10378 {
10379 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10380
10381 gcc_assert (ud2 & 0x8000);
10382 emit_move_insn (copy_rtx (temp),
10383 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10384 if (ud1 != 0)
10385 emit_move_insn (copy_rtx (temp),
10386 gen_rtx_IOR (DImode, copy_rtx (temp),
10387 GEN_INT (ud1)));
10388 emit_move_insn (dest,
10389 gen_rtx_ZERO_EXTEND (DImode,
10390 gen_lowpart (SImode,
10391 copy_rtx (temp))));
10392 }
10393 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10394 || (ud4 == 0 && ! (ud3 & 0x8000)))
10395 {
10396 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10397
10398 emit_move_insn (copy_rtx (temp),
10399 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10400 if (ud2 != 0)
10401 emit_move_insn (copy_rtx (temp),
10402 gen_rtx_IOR (DImode, copy_rtx (temp),
10403 GEN_INT (ud2)));
10404 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10405 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10406 GEN_INT (16)));
10407 if (ud1 != 0)
10408 emit_move_insn (dest,
10409 gen_rtx_IOR (DImode, copy_rtx (temp),
10410 GEN_INT (ud1)));
10411 }
10412 else
10413 {
10414 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10415
10416 emit_move_insn (copy_rtx (temp),
10417 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10418 if (ud3 != 0)
10419 emit_move_insn (copy_rtx (temp),
10420 gen_rtx_IOR (DImode, copy_rtx (temp),
10421 GEN_INT (ud3)));
10422
10423 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10424 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10425 GEN_INT (32)));
10426 if (ud2 != 0)
10427 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10428 gen_rtx_IOR (DImode, copy_rtx (temp),
10429 GEN_INT (ud2 << 16)));
10430 if (ud1 != 0)
10431 emit_move_insn (dest,
10432 gen_rtx_IOR (DImode, copy_rtx (temp),
10433 GEN_INT (ud1)));
10434 }
10435 }
10436
10437 /* Helper for the following. Get rid of [r+r] memory refs
10438 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10439
10440 static void
10441 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10442 {
10443 if (reload_in_progress)
10444 return;
10445
10446 if (GET_CODE (operands[0]) == MEM
10447 && GET_CODE (XEXP (operands[0], 0)) != REG
10448 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10449 GET_MODE (operands[0]), false))
10450 operands[0]
10451 = replace_equiv_address (operands[0],
10452 copy_addr_to_reg (XEXP (operands[0], 0)));
10453
10454 if (GET_CODE (operands[1]) == MEM
10455 && GET_CODE (XEXP (operands[1], 0)) != REG
10456 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10457 GET_MODE (operands[1]), false))
10458 operands[1]
10459 = replace_equiv_address (operands[1],
10460 copy_addr_to_reg (XEXP (operands[1], 0)));
10461 }
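/* E.g. (a sketch) a TImode operand such as
	(mem:TI (plus:DI (reg:DI r3) (reg:DI r4)))
   is rewritten here as
	(mem:TI (reg:DI rT))	where rT = r3 + r4
   because the move patterns for these modes cannot handle indexed
   ([r+r]) addresses.  */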
10462
10463 /* Generate a vector of constants to permute MODE for a little-endian
10464 storage operation by swapping the two halves of a vector. */
10465 static rtvec
10466 rs6000_const_vec (machine_mode mode)
10467 {
10468 int i, subparts;
10469 rtvec v;
10470
10471 switch (mode)
10472 {
10473 case V1TImode:
10474 subparts = 1;
10475 break;
10476 case V2DFmode:
10477 case V2DImode:
10478 subparts = 2;
10479 break;
10480 case V4SFmode:
10481 case V4SImode:
10482 subparts = 4;
10483 break;
10484 case V8HImode:
10485 subparts = 8;
10486 break;
10487 case V16QImode:
10488 subparts = 16;
10489 break;
10490 default:
10491 gcc_unreachable ();
10492 }
10493
10494 v = rtvec_alloc (subparts);
10495
10496 for (i = 0; i < subparts / 2; ++i)
10497 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10498 for (i = subparts / 2; i < subparts; ++i)
10499 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10500
10501 return v;
10502 }
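/* For example, for V4SImode this yields the element order { 2, 3, 0, 1 }
   (the two 64-bit halves of the vector swapped), and for V16QImode it
   yields { 8, ..., 15, 0, ..., 7 }.  */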
10503
10504 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
10505 for a VSX load or store operation. */
10506 rtx
10507 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
10508 {
10509 /* Use ROTATE instead of VEC_SELECT for IEEE 128-bit floating point, and for
10510 128-bit integers if they are allowed in VSX registers. */
10511 if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
10512 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
10513 else
10514 {
10515 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10516 return gen_rtx_VEC_SELECT (mode, source, par);
10517 }
10518 }
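/* A sketch of the RTL this produces: for V2DFmode SOURCE it is
	(vec_select:V2DF (reg) (parallel [(const_int 1) (const_int 0)]))
   and for TImode/V1TImode it is
	(rotate:TI (reg) (const_int 64))
   both describing a swap of the two 64-bit halves.  */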
10519
10520 /* Emit a little-endian load from vector memory location SOURCE to VSX
10521 register DEST in mode MODE. The load is done with two permuting
10522 insns that represent an lxvd2x and an xxpermdi. */
10523 void
10524 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10525 {
10526 rtx tmp, permute_mem, permute_reg;
10527
10528 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10529 V1TImode). */
10530 if (mode == TImode || mode == V1TImode)
10531 {
10532 mode = V2DImode;
10533 dest = gen_lowpart (V2DImode, dest);
10534 source = adjust_address (source, V2DImode, 0);
10535 }
10536
10537 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10538 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
10539 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
10540 emit_insn (gen_rtx_SET (tmp, permute_mem));
10541 emit_insn (gen_rtx_SET (dest, permute_reg));
10542 }
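/* A sketch of the resulting sequence for a V2DFmode load on little-endian
   (vsT/vsD/rA are placeholder registers):

	lxvd2x   vsT,0,rA	# load with the doublewords swapped
	xxpermdi vsD,vsT,vsT,2	# swap them back into element order

   Going through TMP lets later passes cancel this permute against the
   matching permute of an adjacent store.  */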
10543
10544 /* Emit a little-endian store to vector memory location DEST from VSX
10545 register SOURCE in mode MODE. The store is done with two permuting
10546 insns that represent an xxpermdi and an stxvd2x. */
10547 void
10548 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10549 {
10550 rtx tmp, permute_src, permute_tmp;
10551
10552 /* This should never be called during or after reload, because it does
10553 not re-permute the source register. It is intended only for use
10554 during expand. */
10555 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
10556
10557 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10558 V1TImode). */
10559 if (mode == TImode || mode == V1TImode)
10560 {
10561 mode = V2DImode;
10562 dest = adjust_address (dest, V2DImode, 0);
10563 source = gen_lowpart (V2DImode, source);
10564 }
10565
10566 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10567 permute_src = rs6000_gen_le_vsx_permute (source, mode);
10568 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
10569 emit_insn (gen_rtx_SET (tmp, permute_src));
10570 emit_insn (gen_rtx_SET (dest, permute_tmp));
10571 }
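/* The mirror image of the load sequence above, e.g. for V2DFmode
   (again with placeholder registers):

	xxpermdi vsT,vsS,vsS,2	# pre-swap the doublewords
	stxvd2x  vsT,0,rA	# the store swaps them back to memory order  */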
10572
10573 /* Emit a sequence representing a little-endian VSX load or store,
10574 moving data from SOURCE to DEST in mode MODE. This is done
10575 separately from rs6000_emit_move to ensure it is called only
10576 during expand. LE VSX loads and stores introduced later are
10577 handled with a split. The expand-time RTL generation allows
10578 us to optimize away redundant pairs of register-permutes. */
10579 void
10580 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10581 {
10582 gcc_assert (!BYTES_BIG_ENDIAN
10583 && VECTOR_MEM_VSX_P (mode)
10584 && !TARGET_P9_VECTOR
10585 && !gpr_or_gpr_p (dest, source)
10586 && (MEM_P (source) ^ MEM_P (dest)));
10587
10588 if (MEM_P (source))
10589 {
10590 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10591 rs6000_emit_le_vsx_load (dest, source, mode);
10592 }
10593 else
10594 {
10595 if (!REG_P (source))
10596 source = force_reg (mode, source);
10597 rs6000_emit_le_vsx_store (dest, source, mode);
10598 }
10599 }
10600
10601 /* Return whether an SFmode or SImode move can be done without converting one
10602 mode to another. This arises when we have:
10603
10604 (SUBREG:SF (REG:SI ...))
10605 (SUBREG:SI (REG:SF ...))
10606
10607 and one of the values is in a floating point/vector register, where SFmode
10608 scalars are stored in DFmode format. */
10609
10610 bool
10611 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10612 {
10613 if (TARGET_ALLOW_SF_SUBREG)
10614 return true;
10615
10616 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10617 return true;
10618
10619 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10620 return true;
10621
10622 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10623 if (SUBREG_P (dest))
10624 {
10625 rtx dest_subreg = SUBREG_REG (dest);
10626 rtx src_subreg = SUBREG_REG (src);
10627 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10628 }
10629
10630 return false;
10631 }
10632
10633
10634 /* Helper function to change moves with:
10635
10636 (SUBREG:SF (REG:SI)) and
10637 (SUBREG:SI (REG:SF))
10638
10639 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10640 values are stored as DFmode values in the VSX registers. We need to convert
10641 the bits before we can use a direct move or operate on the bits in the
10642 vector register as an integer type.
10643
10644 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10645
10646 static bool
10647 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10648 {
10649 if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
10650 && !lra_in_progress
10651 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10652 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10653 {
10654 rtx inner_source = SUBREG_REG (source);
10655 machine_mode inner_mode = GET_MODE (inner_source);
10656
10657 if (mode == SImode && inner_mode == SFmode)
10658 {
10659 emit_insn (gen_movsi_from_sf (dest, inner_source));
10660 return true;
10661 }
10662
10663 if (mode == SFmode && inner_mode == SImode)
10664 {
10665 emit_insn (gen_movsf_from_si (dest, inner_source));
10666 return true;
10667 }
10668 }
10669
10670 return false;
10671 }
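/* E.g. (a sketch) a move such as
	(set (reg:SI r3) (subreg:SI (reg:SF f1) 0))
   becomes a movsi_from_sf insn, which converts the DFmode-format value in
   the FP/vector register to its 32-bit SFmode image before handing the
   bits over as an integer.  */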
10672
10673 /* Emit a move from SOURCE to DEST in mode MODE. */
10674 void
10675 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10676 {
10677 rtx operands[2];
10678 operands[0] = dest;
10679 operands[1] = source;
10680
10681 if (TARGET_DEBUG_ADDR)
10682 {
10683 fprintf (stderr,
10684 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
10685 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10686 GET_MODE_NAME (mode),
10687 reload_in_progress,
10688 reload_completed,
10689 can_create_pseudo_p ());
10690 debug_rtx (dest);
10691 fprintf (stderr, "source:\n");
10692 debug_rtx (source);
10693 }
10694
10695 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
10696 if (CONST_WIDE_INT_P (operands[1])
10697 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10698 {
10699 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10700 gcc_unreachable ();
10701 }
10702
10703 /* See if we need to special case SImode/SFmode SUBREG moves. */
10704 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10705 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10706 return;
10707
10708 /* Check if GCC is setting up a block move that will end up using FP
10709 registers as temporaries. We must make sure this is acceptable. */
10710 if (GET_CODE (operands[0]) == MEM
10711 && GET_CODE (operands[1]) == MEM
10712 && mode == DImode
10713 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
10714 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
10715 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
10716 ? 32 : MEM_ALIGN (operands[0])))
10717 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
10718 ? 32
10719 : MEM_ALIGN (operands[1]))))
10720 && ! MEM_VOLATILE_P (operands [0])
10721 && ! MEM_VOLATILE_P (operands [1]))
10722 {
10723 emit_move_insn (adjust_address (operands[0], SImode, 0),
10724 adjust_address (operands[1], SImode, 0));
10725 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10726 adjust_address (copy_rtx (operands[1]), SImode, 4));
10727 return;
10728 }
10729
10730 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10731 && !gpc_reg_operand (operands[1], mode))
10732 operands[1] = force_reg (mode, operands[1]);
10733
10734 /* Recognize the case where operand[1] is a reference to thread-local
10735 data and load its address to a register. */
10736 if (tls_referenced_p (operands[1]))
10737 {
10738 enum tls_model model;
10739 rtx tmp = operands[1];
10740 rtx addend = NULL;
10741
10742 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10743 {
10744 addend = XEXP (XEXP (tmp, 0), 1);
10745 tmp = XEXP (XEXP (tmp, 0), 0);
10746 }
10747
10748 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10749 model = SYMBOL_REF_TLS_MODEL (tmp);
10750 gcc_assert (model != 0);
10751
10752 tmp = rs6000_legitimize_tls_address (tmp, model);
10753 if (addend)
10754 {
10755 tmp = gen_rtx_PLUS (mode, tmp, addend);
10756 tmp = force_operand (tmp, operands[0]);
10757 }
10758 operands[1] = tmp;
10759 }
10760
10761 /* Handle the case where reload calls us with an invalid address. */
10762 if (reload_in_progress && mode == Pmode
10763 && (! general_operand (operands[1], mode)
10764 || ! nonimmediate_operand (operands[0], mode)))
10765 goto emit_set;
10766
10767 /* 128-bit constant floating-point values on Darwin should really be loaded
10768 as two parts. However, this premature splitting is a problem when DFmode
10769 values can go into Altivec registers. */
10770 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10771 && GET_CODE (operands[1]) == CONST_DOUBLE)
10772 {
10773 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10774 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10775 DFmode);
10776 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10777 GET_MODE_SIZE (DFmode)),
10778 simplify_gen_subreg (DFmode, operands[1], mode,
10779 GET_MODE_SIZE (DFmode)),
10780 DFmode);
10781 return;
10782 }
10783
10784 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
10785 cfun->machine->sdmode_stack_slot =
10786 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
10787
10788
10789 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10790 p1:SD) if p1 is not of floating point class and p0 is spilled,
10791 since there is no analogous movsd_store for this case. */
10792 if (lra_in_progress && mode == DDmode
10793 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10794 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10795 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10796 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10797 {
10798 enum reg_class cl;
10799 int regno = REGNO (SUBREG_REG (operands[1]));
10800
10801 if (regno >= FIRST_PSEUDO_REGISTER)
10802 {
10803 cl = reg_preferred_class (regno);
10804 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10805 }
10806 if (regno >= 0 && ! FP_REGNO_P (regno))
10807 {
10808 mode = SDmode;
10809 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10810 operands[1] = SUBREG_REG (operands[1]);
10811 }
10812 }
10813 if (lra_in_progress
10814 && mode == SDmode
10815 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10816 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10817 && (REG_P (operands[1])
10818 || (GET_CODE (operands[1]) == SUBREG
10819 && REG_P (SUBREG_REG (operands[1])))))
10820 {
10821 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10822 ? SUBREG_REG (operands[1]) : operands[1]);
10823 enum reg_class cl;
10824
10825 if (regno >= FIRST_PSEUDO_REGISTER)
10826 {
10827 cl = reg_preferred_class (regno);
10828 gcc_assert (cl != NO_REGS);
10829 regno = ira_class_hard_regs[cl][0];
10830 }
10831 if (FP_REGNO_P (regno))
10832 {
10833 if (GET_MODE (operands[0]) != DDmode)
10834 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10835 emit_insn (gen_movsd_store (operands[0], operands[1]));
10836 }
10837 else if (INT_REGNO_P (regno))
10838 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10839 else
10840 gcc_unreachable ();
10841 return;
10842 }
10843 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10844 p:DD)) if p0 is not of floating point class and p1 is spilled as
10845 we can have no analogous movsd_load for this. */
10846 if (lra_in_progress && mode == DDmode
10847 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10848 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10849 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10850 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10851 {
10852 enum reg_class cl;
10853 int regno = REGNO (SUBREG_REG (operands[0]));
10854
10855 if (regno >= FIRST_PSEUDO_REGISTER)
10856 {
10857 cl = reg_preferred_class (regno);
10858 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10859 }
10860 if (regno >= 0 && ! FP_REGNO_P (regno))
10861 {
10862 mode = SDmode;
10863 operands[0] = SUBREG_REG (operands[0]);
10864 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10865 }
10866 }
10867 if (lra_in_progress
10868 && mode == SDmode
10869 && (REG_P (operands[0])
10870 || (GET_CODE (operands[0]) == SUBREG
10871 && REG_P (SUBREG_REG (operands[0]))))
10872 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10873 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10874 {
10875 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10876 ? SUBREG_REG (operands[0]) : operands[0]);
10877 enum reg_class cl;
10878
10879 if (regno >= FIRST_PSEUDO_REGISTER)
10880 {
10881 cl = reg_preferred_class (regno);
10882 gcc_assert (cl != NO_REGS);
10883 regno = ira_class_hard_regs[cl][0];
10884 }
10885 if (FP_REGNO_P (regno))
10886 {
10887 if (GET_MODE (operands[1]) != DDmode)
10888 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10889 emit_insn (gen_movsd_load (operands[0], operands[1]));
10890 }
10891 else if (INT_REGNO_P (regno))
10892 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10893 else
10894 gcc_unreachable ();
10895 return;
10896 }
10897
10898 if (reload_in_progress
10899 && mode == SDmode
10900 && cfun->machine->sdmode_stack_slot != NULL_RTX
10901 && MEM_P (operands[0])
10902 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
10903 && REG_P (operands[1]))
10904 {
10905 if (FP_REGNO_P (REGNO (operands[1])))
10906 {
10907 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
10908 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10909 emit_insn (gen_movsd_store (mem, operands[1]));
10910 }
10911 else if (INT_REGNO_P (REGNO (operands[1])))
10912 {
10913 rtx mem = operands[0];
10914 if (BYTES_BIG_ENDIAN)
10915 mem = adjust_address_nv (mem, mode, 4);
10916 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10917 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
10918 }
10919 else
10920 gcc_unreachable ();
10921 return;
10922 }
10923 if (reload_in_progress
10924 && mode == SDmode
10925 && REG_P (operands[0])
10926 && MEM_P (operands[1])
10927 && cfun->machine->sdmode_stack_slot != NULL_RTX
10928 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
10929 {
10930 if (FP_REGNO_P (REGNO (operands[0])))
10931 {
10932 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
10933 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10934 emit_insn (gen_movsd_load (operands[0], mem));
10935 }
10936 else if (INT_REGNO_P (REGNO (operands[0])))
10937 {
10938 rtx mem = operands[1];
10939 if (BYTES_BIG_ENDIAN)
10940 mem = adjust_address_nv (mem, mode, 4);
10941 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
10942 emit_insn (gen_movsd_hardfloat (operands[0], mem));
10943 }
10944 else
10945 gcc_unreachable ();
10946 return;
10947 }
10948
10949 /* FIXME: In the long term, this switch statement should go away
10950 and be replaced by a sequence of tests based on things like
10951 mode == Pmode. */
10952 switch (mode)
10953 {
10954 case HImode:
10955 case QImode:
10956 if (CONSTANT_P (operands[1])
10957 && GET_CODE (operands[1]) != CONST_INT)
10958 operands[1] = force_const_mem (mode, operands[1]);
10959 break;
10960
10961 case TFmode:
10962 case TDmode:
10963 case IFmode:
10964 case KFmode:
10965 if (FLOAT128_2REG_P (mode))
10966 rs6000_eliminate_indexed_memrefs (operands);
10967 /* fall through */
10968
10969 case DFmode:
10970 case DDmode:
10971 case SFmode:
10972 case SDmode:
10973 if (CONSTANT_P (operands[1])
10974 && ! easy_fp_constant (operands[1], mode))
10975 operands[1] = force_const_mem (mode, operands[1]);
10976 break;
10977
10978 case V16QImode:
10979 case V8HImode:
10980 case V4SFmode:
10981 case V4SImode:
10982 case V2SFmode:
10983 case V2SImode:
10984 case V2DFmode:
10985 case V2DImode:
10986 case V1TImode:
10987 if (CONSTANT_P (operands[1])
10988 && !easy_vector_constant (operands[1], mode))
10989 operands[1] = force_const_mem (mode, operands[1]);
10990 break;
10991
10992 case SImode:
10993 case DImode:
10994 /* Use default pattern for address of ELF small data. */
10995 if (TARGET_ELF
10996 && mode == Pmode
10997 && DEFAULT_ABI == ABI_V4
10998 && (GET_CODE (operands[1]) == SYMBOL_REF
10999 || GET_CODE (operands[1]) == CONST)
11000 && small_data_operand (operands[1], mode))
11001 {
11002 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11003 return;
11004 }
11005
11006 if (DEFAULT_ABI == ABI_V4
11007 && mode == Pmode && mode == SImode
11008 && flag_pic == 1 && got_operand (operands[1], mode))
11009 {
11010 emit_insn (gen_movsi_got (operands[0], operands[1]));
11011 return;
11012 }
11013
11014 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11015 && TARGET_NO_TOC
11016 && ! flag_pic
11017 && mode == Pmode
11018 && CONSTANT_P (operands[1])
11019 && GET_CODE (operands[1]) != HIGH
11020 && GET_CODE (operands[1]) != CONST_INT)
11021 {
11022 rtx target = (!can_create_pseudo_p ()
11023 ? operands[0]
11024 : gen_reg_rtx (mode));
11025
11026 /* If this is a function address on -mcall-aixdesc,
11027 convert it to the address of the descriptor. */
11028 if (DEFAULT_ABI == ABI_AIX
11029 && GET_CODE (operands[1]) == SYMBOL_REF
11030 && XSTR (operands[1], 0)[0] == '.')
11031 {
11032 const char *name = XSTR (operands[1], 0);
11033 rtx new_ref;
11034 while (*name == '.')
11035 name++;
11036 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11037 CONSTANT_POOL_ADDRESS_P (new_ref)
11038 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11039 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11040 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11041 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11042 operands[1] = new_ref;
11043 }
11044
11045 if (DEFAULT_ABI == ABI_DARWIN)
11046 {
11047 #if TARGET_MACHO
11048 if (MACHO_DYNAMIC_NO_PIC_P)
11049 {
11050 /* Take care of any required data indirection. */
11051 operands[1] = rs6000_machopic_legitimize_pic_address (
11052 operands[1], mode, operands[0]);
11053 if (operands[0] != operands[1])
11054 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11055 return;
11056 }
11057 #endif
11058 emit_insn (gen_macho_high (target, operands[1]));
11059 emit_insn (gen_macho_low (operands[0], target, operands[1]));
11060 return;
11061 }
11062
11063 emit_insn (gen_elf_high (target, operands[1]));
11064 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11065 return;
11066 }
11067
11068 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11069 and we have put it in the TOC, we just need to make a TOC-relative
11070 reference to it. */
11071 if (TARGET_TOC
11072 && GET_CODE (operands[1]) == SYMBOL_REF
11073 && use_toc_relative_ref (operands[1], mode))
11074 operands[1] = create_TOC_reference (operands[1], operands[0]);
11075 else if (mode == Pmode
11076 && CONSTANT_P (operands[1])
11077 && GET_CODE (operands[1]) != HIGH
11078 && ((GET_CODE (operands[1]) != CONST_INT
11079 && ! easy_fp_constant (operands[1], mode))
11080 || (GET_CODE (operands[1]) == CONST_INT
11081 && (num_insns_constant (operands[1], mode)
11082 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11083 || (GET_CODE (operands[0]) == REG
11084 && FP_REGNO_P (REGNO (operands[0]))))
11085 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11086 && (TARGET_CMODEL == CMODEL_SMALL
11087 || can_create_pseudo_p ()
11088 || (REG_P (operands[0])
11089 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11090 {
11091
11092 #if TARGET_MACHO
11093 /* Darwin uses a special PIC legitimizer. */
11094 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11095 {
11096 operands[1] =
11097 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11098 operands[0]);
11099 if (operands[0] != operands[1])
11100 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11101 return;
11102 }
11103 #endif
11104
11105 /* If we are to limit the number of things we put in the TOC and
11106 this is a symbol plus a constant we can add in one insn,
11107 just put the symbol in the TOC and add the constant. Don't do
11108 this if reload is in progress. */
11109 if (GET_CODE (operands[1]) == CONST
11110 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
11111 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11112 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11113 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11114 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
11115 && ! side_effects_p (operands[0]))
11116 {
11117 rtx sym =
11118 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11119 rtx other = XEXP (XEXP (operands[1], 0), 1);
11120
11121 sym = force_reg (mode, sym);
11122 emit_insn (gen_add3_insn (operands[0], sym, other));
11123 return;
11124 }
11125
11126 operands[1] = force_const_mem (mode, operands[1]);
11127
11128 if (TARGET_TOC
11129 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11130 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11131 {
11132 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11133 operands[0]);
11134 operands[1] = gen_const_mem (mode, tocref);
11135 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11136 }
11137 }
11138 break;
11139
11140 case TImode:
11141 if (!VECTOR_MEM_VSX_P (TImode))
11142 rs6000_eliminate_indexed_memrefs (operands);
11143 break;
11144
11145 case PTImode:
11146 rs6000_eliminate_indexed_memrefs (operands);
11147 break;
11148
11149 default:
11150 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11151 }
11152
11153 /* Above, we may have called force_const_mem which may have returned
11154 an invalid address. If we can, fix this up; otherwise, reload will
11155 have to deal with it. */
11156 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
11157 operands[1] = validize_mem (operands[1]);
11158
11159 emit_set:
11160 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11161 }
11162 \f
11163 /* Nonzero if we can use a floating-point register to pass this arg. */
11164 #define USE_FP_FOR_ARG_P(CUM,MODE) \
11165 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
11166 && (CUM)->fregno <= FP_ARG_MAX_REG \
11167 && TARGET_HARD_FLOAT)
11168
11169 /* Nonzero if we can use an AltiVec register to pass this arg. */
11170 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
11171 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
11172 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
11173 && TARGET_ALTIVEC_ABI \
11174 && (NAMED))
11175
11176 /* Walk down the type tree of TYPE counting consecutive base elements.
11177 If *MODEP is VOIDmode, then set it to the first valid floating point
11178 or vector type. If a non-floating point or vector type is found, or
11179 if a floating point or vector type that doesn't match a non-VOIDmode
11180 *MODEP is found, then return -1, otherwise return the count in the
11181 sub-tree. */
11182
11183 static int
11184 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
11185 {
11186 machine_mode mode;
11187 HOST_WIDE_INT size;
11188
11189 switch (TREE_CODE (type))
11190 {
11191 case REAL_TYPE:
11192 mode = TYPE_MODE (type);
11193 if (!SCALAR_FLOAT_MODE_P (mode))
11194 return -1;
11195
11196 if (*modep == VOIDmode)
11197 *modep = mode;
11198
11199 if (*modep == mode)
11200 return 1;
11201
11202 break;
11203
11204 case COMPLEX_TYPE:
11205 mode = TYPE_MODE (TREE_TYPE (type));
11206 if (!SCALAR_FLOAT_MODE_P (mode))
11207 return -1;
11208
11209 if (*modep == VOIDmode)
11210 *modep = mode;
11211
11212 if (*modep == mode)
11213 return 2;
11214
11215 break;
11216
11217 case VECTOR_TYPE:
11218 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
11219 return -1;
11220
11221 /* Use V4SImode as representative of all 128-bit vector types. */
11222 size = int_size_in_bytes (type);
11223 switch (size)
11224 {
11225 case 16:
11226 mode = V4SImode;
11227 break;
11228 default:
11229 return -1;
11230 }
11231
11232 if (*modep == VOIDmode)
11233 *modep = mode;
11234
11235 /* Vector modes are considered to be opaque: two vectors are
11236 equivalent for the purposes of being homogeneous aggregates
11237 if they are the same size. */
11238 if (*modep == mode)
11239 return 1;
11240
11241 break;
11242
11243 case ARRAY_TYPE:
11244 {
11245 int count;
11246 tree index = TYPE_DOMAIN (type);
11247
11248 /* Can't handle incomplete types nor sizes that are not
11249 fixed. */
11250 if (!COMPLETE_TYPE_P (type)
11251 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11252 return -1;
11253
11254 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
11255 if (count == -1
11256 || !index
11257 || !TYPE_MAX_VALUE (index)
11258 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11259 || !TYPE_MIN_VALUE (index)
11260 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11261 || count < 0)
11262 return -1;
11263
11264 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11265 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11266
11267 /* There must be no padding. */
11268 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11269 return -1;
11270
11271 return count;
11272 }
11273
11274 case RECORD_TYPE:
11275 {
11276 int count = 0;
11277 int sub_count;
11278 tree field;
11279
11280 /* Can't handle incomplete types nor sizes that are not
11281 fixed. */
11282 if (!COMPLETE_TYPE_P (type)
11283 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11284 return -1;
11285
11286 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11287 {
11288 if (TREE_CODE (field) != FIELD_DECL)
11289 continue;
11290
11291 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11292 if (sub_count < 0)
11293 return -1;
11294 count += sub_count;
11295 }
11296
11297 /* There must be no padding. */
11298 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11299 return -1;
11300
11301 return count;
11302 }
11303
11304 case UNION_TYPE:
11305 case QUAL_UNION_TYPE:
11306 {
11307 /* These aren't very interesting except in a degenerate case. */
11308 int count = 0;
11309 int sub_count;
11310 tree field;
11311
11312 /* Can't handle incomplete types nor sizes that are not
11313 fixed. */
11314 if (!COMPLETE_TYPE_P (type)
11315 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11316 return -1;
11317
11318 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11319 {
11320 if (TREE_CODE (field) != FIELD_DECL)
11321 continue;
11322
11323 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11324 if (sub_count < 0)
11325 return -1;
11326 count = count > sub_count ? count : sub_count;
11327 }
11328
11329 /* There must be no padding. */
11330 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
11331 return -1;
11332
11333 return count;
11334 }
11335
11336 default:
11337 break;
11338 }
11339
11340 return -1;
11341 }
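/* Illustrative results of the walk above:

	struct { double x, y; }			-> DFmode, count 2
	struct { _Complex double z; double d; }	-> DFmode, count 3
	struct { vector int v[2]; }		-> V4SImode, count 2
	struct { double d; vector int v; }	-> -1 (mixed element modes)  */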
11342
11343 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11344 float or vector aggregate that shall be passed in FP/vector registers
11345 according to the ELFv2 ABI, return the homogeneous element mode in
11346 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11347
11348 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11349
11350 static bool
11351 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11352 machine_mode *elt_mode,
11353 int *n_elts)
11354 {
11355 /* Note that we do not accept complex types at the top level as
11356 homogeneous aggregates; these types are handled via the
11357 targetm.calls.split_complex_arg mechanism. Complex types
11358 can be elements of homogeneous aggregates, however. */
11359 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11360 {
11361 machine_mode field_mode = VOIDmode;
11362 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11363
11364 if (field_count > 0)
11365 {
11366 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11367 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11368
11369 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11370 up to AGGR_ARG_NUM_REG registers. */
11371 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11372 {
11373 if (elt_mode)
11374 *elt_mode = field_mode;
11375 if (n_elts)
11376 *n_elts = field_count;
11377 return true;
11378 }
11379 }
11380 }
11381
11382 if (elt_mode)
11383 *elt_mode = mode;
11384 if (n_elts)
11385 *n_elts = 1;
11386 return false;
11387 }
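/* For instance, under ELFv2 a struct { double a, b, c; } argument comes back
   with *ELT_MODE = DFmode and *N_ELTS = 3 and is passed in three FPRs,
   whereas a struct of nine doubles needs more than AGGR_ARG_NUM_REG
   registers and is treated as an ordinary aggregate.  */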
11388
11389 /* Return a nonzero value to say to return the function value in
11390 memory, just as large structures are always returned. TYPE will be
11391 the data type of the value, and FNTYPE will be the type of the
11392 function doing the returning, or @code{NULL} for libcalls.
11393
11394 The AIX ABI for the RS/6000 specifies that all structures are
11395 returned in memory. The Darwin ABI does the same.
11396
11397 For the Darwin 64 Bit ABI, a function result can be returned in
11398 registers or in memory, depending on the size of the return data
11399 type. If it is returned in registers, the value occupies the same
11400 registers as it would if it were the first and only function
11401 argument. Otherwise, the function places its result in memory at
11402 the location pointed to by GPR3.
11403
11404 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11405 but a draft put them in memory, and GCC used to implement the draft
11406 instead of the final standard. Therefore, aix_struct_return
11407 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11408 compatibility can change DRAFT_V4_STRUCT_RET to override the
11409 default, and -m switches get the final word. See
11410 rs6000_option_override_internal for more details.
11411
11412 The PPC32 SVR4 ABI uses IEEE 128-bit floating point for long double, if
11413 128-bit long double support is enabled. These values are returned in memory.
11414
11415 int_size_in_bytes returns -1 for variable size objects, which go in
11416 memory always. The cast to unsigned makes -1 > 8. */
11417
11418 static bool
11419 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11420 {
11421 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11422 if (TARGET_MACHO
11423 && rs6000_darwin64_abi
11424 && TREE_CODE (type) == RECORD_TYPE
11425 && int_size_in_bytes (type) > 0)
11426 {
11427 CUMULATIVE_ARGS valcum;
11428 rtx valret;
11429
11430 valcum.words = 0;
11431 valcum.fregno = FP_ARG_MIN_REG;
11432 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11433 /* Do a trial code generation as if this were going to be passed
11434 as an argument; if any part goes in memory, we return NULL. */
11435 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11436 if (valret)
11437 return false;
11438 /* Otherwise fall through to more conventional ABI rules. */
11439 }
11440
11441 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11442 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11443 NULL, NULL))
11444 return false;
11445
11446 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11447 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11448 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11449 return false;
11450
11451 if (AGGREGATE_TYPE_P (type)
11452 && (aix_struct_return
11453 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11454 return true;
11455
11456 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11457 modes only exist for GCC vector types if -maltivec. */
11458 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11459 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11460 return false;
11461
11462 /* Return synthetic vectors in memory. */
11463 if (TREE_CODE (type) == VECTOR_TYPE
11464 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11465 {
11466 static bool warned_for_return_big_vectors = false;
11467 if (!warned_for_return_big_vectors)
11468 {
11469 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11470 "non-standard ABI extension with no compatibility guarantee");
11471 warned_for_return_big_vectors = true;
11472 }
11473 return true;
11474 }
11475
11476 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11477 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11478 return true;
11479
11480 return false;
11481 }
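/* Some consequences of the rules above under ELFv2, for example:
	struct { double a, b; }	-> returned in FPRs (homogeneous aggregate)
	struct { int a, b, c; }	-> returned in GPRs (12 bytes <= 16)
	struct { int a[8]; }	-> returned in memory (32 bytes)  */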
11482
11483 /* Specify whether values returned in registers should be at the most
11484 significant end of a register. We want aggregates returned by
11485 value to match the way aggregates are passed to functions. */
11486
11487 static bool
11488 rs6000_return_in_msb (const_tree valtype)
11489 {
11490 return (DEFAULT_ABI == ABI_ELFv2
11491 && BYTES_BIG_ENDIAN
11492 && AGGREGATE_TYPE_P (valtype)
11493 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
11494 }
11495
11496 #ifdef HAVE_AS_GNU_ATTRIBUTE
11497 /* Return TRUE if a call to function FNDECL may be one that
11498 potentially affects the function calling ABI of the object file. */
11499
11500 static bool
11501 call_ABI_of_interest (tree fndecl)
11502 {
11503 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11504 {
11505 struct cgraph_node *c_node;
11506
11507 /* Libcalls are always interesting. */
11508 if (fndecl == NULL_TREE)
11509 return true;
11510
11511 /* Any call to an external function is interesting. */
11512 if (DECL_EXTERNAL (fndecl))
11513 return true;
11514
11515 /* Interesting functions that we are emitting in this object file. */
11516 c_node = cgraph_node::get (fndecl);
11517 c_node = c_node->ultimate_alias_target ();
11518 return !c_node->only_called_directly_p ();
11519 }
11520 return false;
11521 }
11522 #endif
11523
11524 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11525 for a call to a function whose data type is FNTYPE.
11526 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11527
11528 For incoming args we set the number of arguments in the prototype large
11529 so we never return a PARALLEL. */
11530
11531 void
11532 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11533 rtx libname ATTRIBUTE_UNUSED, int incoming,
11534 int libcall, int n_named_args,
11535 tree fndecl ATTRIBUTE_UNUSED,
11536 machine_mode return_mode ATTRIBUTE_UNUSED)
11537 {
11538 static CUMULATIVE_ARGS zero_cumulative;
11539
11540 *cum = zero_cumulative;
11541 cum->words = 0;
11542 cum->fregno = FP_ARG_MIN_REG;
11543 cum->vregno = ALTIVEC_ARG_MIN_REG;
11544 cum->prototype = (fntype && prototype_p (fntype));
11545 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11546 ? CALL_LIBCALL : CALL_NORMAL);
11547 cum->sysv_gregno = GP_ARG_MIN_REG;
11548 cum->stdarg = stdarg_p (fntype);
11549 cum->libcall = libcall;
11550
11551 cum->nargs_prototype = 0;
11552 if (incoming || cum->prototype)
11553 cum->nargs_prototype = n_named_args;
11554
11555 /* Check for a longcall attribute. */
11556 if ((!fntype && rs6000_default_long_calls)
11557 || (fntype
11558 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11559 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11560 cum->call_cookie |= CALL_LONG;
11561
11562 if (TARGET_DEBUG_ARG)
11563 {
11564 fprintf (stderr, "\ninit_cumulative_args:");
11565 if (fntype)
11566 {
11567 tree ret_type = TREE_TYPE (fntype);
11568 fprintf (stderr, " ret code = %s,",
11569 get_tree_code_name (TREE_CODE (ret_type)));
11570 }
11571
11572 if (cum->call_cookie & CALL_LONG)
11573 fprintf (stderr, " longcall,");
11574
11575 fprintf (stderr, " proto = %d, nargs = %d\n",
11576 cum->prototype, cum->nargs_prototype);
11577 }
11578
11579 #ifdef HAVE_AS_GNU_ATTRIBUTE
11580 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11581 {
11582 cum->escapes = call_ABI_of_interest (fndecl);
11583 if (cum->escapes)
11584 {
11585 tree return_type;
11586
11587 if (fntype)
11588 {
11589 return_type = TREE_TYPE (fntype);
11590 return_mode = TYPE_MODE (return_type);
11591 }
11592 else
11593 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11594
11595 if (return_type != NULL)
11596 {
11597 if (TREE_CODE (return_type) == RECORD_TYPE
11598 && TYPE_TRANSPARENT_AGGR (return_type))
11599 {
11600 return_type = TREE_TYPE (first_field (return_type));
11601 return_mode = TYPE_MODE (return_type);
11602 }
11603 if (AGGREGATE_TYPE_P (return_type)
11604 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11605 <= 8))
11606 rs6000_returns_struct = true;
11607 }
11608 if (SCALAR_FLOAT_MODE_P (return_mode))
11609 {
11610 rs6000_passes_float = true;
11611 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11612 && (FLOAT128_IBM_P (return_mode)
11613 || FLOAT128_IEEE_P (return_mode)
11614 || (return_type != NULL
11615 && (TYPE_MAIN_VARIANT (return_type)
11616 == long_double_type_node))))
11617 rs6000_passes_long_double = true;
11618 }
11619 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11620 || PAIRED_VECTOR_MODE (return_mode))
11621 rs6000_passes_vector = true;
11622 }
11623 }
11624 #endif
11625
11626 if (fntype
11627 && !TARGET_ALTIVEC
11628 && TARGET_ALTIVEC_ABI
11629 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11630 {
11631 error ("cannot return value in vector register because"
11632 " altivec instructions are disabled, use -maltivec"
11633 " to enable them");
11634 }
11635 }
11636 \f
11637 /* The mode the ABI uses for a word. This is not the same as word_mode
11638 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11639
11640 static machine_mode
11641 rs6000_abi_word_mode (void)
11642 {
11643 return TARGET_32BIT ? SImode : DImode;
11644 }
11645
11646 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11647 static char *
11648 rs6000_offload_options (void)
11649 {
11650 if (TARGET_64BIT)
11651 return xstrdup ("-foffload-abi=lp64");
11652 else
11653 return xstrdup ("-foffload-abi=ilp32");
11654 }
11655
11656 /* On rs6000, function arguments are promoted, as are function return
11657 values. */
11658
11659 static machine_mode
11660 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11661 machine_mode mode,
11662 int *punsignedp ATTRIBUTE_UNUSED,
11663 const_tree, int)
11664 {
11665 PROMOTE_MODE (mode, *punsignedp, type);
11666
11667 return mode;
11668 }
11669
11670 /* Return true if TYPE must be passed on the stack and not in registers. */
11671
11672 static bool
11673 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11674 {
11675 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11676 return must_pass_in_stack_var_size (mode, type);
11677 else
11678 return must_pass_in_stack_var_size_or_pad (mode, type);
11679 }
11680
11681 static inline bool
11682 is_complex_IBM_long_double (machine_mode mode)
11683 {
11684 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11685 }
11686
11687 /* Whether ABI_V4 passes MODE args to a function in floating point
11688 registers. */
11689
11690 static bool
11691 abi_v4_pass_in_fpr (machine_mode mode)
11692 {
11693 if (!TARGET_HARD_FLOAT)
11694 return false;
11695 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11696 return true;
11697 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11698 return true;
11699 /* ABI_V4 passes complex IBM long double in 8 gprs.
11700 Stupid, but we can't change the ABI now. */
11701 if (is_complex_IBM_long_double (mode))
11702 return false;
11703 if (FLOAT128_2REG_P (mode))
11704 return true;
11705 if (DECIMAL_FLOAT_MODE_P (mode))
11706 return true;
11707 return false;
11708 }
11709
11710 /* If defined, a C expression which determines whether, and in which
11711 direction, to pad out an argument with extra space. The value
11712 should be of type `enum direction': either `upward' to pad above
11713 the argument, `downward' to pad below, or `none' to inhibit
11714 padding.
11715
11716 For the AIX ABI structs are always stored left shifted in their
11717 argument slot. */
11718
11719 enum direction
11720 function_arg_padding (machine_mode mode, const_tree type)
11721 {
11722 #ifndef AGGREGATE_PADDING_FIXED
11723 #define AGGREGATE_PADDING_FIXED 0
11724 #endif
11725 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11726 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11727 #endif
11728
11729 if (!AGGREGATE_PADDING_FIXED)
11730 {
11731 /* GCC used to pass structures of the same size as integer types as
11732 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
11733 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11734 passed padded downward, except that -mstrict-align further
11735 muddied the water in that multi-component structures of 2 and 4
11736 bytes in size were passed padded upward.
11737
11738 The following arranges for best compatibility with previous
11739 versions of gcc, but removes the -mstrict-align dependency. */
11740 if (BYTES_BIG_ENDIAN)
11741 {
11742 HOST_WIDE_INT size = 0;
11743
11744 if (mode == BLKmode)
11745 {
11746 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11747 size = int_size_in_bytes (type);
11748 }
11749 else
11750 size = GET_MODE_SIZE (mode);
11751
11752 if (size == 1 || size == 2 || size == 4)
11753 return downward;
11754 }
11755 return upward;
11756 }
11757
11758 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11759 {
11760 if (type != 0 && AGGREGATE_TYPE_P (type))
11761 return upward;
11762 }
11763
11764 /* Fall back to the default. */
11765 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
11766 }
11767
11768 /* If defined, a C expression that gives the alignment boundary, in bits,
11769 of an argument with the specified mode and type. If it is not defined,
11770 PARM_BOUNDARY is used for all arguments.
11771
11772 V.4 wants long longs and doubles to be double word aligned. Just
11773 testing the mode size is a boneheaded way to do this as it means
11774 that other types such as complex int are also double word aligned.
11775 However, we're stuck with this because changing the ABI might break
11776 existing library interfaces.
11777
11778 Quadword align Altivec/VSX vectors.
11779 Quadword align large synthetic vector types. */
11780
11781 static unsigned int
11782 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11783 {
11784 machine_mode elt_mode;
11785 int n_elts;
11786
11787 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11788
11789 if (DEFAULT_ABI == ABI_V4
11790 && (GET_MODE_SIZE (mode) == 8
11791 || (TARGET_HARD_FLOAT
11792 && !is_complex_IBM_long_double (mode)
11793 && FLOAT128_2REG_P (mode))))
11794 return 64;
11795 else if (FLOAT128_VECTOR_P (mode))
11796 return 128;
11797 else if (PAIRED_VECTOR_MODE (mode)
11798 || (type && TREE_CODE (type) == VECTOR_TYPE
11799 && int_size_in_bytes (type) >= 8
11800 && int_size_in_bytes (type) < 16))
11801 return 64;
11802 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11803 || (type && TREE_CODE (type) == VECTOR_TYPE
11804 && int_size_in_bytes (type) >= 16))
11805 return 128;
11806
11807 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11808 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11809 -mcompat-align-parm is used. */
11810 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11811 || DEFAULT_ABI == ABI_ELFv2)
11812 && type && TYPE_ALIGN (type) > 64)
11813 {
11814 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11815 or homogeneous float/vector aggregates here. We already handled
11816 vector aggregates above, but still need to check for float here. */
11817 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11818 && !SCALAR_FLOAT_MODE_P (elt_mode));
11819
11820 /* We used to check for BLKmode instead of the above aggregate type
11821 check. Warn when this results in any difference to the ABI. */
11822 if (aggregate_p != (mode == BLKmode))
11823 {
11824 static bool warned;
11825 if (!warned && warn_psabi)
11826 {
11827 warned = true;
11828 inform (input_location,
11829 "the ABI of passing aggregates with %d-byte alignment"
11830 " has changed in GCC 5",
11831 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11832 }
11833 }
11834
11835 if (aggregate_p)
11836 return 128;
11837 }
11838
11839 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11840 implement the "aggregate type" check as a BLKmode check here; this
11841 means certain aggregate types are in fact not aligned. */
11842 if (TARGET_MACHO && rs6000_darwin64_abi
11843 && mode == BLKmode
11844 && type && TYPE_ALIGN (type) > 64)
11845 return 128;
11846
11847 return PARM_BOUNDARY;
11848 }
11849
11850 /* The offset in words to the start of the parameter save area. */
11851
11852 static unsigned int
11853 rs6000_parm_offset (void)
11854 {
11855 return (DEFAULT_ABI == ABI_V4 ? 2
11856 : DEFAULT_ABI == ABI_ELFv2 ? 4
11857 : 6);
11858 }
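/* That is, the parameter save area sits just past the fixed stack header:
   2 words (back chain and LR save) for V.4, 4 words for the shortened
   ELFv2 header, and 6 words (back chain, CR save, LR save, two reserved
   words and the TOC save) for AIX/ELFv1.  */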
11859
11860 /* For a function parm of MODE and TYPE, return the starting word in
11861 the parameter area. NWORDS of the parameter area are already used. */
11862
11863 static unsigned int
11864 rs6000_parm_start (machine_mode mode, const_tree type,
11865 unsigned int nwords)
11866 {
11867 unsigned int align;
11868
11869 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11870 return nwords + (-(rs6000_parm_offset () + nwords) & align);
11871 }
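/* A worked example: under ELFv2 (offset 4 words) with NWORDS == 1, a vector
   argument with a 128-bit boundary gives ALIGN == 1 on a 64-bit target
   (128 / 64 - 1), and -(4 + 1) & 1 == 1, so the vector starts at word 2 of
   the parameter area -- keeping its address a multiple of 16 bytes.  */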
11872
11873 /* Compute the size (in words) of a function argument. */
11874
11875 static unsigned long
11876 rs6000_arg_size (machine_mode mode, const_tree type)
11877 {
11878 unsigned long size;
11879
11880 if (mode != BLKmode)
11881 size = GET_MODE_SIZE (mode);
11882 else
11883 size = int_size_in_bytes (type);
11884
11885 if (TARGET_32BIT)
11886 return (size + 3) >> 2;
11887 else
11888 return (size + 7) >> 3;
11889 }
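/* E.g. a 9-byte BLKmode struct occupies (9 + 7) >> 3 == 2 doublewords on a
   64-bit target, but (9 + 3) >> 2 == 3 words on a 32-bit one.  */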
11890 \f
11891 /* Use this to flush pending int fields. */
11892
11893 static void
11894 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11895 HOST_WIDE_INT bitpos, int final)
11896 {
11897 unsigned int startbit, endbit;
11898 int intregs, intoffset;
11899 machine_mode mode;
11900
11901 /* Handle the situations where a float is taking up the first half
11902 of the GPR, and the other half is empty (typically due to
11903 alignment restrictions). We can detect this by an 8-byte-aligned
11904 int field, or by seeing that this is the final flush for this
11905 argument. Count the word and continue on. */
11906 if (cum->floats_in_gpr == 1
11907 && (cum->intoffset % 64 == 0
11908 || (cum->intoffset == -1 && final)))
11909 {
11910 cum->words++;
11911 cum->floats_in_gpr = 0;
11912 }
11913
11914 if (cum->intoffset == -1)
11915 return;
11916
11917 intoffset = cum->intoffset;
11918 cum->intoffset = -1;
11919 cum->floats_in_gpr = 0;
11920
11921 if (intoffset % BITS_PER_WORD != 0)
11922 {
11923 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
11924 MODE_INT, 0);
11925 if (mode == BLKmode)
11926 {
11927 /* We couldn't find an appropriate mode, which happens,
11928 e.g., in packed structs when there are 3 bytes to load.
11929 Move intoffset back to the beginning of the word in this
11930 case. */
11931 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11932 }
11933 }
11934
11935 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11936 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11937 intregs = (endbit - startbit) / BITS_PER_WORD;
11938 cum->words += intregs;
11939 /* words should be unsigned. */
11940 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11941 {
11942 int pad = (endbit/BITS_PER_WORD) - cum->words;
11943 cum->words += pad;
11944 }
11945 }
11946
11947 /* The darwin64 ABI calls for us to recurse down through structs,
11948 looking for elements passed in registers. Unfortunately, we have
11949 to track int register count here also because of misalignments
11950 in powerpc alignment mode. */
11951
11952 static void
11953 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11954 const_tree type,
11955 HOST_WIDE_INT startbitpos)
11956 {
11957 tree f;
11958
11959 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11960 if (TREE_CODE (f) == FIELD_DECL)
11961 {
11962 HOST_WIDE_INT bitpos = startbitpos;
11963 tree ftype = TREE_TYPE (f);
11964 machine_mode mode;
11965 if (ftype == error_mark_node)
11966 continue;
11967 mode = TYPE_MODE (ftype);
11968
11969 if (DECL_SIZE (f) != 0
11970 && tree_fits_uhwi_p (bit_position (f)))
11971 bitpos += int_bit_position (f);
11972
11973 /* ??? FIXME: else assume zero offset. */
11974
11975 if (TREE_CODE (ftype) == RECORD_TYPE)
11976 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11977 else if (USE_FP_FOR_ARG_P (cum, mode))
11978 {
11979 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11980 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11981 cum->fregno += n_fpregs;
11982 /* Single-precision floats present a special problem for
11983 us, because they are smaller than an 8-byte GPR, and so
11984 the structure-packing rules combined with the standard
11985 varargs behavior mean that we want to pack float/float
11986 and float/int combinations into a single register's
11987 space. This is complicated by the arg advance flushing,
11988 which works on arbitrarily large groups of int-type
11989 fields. */
11990 if (mode == SFmode)
11991 {
11992 if (cum->floats_in_gpr == 1)
11993 {
11994 /* Two floats in a word; count the word and reset
11995 the float count. */
11996 cum->words++;
11997 cum->floats_in_gpr = 0;
11998 }
11999 else if (bitpos % 64 == 0)
12000 {
12001 /* A float at the beginning of an 8-byte word;
12002 count it and put off adjusting cum->words until
12003 we see if an arg advance flush is going to do it
12004 for us. */
12005 cum->floats_in_gpr++;
12006 }
12007 else
12008 {
12009 /* The float is at the end of a word, preceded
12010 by integer fields, so the arg advance flush
12011 just above has already set cum->words and
12012 everything is taken care of. */
12013 }
12014 }
12015 else
12016 cum->words += n_fpregs;
12017 }
12018 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12019 {
12020 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
12021 cum->vregno++;
12022 cum->words += 2;
12023 }
12024 else if (cum->intoffset == -1)
12025 cum->intoffset = bitpos;
12026 }
12027 }
12028
12029 /* Check for an item that needs to be considered specially under the darwin 64
12030 bit ABI. These are record types where the mode is BLK or the structure is
12031 8 bytes in size. */
12032 static int
12033 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
12034 {
12035 return rs6000_darwin64_abi
12036 && ((mode == BLKmode
12037 && TREE_CODE (type) == RECORD_TYPE
12038 && int_size_in_bytes (type) > 0)
12039 || (type && TREE_CODE (type) == RECORD_TYPE
12040 && int_size_in_bytes (type) == 8)) ? 1 : 0;
12041 }
12042
12043 /* Update the data in CUM to advance over an argument
12044 of mode MODE and data type TYPE.
12045 (TYPE is null for libcalls where that information may not be available.)
12046
12047 Note that for args passed by reference, function_arg will be called
12048 with MODE and TYPE set to that of the pointer to the arg, not the arg
12049 itself. */
12050
12051 static void
12052 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
12053 const_tree type, bool named, int depth)
12054 {
12055 machine_mode elt_mode;
12056 int n_elts;
12057
12058 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12059
12060 /* Only tick off an argument if we're not recursing. */
12061 if (depth == 0)
12062 cum->nargs_prototype--;
12063
12064 #ifdef HAVE_AS_GNU_ATTRIBUTE
12065 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
12066 && cum->escapes)
12067 {
12068 if (SCALAR_FLOAT_MODE_P (mode))
12069 {
12070 rs6000_passes_float = true;
12071 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
12072 && (FLOAT128_IBM_P (mode)
12073 || FLOAT128_IEEE_P (mode)
12074 || (type != NULL
12075 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
12076 rs6000_passes_long_double = true;
12077 }
12078 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
12079 || (PAIRED_VECTOR_MODE (mode)
12080 && !cum->stdarg
12081 && cum->sysv_gregno <= GP_ARG_MAX_REG))
12082 rs6000_passes_vector = true;
12083 }
12084 #endif
12085
12086 if (TARGET_ALTIVEC_ABI
12087 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
12088 || (type && TREE_CODE (type) == VECTOR_TYPE
12089 && int_size_in_bytes (type) == 16)))
12090 {
12091 bool stack = false;
12092
12093 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12094 {
12095 cum->vregno += n_elts;
12096
12097 if (!TARGET_ALTIVEC)
12098 error ("cannot pass argument in vector register because"
12099 " altivec instructions are disabled, use -maltivec"
12100 " to enable them");
12101
12102 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
12103 even if it is going to be passed in a vector register.
12104 Darwin does the same for variable-argument functions. */
12105 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12106 && TARGET_64BIT)
12107 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
12108 stack = true;
12109 }
12110 else
12111 stack = true;
12112
12113 if (stack)
12114 {
12115 int align;
12116
12117 /* Vector parameters must be 16-byte aligned. In 32-bit
12118 mode this means we need to take into account the offset
12119 to the parameter save area. In 64-bit mode, they just
12120 have to start on an even word, since the parameter save
12121 area is 16-byte aligned. */
12122 if (TARGET_32BIT)
12123 align = -(rs6000_parm_offset () + cum->words) & 3;
12124 else
12125 align = cum->words & 1;
12126 cum->words += align + rs6000_arg_size (mode, type);
12127
12128 if (TARGET_DEBUG_ARG)
12129 {
12130 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
12131 cum->words, align);
12132 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
12133 cum->nargs_prototype, cum->prototype,
12134 GET_MODE_NAME (mode));
12135 }
12136 }
12137 }
12138 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12139 {
12140 int size = int_size_in_bytes (type);
12141 /* Variable sized types have size == -1 and are
12142 treated as if consisting entirely of ints.
12143 Pad to 16 byte boundary if needed. */
12144 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12145 && (cum->words % 2) != 0)
12146 cum->words++;
12147 /* For varargs, we can just go up by the size of the struct. */
12148 if (!named)
12149 cum->words += (size + 7) / 8;
12150 else
12151 {
12152 /* It is tempting to say int register count just goes up by
12153 sizeof(type)/8, but this is wrong in a case such as
12154 { int; double; int; } [powerpc alignment]. We have to
12155 grovel through the fields for these too. */
12156 cum->intoffset = 0;
12157 cum->floats_in_gpr = 0;
12158 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
12159 rs6000_darwin64_record_arg_advance_flush (cum,
12160 size * BITS_PER_UNIT, 1);
12161 }
12162 if (TARGET_DEBUG_ARG)
12163 {
12164 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d, ",
12165 cum->words, TYPE_ALIGN (type), size);
12166 fprintf (stderr,
12167 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
12168 cum->nargs_prototype, cum->prototype,
12169 GET_MODE_NAME (mode));
12170 }
12171 }
12172 else if (DEFAULT_ABI == ABI_V4)
12173 {
12174 if (abi_v4_pass_in_fpr (mode))
12175 {
12176 /* _Decimal128 must use an even/odd register pair. This assumes
12177 that the register number is odd when fregno is odd. */
12178 if (mode == TDmode && (cum->fregno % 2) == 1)
12179 cum->fregno++;
12180
12181 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12182 <= FP_ARG_V4_MAX_REG)
12183 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
12184 else
12185 {
12186 cum->fregno = FP_ARG_V4_MAX_REG + 1;
12187 if (mode == DFmode || FLOAT128_IBM_P (mode)
12188 || mode == DDmode || mode == TDmode)
12189 cum->words += cum->words & 1;
12190 cum->words += rs6000_arg_size (mode, type);
12191 }
12192 }
12193 else
12194 {
12195 int n_words = rs6000_arg_size (mode, type);
12196 int gregno = cum->sysv_gregno;
12197
12198 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12199 So is any other 2-word item, such as complex int, due to a
12200 historical mistake. */
12201 if (n_words == 2)
12202 gregno += (1 - gregno) & 1;
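/* E.g. (illustrative): if gregno is 4 (r4), (1 - 4) & 1 == 1 bumps it
   to r5; if gregno is 3 (r3) it stays put, yielding the (r3,r4),
   (r5,r6), ... pairs described above. */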
12203
12204 /* Multi-reg args are not split between registers and stack. */
12205 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12206 {
12207 /* Long long is aligned on the stack. So are other 2 word
12208 items such as complex int due to a historical mistake. */
12209 if (n_words == 2)
12210 cum->words += cum->words & 1;
12211 cum->words += n_words;
12212 }
12213
12214 /* Note: we continue to accumulate gregno even after we've started
12215 spilling to the stack; expand_builtin_saveregs relies on this
12216 to detect that spilling has started. */
12217 cum->sysv_gregno = gregno + n_words;
12218 }
12219
12220 if (TARGET_DEBUG_ARG)
12221 {
12222 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12223 cum->words, cum->fregno);
12224 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
12225 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
12226 fprintf (stderr, "mode = %4s, named = %d\n",
12227 GET_MODE_NAME (mode), named);
12228 }
12229 }
12230 else
12231 {
12232 int n_words = rs6000_arg_size (mode, type);
12233 int start_words = cum->words;
12234 int align_words = rs6000_parm_start (mode, type, start_words);
12235
12236 cum->words = align_words + n_words;
12237
12238 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
12239 {
12240 /* _Decimal128 must be passed in an even/odd float register pair.
12241 This assumes that the register number is odd when fregno is
12242 odd. */
12243 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12244 cum->fregno++;
12245 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
12246 }
12247
12248 if (TARGET_DEBUG_ARG)
12249 {
12250 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
12251 cum->words, cum->fregno);
12252 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
12253 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
12254 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
12255 named, align_words - start_words, depth);
12256 }
12257 }
12258 }
12259
12260 static void
12261 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
12262 const_tree type, bool named)
12263 {
12264 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12265 0);
12266 }
12267
12268 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12269 structure between cum->intoffset and bitpos to integer registers. */
12270
12271 static void
12272 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12273 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12274 {
12275 machine_mode mode;
12276 unsigned int regno;
12277 unsigned int startbit, endbit;
12278 int this_regno, intregs, intoffset;
12279 rtx reg;
12280
12281 if (cum->intoffset == -1)
12282 return;
12283
12284 intoffset = cum->intoffset;
12285 cum->intoffset = -1;
12286
12287 /* If this is the trailing part of a word, try to load only that
12288 much into the register. Otherwise load the whole register. Note
12289 that in the latter case we may pick up unwanted bits. It's not a
12290 problem at the moment, but we may wish to revisit this. */
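/* For instance, on a 64-bit target with intoffset % 64 == 32,
   mode_for_size asks for a 32-bit integer mode (SImode), so only the
   trailing four bytes of the word are loaded. */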
12291
12292 if (intoffset % BITS_PER_WORD != 0)
12293 {
12294 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
12295 MODE_INT, 0);
12296 if (mode == BLKmode)
12297 {
12298 /* We couldn't find an appropriate mode, which happens,
12299 e.g., in packed structs when there are 3 bytes to load.
12300 Move intoffset back to the beginning of the word in this
12301 case. */
12302 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12303 mode = word_mode;
12304 }
12305 }
12306 else
12307 mode = word_mode;
12308
12309 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12310 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12311 intregs = (endbit - startbit) / BITS_PER_WORD;
12312 this_regno = cum->words + intoffset / BITS_PER_WORD;
12313
12314 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12315 cum->use_stack = 1;
12316
12317 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12318 if (intregs <= 0)
12319 return;
12320
12321 intoffset /= BITS_PER_UNIT;
12322 do
12323 {
12324 regno = GP_ARG_MIN_REG + this_regno;
12325 reg = gen_rtx_REG (mode, regno);
12326 rvec[(*k)++] =
12327 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12328
12329 this_regno += 1;
12330 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12331 mode = word_mode;
12332 intregs -= 1;
12333 }
12334 while (intregs > 0);
12335 }
12336
12337 /* Recursive workhorse for the following. */
12338
12339 static void
12340 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12341 HOST_WIDE_INT startbitpos, rtx rvec[],
12342 int *k)
12343 {
12344 tree f;
12345
12346 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12347 if (TREE_CODE (f) == FIELD_DECL)
12348 {
12349 HOST_WIDE_INT bitpos = startbitpos;
12350 tree ftype = TREE_TYPE (f);
12351 machine_mode mode;
12352 if (ftype == error_mark_node)
12353 continue;
12354 mode = TYPE_MODE (ftype);
12355
12356 if (DECL_SIZE (f) != 0
12357 && tree_fits_uhwi_p (bit_position (f)))
12358 bitpos += int_bit_position (f);
12359
12360 /* ??? FIXME: else assume zero offset. */
12361
12362 if (TREE_CODE (ftype) == RECORD_TYPE)
12363 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12364 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12365 {
12366 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12367 #if 0
12368 switch (mode)
12369 {
12370 case SCmode: mode = SFmode; break;
12371 case DCmode: mode = DFmode; break;
12372 case TCmode: mode = TFmode; break;
12373 default: break;
12374 }
12375 #endif
12376 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12377 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12378 {
12379 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12380 && (mode == TFmode || mode == TDmode));
12381 /* Long double or _Decimal128 split over regs and memory. */
12382 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12383 cum->use_stack = 1;
12384 }
12385 rvec[(*k)++]
12386 = gen_rtx_EXPR_LIST (VOIDmode,
12387 gen_rtx_REG (mode, cum->fregno++),
12388 GEN_INT (bitpos / BITS_PER_UNIT));
12389 if (FLOAT128_2REG_P (mode))
12390 cum->fregno++;
12391 }
12392 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12393 {
12394 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12395 rvec[(*k)++]
12396 = gen_rtx_EXPR_LIST (VOIDmode,
12397 gen_rtx_REG (mode, cum->vregno++),
12398 GEN_INT (bitpos / BITS_PER_UNIT));
12399 }
12400 else if (cum->intoffset == -1)
12401 cum->intoffset = bitpos;
12402 }
12403 }
12404
12405 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12406 the register(s) to be used for each field and subfield of a struct
12407 being passed by value, along with the offset of where the
12408 register's value may be found in the block. FP fields go in FP
12409 register, vector fields go in vector registers, and everything
12410 else goes in int registers, packed as in memory.
12411
12412 This code is also used for function return values. RETVAL indicates
12413 whether this is the case.
12414
12415 Much of this is taken from the SPARC V9 port, which has a similar
12416 calling convention. */
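/* As a rough illustration (register numbers hypothetical), a struct
   { double d; int i; } passed by value could come back as
     (parallel [(expr_list (reg:DF f1) (const_int 0))
                (expr_list (reg:DI r4) (const_int 8))])
   pairing each register with the byte offset of its slice of the
   struct. */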
12417
12418 static rtx
12419 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12420 bool named, bool retval)
12421 {
12422 rtx rvec[FIRST_PSEUDO_REGISTER];
12423 int k = 1, kbase = 1;
12424 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12425 /* This is a copy; modifications are not visible to our caller. */
12426 CUMULATIVE_ARGS copy_cum = *orig_cum;
12427 CUMULATIVE_ARGS *cum = &copy_cum;
12428
12429 /* Pad to 16 byte boundary if needed. */
12430 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12431 && (cum->words % 2) != 0)
12432 cum->words++;
12433
12434 cum->intoffset = 0;
12435 cum->use_stack = 0;
12436 cum->named = named;
12437
12438 /* Put entries into rvec[] for individual FP and vector fields, and
12439 for the chunks of memory that go in int regs. Note we start at
12440 element 1; 0 is reserved for an indication of using memory, and
12441 may or may not be filled in below. */
12442 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12443 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12444
12445 /* If any part of the struct went on the stack put all of it there.
12446 This hack is because the generic code for
12447 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12448 parts of the struct are not at the beginning. */
12449 if (cum->use_stack)
12450 {
12451 if (retval)
12452 return NULL_RTX; /* doesn't go in registers at all */
12453 kbase = 0;
12454 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12455 }
12456 if (k > 1 || cum->use_stack)
12457 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12458 else
12459 return NULL_RTX;
12460 }
12461
12462 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12463
12464 static rtx
12465 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12466 int align_words)
12467 {
12468 int n_units;
12469 int i, k;
12470 rtx rvec[GP_ARG_NUM_REG + 1];
12471
12472 if (align_words >= GP_ARG_NUM_REG)
12473 return NULL_RTX;
12474
12475 n_units = rs6000_arg_size (mode, type);
12476
12477 /* Optimize the simple case where the arg fits in one gpr, except in
12478 the case of BLKmode due to assign_parms assuming that registers are
12479 BITS_PER_WORD wide. */
12480 if (n_units == 0
12481 || (n_units == 1 && mode != BLKmode))
12482 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12483
12484 k = 0;
12485 if (align_words + n_units > GP_ARG_NUM_REG)
12486 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12487 using a magic NULL_RTX component.
12488 This is not strictly correct. Only some of the arg belongs in
12489 memory, not all of it. However, the normal scheme using
12490 function_arg_partial_nregs can result in unusual subregs, e.g.
12491 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12492 store the whole arg to memory is often more efficient than code
12493 to store pieces, and we know that space is available in the right
12494 place for the whole arg. */
12495 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12496
12497 i = 0;
12498 do
12499 {
12500 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12501 rtx off = GEN_INT (i++ * 4);
12502 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12503 }
12504 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12505
12506 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12507 }
12508
12509 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12510 but must also be copied into the parameter save area starting at
12511 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12512 to the GPRs and/or memory. Return the number of elements used. */
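/* A sketch of one case: in 64-bit mode, a 16-byte argument with
   align_words == 7 produces the NULL_RTX memory marker plus a single
   DImode element for r10; the caller then appends the FPR or VR
   elements. */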
12513
12514 static int
12515 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12516 int align_words, rtx *rvec)
12517 {
12518 int k = 0;
12519
12520 if (align_words < GP_ARG_NUM_REG)
12521 {
12522 int n_words = rs6000_arg_size (mode, type);
12523
12524 if (align_words + n_words > GP_ARG_NUM_REG
12525 || mode == BLKmode
12526 || (TARGET_32BIT && TARGET_POWERPC64))
12527 {
12528 /* If this is partially on the stack, then we only
12529 include the portion actually in registers here. */
12530 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12531 int i = 0;
12532
12533 if (align_words + n_words > GP_ARG_NUM_REG)
12534 {
12535 /* Not all of the arg fits in gprs. Say that it goes in memory
12536 too, using a magic NULL_RTX component. Also see comment in
12537 rs6000_mixed_function_arg for why the normal
12538 function_arg_partial_nregs scheme doesn't work in this case. */
12539 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12540 }
12541
12542 do
12543 {
12544 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12545 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12546 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12547 }
12548 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12549 }
12550 else
12551 {
12552 /* The whole arg fits in gprs. */
12553 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12554 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12555 }
12556 }
12557 else
12558 {
12559 /* It's entirely in memory. */
12560 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12561 }
12562
12563 return k;
12564 }
12565
12566 /* RVEC is a vector of K components of an argument of mode MODE.
12567 Construct the final function_arg return value from it. */
12568
12569 static rtx
12570 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12571 {
12572 gcc_assert (k >= 1);
12573
12574 /* Avoid returning a PARALLEL in the trivial cases. */
12575 if (k == 1)
12576 {
12577 if (XEXP (rvec[0], 0) == NULL_RTX)
12578 return NULL_RTX;
12579
12580 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12581 return XEXP (rvec[0], 0);
12582 }
12583
12584 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12585 }
12586
12587 /* Determine where to put an argument to a function.
12588 Value is zero to push the argument on the stack,
12589 or a hard register in which to store the argument.
12590
12591 MODE is the argument's machine mode.
12592 TYPE is the data type of the argument (as a tree).
12593 This is null for libcalls where that information may
12594 not be available.
12595 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12596 the preceding args and about the function being called. It is
12597 not modified in this routine.
12598 NAMED is nonzero if this argument is a named parameter
12599 (otherwise it is an extra parameter matching an ellipsis).
12600
12601 On RS/6000 the first eight words of non-FP args are normally in registers
12602 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12603 Under V.4, the first 8 FP args are in registers.
12604
12605 If this is floating-point and no prototype is specified, we use
12606 both an FP and integer register (or possibly FP reg and stack). Library
12607 functions (when CALL_LIBCALL is set) always have the proper types for args,
12608 so we can pass the FP value just in one register. emit_library_function
12609 doesn't support PARALLEL anyway.
12610
12611 Note that for args passed by reference, function_arg will be called
12612 with MODE and TYPE set to that of the pointer to the arg, not the arg
12613 itself. */
12614
12615 static rtx
12616 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12617 const_tree type, bool named)
12618 {
12619 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12620 enum rs6000_abi abi = DEFAULT_ABI;
12621 machine_mode elt_mode;
12622 int n_elts;
12623
12624 /* Return a marker to indicate whether CR1 needs to set or clear the
12625 bit that V.4 uses to say fp args were passed in registers.
12626 Assume that we don't need the marker for software floating point,
12627 or compiler generated library calls. */
12628 if (mode == VOIDmode)
12629 {
12630 if (abi == ABI_V4
12631 && (cum->call_cookie & CALL_LIBCALL) == 0
12632 && (cum->stdarg
12633 || (cum->nargs_prototype < 0
12634 && (cum->prototype || TARGET_NO_PROTOTYPE)))
12635 && TARGET_HARD_FLOAT)
12636 return GEN_INT (cum->call_cookie
12637 | ((cum->fregno == FP_ARG_MIN_REG)
12638 ? CALL_V4_SET_FP_ARGS
12639 : CALL_V4_CLEAR_FP_ARGS));
12640
12641 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12642 }
12643
12644 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12645
12646 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12647 {
12648 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12649 if (rslt != NULL_RTX)
12650 return rslt;
12651 /* Else fall through to usual handling. */
12652 }
12653
12654 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12655 {
12656 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12657 rtx r, off;
12658 int i, k = 0;
12659
12660 /* Do we also need to pass this argument in the parameter save area?
12661 Library support functions for IEEE 128-bit are assumed to not need the
12662 value passed both in GPRs and in vector registers. */
12663 if (TARGET_64BIT && !cum->prototype
12664 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12665 {
12666 int align_words = ROUND_UP (cum->words, 2);
12667 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12668 }
12669
12670 /* Describe where this argument goes in the vector registers. */
12671 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12672 {
12673 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12674 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12675 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12676 }
12677
12678 return rs6000_finish_function_arg (mode, rvec, k);
12679 }
12680 else if (TARGET_ALTIVEC_ABI
12681 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12682 || (type && TREE_CODE (type) == VECTOR_TYPE
12683 && int_size_in_bytes (type) == 16)))
12684 {
12685 if (named || abi == ABI_V4)
12686 return NULL_RTX;
12687 else
12688 {
12689 /* Vector parameters to varargs functions under AIX or Darwin
12690 get passed in memory and possibly also in GPRs. */
12691 int align, align_words, n_words;
12692 machine_mode part_mode;
12693
12694 /* Vector parameters must be 16-byte aligned. In 32-bit
12695 mode this means we need to take into account the offset
12696 to the parameter save area. In 64-bit mode, they just
12697 have to start on an even word, since the parameter save
12698 area is 16-byte aligned. */
12699 if (TARGET_32BIT)
12700 align = -(rs6000_parm_offset () + cum->words) & 3;
12701 else
12702 align = cum->words & 1;
12703 align_words = cum->words + align;
12704
12705 /* Out of registers? Memory, then. */
12706 if (align_words >= GP_ARG_NUM_REG)
12707 return NULL_RTX;
12708
12709 if (TARGET_32BIT && TARGET_POWERPC64)
12710 return rs6000_mixed_function_arg (mode, type, align_words);
12711
12712 /* The vector value goes in GPRs. Only the part of the
12713 value in GPRs is reported here. */
12714 part_mode = mode;
12715 n_words = rs6000_arg_size (mode, type);
12716 if (align_words + n_words > GP_ARG_NUM_REG)
12717 /* Fortunately, there are only two possibilities, the value
12718 is either wholly in GPRs or half in GPRs and half not. */
12719 part_mode = DImode;
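/* E.g. (illustrative): in 32-bit mode, a 16-byte vector with
   align_words == 6 gets its first 8 bytes in r9/r10 as one DImode
   register, and the remaining 8 bytes go to memory. */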
12720
12721 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12722 }
12723 }
12724
12725 else if (abi == ABI_V4)
12726 {
12727 if (abi_v4_pass_in_fpr (mode))
12728 {
12729 /* _Decimal128 must use an even/odd register pair. This assumes
12730 that the register number is odd when fregno is odd. */
12731 if (mode == TDmode && (cum->fregno % 2) == 1)
12732 cum->fregno++;
12733
12734 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12735 <= FP_ARG_V4_MAX_REG)
12736 return gen_rtx_REG (mode, cum->fregno);
12737 else
12738 return NULL_RTX;
12739 }
12740 else
12741 {
12742 int n_words = rs6000_arg_size (mode, type);
12743 int gregno = cum->sysv_gregno;
12744
12745 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12746 So is any other 2-word item, such as complex int, due to a
12747 historical mistake. */
12748 if (n_words == 2)
12749 gregno += (1 - gregno) & 1;
12750
12751 /* Multi-reg args are not split between registers and stack. */
12752 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12753 return NULL_RTX;
12754
12755 if (TARGET_32BIT && TARGET_POWERPC64)
12756 return rs6000_mixed_function_arg (mode, type,
12757 gregno - GP_ARG_MIN_REG);
12758 return gen_rtx_REG (mode, gregno);
12759 }
12760 }
12761 else
12762 {
12763 int align_words = rs6000_parm_start (mode, type, cum->words);
12764
12765 /* _Decimal128 must be passed in an even/odd float register pair.
12766 This assumes that the register number is odd when fregno is odd. */
12767 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12768 cum->fregno++;
12769
12770 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12771 {
12772 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12773 rtx r, off;
12774 int i, k = 0;
12775 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12776 int fpr_words;
12777
12778 /* Do we also need to pass this argument in the parameter
12779 save area? */
12780 if (type && (cum->nargs_prototype <= 0
12781 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12782 && TARGET_XL_COMPAT
12783 && align_words >= GP_ARG_NUM_REG)))
12784 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12785
12786 /* Describe where this argument goes in the fprs. */
12787 for (i = 0; i < n_elts
12788 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12789 {
12790 /* Check if the argument is split over registers and memory.
12791 This can only ever happen for long double or _Decimal128;
12792 complex types are handled via split_complex_arg. */
12793 machine_mode fmode = elt_mode;
12794 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12795 {
12796 gcc_assert (FLOAT128_2REG_P (fmode));
12797 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12798 }
12799
12800 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12801 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12802 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12803 }
12804
12805 /* If there were not enough FPRs to hold the argument, the rest
12806 usually goes into memory. However, if the current position
12807 is still within the register parameter area, a portion may
12808 actually have to go into GPRs.
12809
12810 Note that it may happen that the portion of the argument
12811 passed in the first "half" of the first GPR was already
12812 passed in the last FPR as well.
12813
12814 For unnamed arguments, we already set up GPRs to cover the
12815 whole argument in rs6000_psave_function_arg, so there is
12816 nothing further to do at this point. */
12817 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12818 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12819 && cum->nargs_prototype > 0)
12820 {
12821 static bool warned;
12822
12823 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12824 int n_words = rs6000_arg_size (mode, type);
12825
12826 align_words += fpr_words;
12827 n_words -= fpr_words;
12828
12829 do
12830 {
12831 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12832 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12833 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12834 }
12835 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12836
12837 if (!warned && warn_psabi)
12838 {
12839 warned = true;
12840 inform (input_location,
12841 "the ABI of passing homogeneous float aggregates"
12842 " has changed in GCC 5");
12843 }
12844 }
12845
12846 return rs6000_finish_function_arg (mode, rvec, k);
12847 }
12848 else if (align_words < GP_ARG_NUM_REG)
12849 {
12850 if (TARGET_32BIT && TARGET_POWERPC64)
12851 return rs6000_mixed_function_arg (mode, type, align_words);
12852
12853 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12854 }
12855 else
12856 return NULL_RTX;
12857 }
12858 }
12859 \f
12860 /* For an arg passed partly in registers and partly in memory, this is
12861 the number of bytes passed in registers. For args passed entirely in
12862 registers or entirely in memory, zero. When an arg is described by a
12863 PARALLEL, perhaps using more than one register type, this function
12864 returns the number of bytes used by the first element of the PARALLEL. */
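/* Illustrative example: a 32-byte aggregate whose first word lands at
   align_words == 6 has only r9 and r10 left, so 8 bytes in 32-bit mode
   (16 in 64-bit mode) are passed in registers and that byte count is
   returned. */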
12865
12866 static int
12867 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12868 tree type, bool named)
12869 {
12870 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12871 bool passed_in_gprs = true;
12872 int ret = 0;
12873 int align_words;
12874 machine_mode elt_mode;
12875 int n_elts;
12876
12877 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12878
12879 if (DEFAULT_ABI == ABI_V4)
12880 return 0;
12881
12882 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12883 {
12884 /* If we are passing this arg in the fixed parameter save area (gprs or
12885 memory) as well as VRs, we do not use the partial bytes mechanism;
12886 instead, rs6000_function_arg will return a PARALLEL including a memory
12887 element as necessary. Library support functions for IEEE 128-bit are
12888 assumed to not need the value passed both in GPRs and in vector
12889 registers. */
12890 if (TARGET_64BIT && !cum->prototype
12891 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12892 return 0;
12893
12894 /* Otherwise, we pass in VRs only. Check for partial copies. */
12895 passed_in_gprs = false;
12896 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12897 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12898 }
12899
12900 /* In this complicated case we just disable the partial_nregs code. */
12901 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12902 return 0;
12903
12904 align_words = rs6000_parm_start (mode, type, cum->words);
12905
12906 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12907 {
12908 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12909
12910 /* If we are passing this arg in the fixed parameter save area
12911 (gprs or memory) as well as FPRs, we do not use the partial
12912 bytes mechanism; instead, rs6000_function_arg will return a
12913 PARALLEL including a memory element as necessary. */
12914 if (type
12915 && (cum->nargs_prototype <= 0
12916 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12917 && TARGET_XL_COMPAT
12918 && align_words >= GP_ARG_NUM_REG)))
12919 return 0;
12920
12921 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12922 passed_in_gprs = false;
12923 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12924 {
12925 /* Compute number of bytes / words passed in FPRs. If there
12926 is still space available in the register parameter area
12927 *after* that amount, a part of the argument will be passed
12928 in GPRs. In that case, the total amount passed in any
12929 registers is equal to the amount that would have been passed
12930 in GPRs if everything were passed there, so we fall back to
12931 the GPR code below to compute the appropriate value. */
12932 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12933 * MIN (8, GET_MODE_SIZE (elt_mode)));
12934 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12935
12936 if (align_words + fpr_words < GP_ARG_NUM_REG)
12937 passed_in_gprs = true;
12938 else
12939 ret = fpr;
12940 }
12941 }
12942
12943 if (passed_in_gprs
12944 && align_words < GP_ARG_NUM_REG
12945 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12946 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12947
12948 if (ret != 0 && TARGET_DEBUG_ARG)
12949 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12950
12951 return ret;
12952 }
12953 \f
12954 /* A C expression that indicates when an argument must be passed by
12955 reference. If nonzero for an argument, a copy of that argument is
12956 made in memory and a pointer to the argument is passed instead of
12957 the argument itself. The pointer is passed in whatever way is
12958 appropriate for passing a pointer to that type.
12959
12960 Under V.4, aggregates and long double are passed by reference.
12961
12962 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12963 reference unless the AltiVec vector extension ABI is in force.
12964
12965 As an extension to all ABIs, variable sized types are passed by
12966 reference. */
12967
12968 static bool
12969 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12970 machine_mode mode, const_tree type,
12971 bool named ATTRIBUTE_UNUSED)
12972 {
12973 if (!type)
12974 return 0;
12975
12976 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12977 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12978 {
12979 if (TARGET_DEBUG_ARG)
12980 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12981 return 1;
12982 }
12983
12984 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12985 {
12986 if (TARGET_DEBUG_ARG)
12987 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12988 return 1;
12989 }
12990
12991 if (int_size_in_bytes (type) < 0)
12992 {
12993 if (TARGET_DEBUG_ARG)
12994 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12995 return 1;
12996 }
12997
12998 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12999 modes only exist for GCC vector types if -maltivec. */
13000 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13001 {
13002 if (TARGET_DEBUG_ARG)
13003 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
13004 return 1;
13005 }
13006
13007 /* Pass synthetic vectors in memory. */
13008 if (TREE_CODE (type) == VECTOR_TYPE
13009 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
13010 {
13011 static bool warned_for_pass_big_vectors = false;
13012 if (TARGET_DEBUG_ARG)
13013 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
13014 if (!warned_for_pass_big_vectors)
13015 {
13016 warning (OPT_Wpsabi, "GCC vector passed by reference: "
13017 "non-standard ABI extension with no compatibility guarantee");
13018 warned_for_pass_big_vectors = true;
13019 }
13020 return 1;
13021 }
13022
13023 return 0;
13024 }
13025
13026 /* Process a parameter of type TYPE after ARGS_SO_FAR parameters have
13027 already been processed. Return true if the parameter must be passed
13028 (fully or partially) on the stack. */
13029
13030 static bool
13031 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
13032 {
13033 machine_mode mode;
13034 int unsignedp;
13035 rtx entry_parm;
13036
13037 /* Catch errors. */
13038 if (type == NULL || type == error_mark_node)
13039 return true;
13040
13041 /* Handle types with no storage requirement. */
13042 if (TYPE_MODE (type) == VOIDmode)
13043 return false;
13044
13045 /* Handle complex types: the real and imaginary parts are passed as two args of the element type, hence the two calls below. */
13046 if (TREE_CODE (type) == COMPLEX_TYPE)
13047 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
13048 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
13049
13050 /* Handle transparent aggregates. */
13051 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
13052 && TYPE_TRANSPARENT_AGGR (type))
13053 type = TREE_TYPE (first_field (type));
13054
13055 /* See if this arg was passed by invisible reference. */
13056 if (pass_by_reference (get_cumulative_args (args_so_far),
13057 TYPE_MODE (type), type, true))
13058 type = build_pointer_type (type);
13059
13060 /* Find mode as it is passed by the ABI. */
13061 unsignedp = TYPE_UNSIGNED (type);
13062 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
13063
13064 /* If we must pass in stack, we need a stack. */
13065 if (rs6000_must_pass_in_stack (mode, type))
13066 return true;
13067
13068 /* If there is no incoming register, we need a stack. */
13069 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
13070 if (entry_parm == NULL)
13071 return true;
13072
13073 /* Likewise if we need to pass both in registers and on the stack. */
13074 if (GET_CODE (entry_parm) == PARALLEL
13075 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
13076 return true;
13077
13078 /* Also true if we're partially in registers and partially not. */
13079 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
13080 return true;
13081
13082 /* Update info on where next arg arrives in registers. */
13083 rs6000_function_arg_advance (args_so_far, mode, type, true);
13084 return false;
13085 }
13086
13087 /* Return true if FUN has no prototype, has a variable argument
13088 list, or passes any parameter in memory. */
13089
13090 static bool
13091 rs6000_function_parms_need_stack (tree fun, bool incoming)
13092 {
13093 tree fntype, result;
13094 CUMULATIVE_ARGS args_so_far_v;
13095 cumulative_args_t args_so_far;
13096
13097 if (!fun)
13098 /* Must be a libcall, all of which only use reg parms. */
13099 return false;
13100
13101 fntype = fun;
13102 if (!TYPE_P (fun))
13103 fntype = TREE_TYPE (fun);
13104
13105 /* Varargs functions need the parameter save area. */
13106 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
13107 return true;
13108
13109 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
13110 args_so_far = pack_cumulative_args (&args_so_far_v);
13111
13112 /* When incoming, we will have been passed the function decl.
13113 It is necessary to use the decl to handle K&R style functions,
13114 where TYPE_ARG_TYPES may not be available. */
13115 if (incoming)
13116 {
13117 gcc_assert (DECL_P (fun));
13118 result = DECL_RESULT (fun);
13119 }
13120 else
13121 result = TREE_TYPE (fntype);
13122
13123 if (result && aggregate_value_p (result, fntype))
13124 {
13125 if (!TYPE_P (result))
13126 result = TREE_TYPE (result);
13127 result = build_pointer_type (result);
13128 rs6000_parm_needs_stack (args_so_far, result);
13129 }
13130
13131 if (incoming)
13132 {
13133 tree parm;
13134
13135 for (parm = DECL_ARGUMENTS (fun);
13136 parm && parm != void_list_node;
13137 parm = TREE_CHAIN (parm))
13138 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
13139 return true;
13140 }
13141 else
13142 {
13143 function_args_iterator args_iter;
13144 tree arg_type;
13145
13146 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
13147 if (rs6000_parm_needs_stack (args_so_far, arg_type))
13148 return true;
13149 }
13150
13151 return false;
13152 }
13153
13154 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
13155 usually a constant depending on the ABI. However, in the ELFv2 ABI
13156 the register parameter area is optional when calling a function that
13157 has a prototype in scope, has no variable argument list, and passes
13158 all parameters in registers. */
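/* For instance, under ELFv2 a call to a fully prototyped
   int f (int, int) needs no parameter save area at all, while a
   varargs callee always gets the full 64 bytes (8 doublewords). */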
13159
13160 int
13161 rs6000_reg_parm_stack_space (tree fun, bool incoming)
13162 {
13163 int reg_parm_stack_space;
13164
13165 switch (DEFAULT_ABI)
13166 {
13167 default:
13168 reg_parm_stack_space = 0;
13169 break;
13170
13171 case ABI_AIX:
13172 case ABI_DARWIN:
13173 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13174 break;
13175
13176 case ABI_ELFv2:
13177 /* ??? Recomputing this every time is a bit expensive. Is there
13178 a place to cache this information? */
13179 if (rs6000_function_parms_need_stack (fun, incoming))
13180 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
13181 else
13182 reg_parm_stack_space = 0;
13183 break;
13184 }
13185
13186 return reg_parm_stack_space;
13187 }
13188
13189 static void
13190 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
13191 {
13192 int i;
13193 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
13194
13195 if (nregs == 0)
13196 return;
13197
13198 for (i = 0; i < nregs; i++)
13199 {
13200 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
13201 if (reload_completed)
13202 {
13203 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
13204 tem = NULL_RTX;
13205 else
13206 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
13207 i * GET_MODE_SIZE (reg_mode));
13208 }
13209 else
13210 tem = replace_equiv_address (tem, XEXP (tem, 0));
13211
13212 gcc_assert (tem);
13213
13214 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
13215 }
13216 }
13217 \f
13218 /* Perform any actions needed for a function that is receiving a
13219 variable number of arguments.
13220
13221 CUM is as above.
13222
13223 MODE and TYPE are the mode and type of the current parameter.
13224
13225 PRETEND_SIZE is a variable that should be set to the amount of stack
13226 that must be pushed by the prolog to pretend that our caller pushed
13227 it.
13228
13229 Normally, this macro will push all remaining incoming registers on the
13230 stack and set PRETEND_SIZE to the length of the registers pushed. */
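/* Roughly, for a V.4 function such as void f (int a, ...), the named
   argument a arrives in r3, so r4..r10 are dumped to the register save
   area, along with f1..f8 when hardware floating point is enabled. */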
13231
13232 static void
13233 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
13234 tree type, int *pretend_size ATTRIBUTE_UNUSED,
13235 int no_rtl)
13236 {
13237 CUMULATIVE_ARGS next_cum;
13238 int reg_size = TARGET_32BIT ? 4 : 8;
13239 rtx save_area = NULL_RTX, mem;
13240 int first_reg_offset;
13241 alias_set_type set;
13242
13243 /* Skip the last named argument. */
13244 next_cum = *get_cumulative_args (cum);
13245 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
13246
13247 if (DEFAULT_ABI == ABI_V4)
13248 {
13249 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
13250
13251 if (! no_rtl)
13252 {
13253 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
13254 HOST_WIDE_INT offset = 0;
13255
13256 /* Try to optimize the size of the varargs save area.
13257 The ABI requires that ap.reg_save_area is doubleword
13258 aligned, but we don't need to allocate space for all
13259 the bytes, only for those to which we will actually
13260 save anything. */
13261 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
13262 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
13263 if (TARGET_HARD_FLOAT
13264 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13265 && cfun->va_list_fpr_size)
13266 {
13267 if (gpr_reg_num)
13268 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13269 * UNITS_PER_FP_WORD;
13270 if (cfun->va_list_fpr_size
13271 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13272 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13273 else
13274 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13275 * UNITS_PER_FP_WORD;
13276 }
13277 if (gpr_reg_num)
13278 {
13279 offset = -((first_reg_offset * reg_size) & ~7);
13280 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13281 {
13282 gpr_reg_num = cfun->va_list_gpr_size;
13283 if (reg_size == 4 && (first_reg_offset & 1))
13284 gpr_reg_num++;
13285 }
13286 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13287 }
13288 else if (fpr_size)
13289 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13290 * UNITS_PER_FP_WORD
13291 - (int) (GP_ARG_NUM_REG * reg_size);
13292
13293 if (gpr_size + fpr_size)
13294 {
13295 rtx reg_save_area
13296 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13297 gcc_assert (GET_CODE (reg_save_area) == MEM);
13298 reg_save_area = XEXP (reg_save_area, 0);
13299 if (GET_CODE (reg_save_area) == PLUS)
13300 {
13301 gcc_assert (XEXP (reg_save_area, 0)
13302 == virtual_stack_vars_rtx);
13303 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13304 offset += INTVAL (XEXP (reg_save_area, 1));
13305 }
13306 else
13307 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13308 }
13309
13310 cfun->machine->varargs_save_offset = offset;
13311 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13312 }
13313 }
13314 else
13315 {
13316 first_reg_offset = next_cum.words;
13317 save_area = crtl->args.internal_arg_pointer;
13318
13319 if (targetm.calls.must_pass_in_stack (mode, type))
13320 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13321 }
13322
13323 set = get_varargs_alias_set ();
13324 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13325 && cfun->va_list_gpr_size)
13326 {
13327 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13328
13329 if (va_list_gpr_counter_field)
13330 /* V4 va_list_gpr_size counts number of registers needed. */
13331 n_gpr = cfun->va_list_gpr_size;
13332 else
13333 /* char * va_list instead counts number of bytes needed. */
13334 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13335
13336 if (nregs > n_gpr)
13337 nregs = n_gpr;
13338
13339 mem = gen_rtx_MEM (BLKmode,
13340 plus_constant (Pmode, save_area,
13341 first_reg_offset * reg_size));
13342 MEM_NOTRAP_P (mem) = 1;
13343 set_mem_alias_set (mem, set);
13344 set_mem_align (mem, BITS_PER_WORD);
13345
13346 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13347 nregs);
13348 }
13349
13350 /* Save FP registers if needed. */
13351 if (DEFAULT_ABI == ABI_V4
13352 && TARGET_HARD_FLOAT
13353 && ! no_rtl
13354 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13355 && cfun->va_list_fpr_size)
13356 {
13357 int fregno = next_cum.fregno, nregs;
13358 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13359 rtx lab = gen_label_rtx ();
13360 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13361 * UNITS_PER_FP_WORD);
13362
13363 emit_jump_insn
13364 (gen_rtx_SET (pc_rtx,
13365 gen_rtx_IF_THEN_ELSE (VOIDmode,
13366 gen_rtx_NE (VOIDmode, cr1,
13367 const0_rtx),
13368 gen_rtx_LABEL_REF (VOIDmode, lab),
13369 pc_rtx)));
13370
13371 for (nregs = 0;
13372 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13373 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13374 {
13375 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13376 ? DFmode : SFmode,
13377 plus_constant (Pmode, save_area, off));
13378 MEM_NOTRAP_P (mem) = 1;
13379 set_mem_alias_set (mem, set);
13380 set_mem_align (mem, GET_MODE_ALIGNMENT (
13381 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13382 ? DFmode : SFmode));
13383 emit_move_insn (mem, gen_rtx_REG (
13384 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13385 ? DFmode : SFmode, fregno));
13386 }
13387
13388 emit_label (lab);
13389 }
13390 }
13391
13392 /* Create the va_list data type. */
13393
13394 static tree
13395 rs6000_build_builtin_va_list (void)
13396 {
13397 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13398
13399 /* For AIX, prefer 'char *' because that's what the system
13400 header files like. */
13401 if (DEFAULT_ABI != ABI_V4)
13402 return build_pointer_type (char_type_node);
13403
13404 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13405 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13406 get_identifier ("__va_list_tag"), record);
13407
13408 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13409 unsigned_char_type_node);
13410 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13411 unsigned_char_type_node);
13412 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13413 every user file. */
13414 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13415 get_identifier ("reserved"), short_unsigned_type_node);
13416 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13417 get_identifier ("overflow_arg_area"),
13418 ptr_type_node);
13419 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13420 get_identifier ("reg_save_area"),
13421 ptr_type_node);
13422
13423 va_list_gpr_counter_field = f_gpr;
13424 va_list_fpr_counter_field = f_fpr;
13425
13426 DECL_FIELD_CONTEXT (f_gpr) = record;
13427 DECL_FIELD_CONTEXT (f_fpr) = record;
13428 DECL_FIELD_CONTEXT (f_res) = record;
13429 DECL_FIELD_CONTEXT (f_ovf) = record;
13430 DECL_FIELD_CONTEXT (f_sav) = record;
13431
13432 TYPE_STUB_DECL (record) = type_decl;
13433 TYPE_NAME (record) = type_decl;
13434 TYPE_FIELDS (record) = f_gpr;
13435 DECL_CHAIN (f_gpr) = f_fpr;
13436 DECL_CHAIN (f_fpr) = f_res;
13437 DECL_CHAIN (f_res) = f_ovf;
13438 DECL_CHAIN (f_ovf) = f_sav;
13439
13440 layout_type (record);
13441
13442 /* The correct type is an array type of one element. */
13443 return build_array_type (record, build_index_type (size_zero_node));
13444 }
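/* Spelled out in C, the V.4 va_list built above is roughly:

     typedef struct __va_list_tag {
       unsigned char gpr;            -- GP arg registers consumed
       unsigned char fpr;            -- FP arg registers consumed
       unsigned short reserved;      -- named padding, for -Wpadded
       void *overflow_arg_area;      -- args that spilled to the stack
       void *reg_save_area;          -- where r3..r10 / f1..f8 are saved
     } va_list[1];  */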
13445
13446 /* Implement va_start. */
13447
13448 static void
13449 rs6000_va_start (tree valist, rtx nextarg)
13450 {
13451 HOST_WIDE_INT words, n_gpr, n_fpr;
13452 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13453 tree gpr, fpr, ovf, sav, t;
13454
13455 /* Only SVR4 needs something special. */
13456 if (DEFAULT_ABI != ABI_V4)
13457 {
13458 std_expand_builtin_va_start (valist, nextarg);
13459 return;
13460 }
13461
13462 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13463 f_fpr = DECL_CHAIN (f_gpr);
13464 f_res = DECL_CHAIN (f_fpr);
13465 f_ovf = DECL_CHAIN (f_res);
13466 f_sav = DECL_CHAIN (f_ovf);
13467
13468 valist = build_simple_mem_ref (valist);
13469 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13470 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13471 f_fpr, NULL_TREE);
13472 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13473 f_ovf, NULL_TREE);
13474 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13475 f_sav, NULL_TREE);
13476
13477 /* Count number of gp and fp argument registers used. */
13478 words = crtl->args.info.words;
13479 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13480 GP_ARG_NUM_REG);
13481 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13482 FP_ARG_NUM_REG);
13483
13484 if (TARGET_DEBUG_ARG)
13485 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13486 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13487 words, n_gpr, n_fpr);
13488
13489 if (cfun->va_list_gpr_size)
13490 {
13491 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13492 build_int_cst (NULL_TREE, n_gpr));
13493 TREE_SIDE_EFFECTS (t) = 1;
13494 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13495 }
13496
13497 if (cfun->va_list_fpr_size)
13498 {
13499 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13500 build_int_cst (NULL_TREE, n_fpr));
13501 TREE_SIDE_EFFECTS (t) = 1;
13502 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13503
13504 #ifdef HAVE_AS_GNU_ATTRIBUTE
13505 if (call_ABI_of_interest (cfun->decl))
13506 rs6000_passes_float = true;
13507 #endif
13508 }
13509
13510 /* Find the overflow area. */
13511 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13512 if (words != 0)
13513 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13514 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13515 TREE_SIDE_EFFECTS (t) = 1;
13516 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13517
13518 /* If there were no va_arg invocations, don't set up the register
13519 save area. */
13520 if (!cfun->va_list_gpr_size
13521 && !cfun->va_list_fpr_size
13522 && n_gpr < GP_ARG_NUM_REG
13523 && n_fpr < FP_ARG_V4_MAX_REG)
13524 return;
13525
13526 /* Find the register save area. */
13527 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13528 if (cfun->machine->varargs_save_offset)
13529 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13530 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13531 TREE_SIDE_EFFECTS (t) = 1;
13532 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13533 }
13534
13535 /* Implement va_arg. */
13536
13537 static tree
13538 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13539 gimple_seq *post_p)
13540 {
13541 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13542 tree gpr, fpr, ovf, sav, reg, t, u;
13543 int size, rsize, n_reg, sav_ofs, sav_scale;
13544 tree lab_false, lab_over, addr;
13545 int align;
13546 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13547 int regalign = 0;
13548 gimple *stmt;
13549
13550 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13551 {
13552 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13553 return build_va_arg_indirect_ref (t);
13554 }
13555
13556 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13557 earlier version of gcc, with the property that it always applied alignment
13558 adjustments to the va-args (even for zero-sized types). The cheapest way
13559 to deal with this is to replicate the effect of the part of
13560 std_gimplify_va_arg_expr that carries out the align adjustment, for the
13561 relevant case.
13562 We don't need to check for pass-by-reference because of the test above.
13563 We can return a simplified answer, since we know there's no offset to add. */
13564
13565 if (((TARGET_MACHO
13566 && rs6000_darwin64_abi)
13567 || DEFAULT_ABI == ABI_ELFv2
13568 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13569 && integer_zerop (TYPE_SIZE (type)))
13570 {
13571 unsigned HOST_WIDE_INT align, boundary;
13572 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13573 align = PARM_BOUNDARY / BITS_PER_UNIT;
13574 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13575 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13576 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13577 boundary /= BITS_PER_UNIT;
13578 if (boundary > align)
13579 {
13580 tree t;
13581 /* This updates arg ptr by the amount that would be necessary
13582 to align the zero-sized (but not zero-alignment) item. */
13583 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13584 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13585 gimplify_and_add (t, pre_p);
13586
13587 t = fold_convert (sizetype, valist_tmp);
13588 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13589 fold_convert (TREE_TYPE (valist),
13590 fold_build2 (BIT_AND_EXPR, sizetype, t,
13591 size_int (-boundary))));
13592 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13593 gimplify_and_add (t, pre_p);
13594 }
13595 /* Since it is zero-sized there's no increment for the item itself. */
13596 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13597 return build_va_arg_indirect_ref (valist_tmp);
13598 }
13599
13600 if (DEFAULT_ABI != ABI_V4)
13601 {
13602 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13603 {
13604 tree elem_type = TREE_TYPE (type);
13605 machine_mode elem_mode = TYPE_MODE (elem_type);
13606 int elem_size = GET_MODE_SIZE (elem_mode);
13607
13608 if (elem_size < UNITS_PER_WORD)
13609 {
13610 tree real_part, imag_part;
13611 gimple_seq post = NULL;
13612
13613 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13614 &post);
13615 /* Copy the value into a temporary, lest the formal temporary
13616 be reused out from under us. */
13617 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13618 gimple_seq_add_seq (pre_p, post);
13619
13620 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13621 post_p);
13622
13623 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13624 }
13625 }
13626
13627 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13628 }
13629
13630 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13631 f_fpr = DECL_CHAIN (f_gpr);
13632 f_res = DECL_CHAIN (f_fpr);
13633 f_ovf = DECL_CHAIN (f_res);
13634 f_sav = DECL_CHAIN (f_ovf);
13635
13636 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13637 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13638 f_fpr, NULL_TREE);
13639 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13640 f_ovf, NULL_TREE);
13641 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13642 f_sav, NULL_TREE);
13643
13644 size = int_size_in_bytes (type);
13645 rsize = (size + 3) / 4;
13646 int pad = 4 * rsize - size;
13647 align = 1;
13648
13649 machine_mode mode = TYPE_MODE (type);
13650 if (abi_v4_pass_in_fpr (mode))
13651 {
13652 /* FP args go in FP registers, if present. */
13653 reg = fpr;
13654 n_reg = (size + 7) / 8;
13655 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13656 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13657 if (mode != SFmode && mode != SDmode)
13658 align = 8;
13659 }
13660 else
13661 {
13662 /* Otherwise into GP registers. */
13663 reg = gpr;
13664 n_reg = rsize;
13665 sav_ofs = 0;
13666 sav_scale = 4;
13667 if (n_reg == 2)
13668 align = 8;
13669 }
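/* To make the offsets concrete: with hard float and double float, the
   save area lays out eight 4-byte GPR slots first, so FP values start
   at sav_ofs == 8 * 4 == 32 and advance in 8-byte (sav_scale) steps;
   GP values start at offset 0 in 4-byte steps. */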
13670
13671 /* Pull the value out of the saved registers.... */
13672
13673 lab_over = NULL;
13674 addr = create_tmp_var (ptr_type_node, "addr");
13675
13676 /* AltiVec vectors never go in registers when -mabi=altivec. */
13677 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13678 align = 16;
13679 else
13680 {
13681 lab_false = create_artificial_label (input_location);
13682 lab_over = create_artificial_label (input_location);
13683
13684 /* Long long is aligned in the registers, as is any other 2-gpr
13685 item such as complex int, due to a historical mistake. */
13686 u = reg;
13687 if (n_reg == 2 && reg == gpr)
13688 {
13689 regalign = 1;
13690 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13691 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13692 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13693 unshare_expr (reg), u);
13694 }
13695 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13696 reg number is 0 for f1, so we want to make it odd. */
13697 else if (reg == fpr && mode == TDmode)
13698 {
13699 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13700 build_int_cst (TREE_TYPE (reg), 1));
13701 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13702 }
13703
13704 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13705 t = build2 (GE_EXPR, boolean_type_node, u, t);
13706 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13707 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13708 gimplify_and_add (t, pre_p);
13709
13710 t = sav;
13711 if (sav_ofs)
13712 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13713
13714 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13715 build_int_cst (TREE_TYPE (reg), n_reg));
13716 u = fold_convert (sizetype, u);
13717 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13718 t = fold_build_pointer_plus (t, u);
13719
13720 /* _Decimal32 varargs are located in the second word of the 64-bit
13721 FP register for 32-bit binaries. */
13722 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
13723 t = fold_build_pointer_plus_hwi (t, size);
13724
13725 /* Args are passed right-aligned. */
13726 if (BYTES_BIG_ENDIAN)
13727 t = fold_build_pointer_plus_hwi (t, pad);
13728
13729 gimplify_assign (addr, t, pre_p);
13730
13731 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13732
13733 stmt = gimple_build_label (lab_false);
13734 gimple_seq_add_stmt (pre_p, stmt);
13735
13736 if ((n_reg == 2 && !regalign) || n_reg > 2)
13737 {
13738 /* Ensure that we don't find any more args in regs.
13739 Alignment has already taken care of the special cases. */
13740 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13741 }
13742 }
13743
13744 /* ... otherwise out of the overflow area. */
13745
13746 /* Care for on-stack alignment if needed. */
13747 t = ovf;
13748 if (align != 1)
13749 {
13750 t = fold_build_pointer_plus_hwi (t, align - 1);
13751 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13752 build_int_cst (TREE_TYPE (t), -align));
13753 }
13754
13755 /* Args are passed right-aligned. */
13756 if (BYTES_BIG_ENDIAN)
13757 t = fold_build_pointer_plus_hwi (t, pad);
13758
13759 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13760
13761 gimplify_assign (unshare_expr (addr), t, pre_p);
13762
13763 t = fold_build_pointer_plus_hwi (t, size);
13764 gimplify_assign (unshare_expr (ovf), t, pre_p);
13765
13766 if (lab_over)
13767 {
13768 stmt = gimple_build_label (lab_over);
13769 gimple_seq_add_stmt (pre_p, stmt);
13770 }
13771
13772 if (STRICT_ALIGNMENT
13773 && (TYPE_ALIGN (type)
13774 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13775 {
13776 /* The value (of type complex double, for example) may not be
13777 aligned in memory in the saved registers, so copy via a
13778 temporary. (This is the same code as used for SPARC.) */
13779 tree tmp = create_tmp_var (type, "va_arg_tmp");
13780 tree dest_addr = build_fold_addr_expr (tmp);
13781
13782 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13783 3, dest_addr, addr, size_int (rsize * 4));
13784
13785 gimplify_and_add (copy, pre_p);
13786 addr = dest_addr;
13787 }
13788
13789 addr = fold_convert (ptrtype, addr);
13790 return build_va_arg_indirect_ref (addr);
13791 }
13792
13793 /* Builtins. */
13794
13795 static void
13796 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13797 {
13798 tree t;
13799 unsigned classify = rs6000_builtin_info[(int)code].attr;
13800 const char *attr_string = "";
13801
13802 gcc_assert (name != NULL);
13803 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13804
13805 if (rs6000_builtin_decls[(int)code])
13806 fatal_error (input_location,
13807 "internal error: builtin function %s already processed", name);
13808
13809 rs6000_builtin_decls[(int)code] = t =
13810 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13811
13812 /* Set any special attributes. */
13813 if ((classify & RS6000_BTC_CONST) != 0)
13814 {
13815 /* const function, function only depends on the inputs. */
13816 TREE_READONLY (t) = 1;
13817 TREE_NOTHROW (t) = 1;
13818 attr_string = ", const";
13819 }
13820 else if ((classify & RS6000_BTC_PURE) != 0)
13821 {
13822 /* pure function, function can read global memory, but does not set any
13823 external state. */
13824 DECL_PURE_P (t) = 1;
13825 TREE_NOTHROW (t) = 1;
13826 attr_string = ", pure";
13827 }
13828 else if ((classify & RS6000_BTC_FP) != 0)
13829 {
13830 /* Function is a math function. If rounding mode is on, then treat the
13831 function as not reading global memory, but it can have arbitrary side
13832 effects. If it is off, then assume the function is a const function.
13833 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13834 builtin-attrs.def that is used for the math functions. */
13835 TREE_NOTHROW (t) = 1;
13836 if (flag_rounding_math)
13837 {
13838 DECL_PURE_P (t) = 1;
13839 DECL_IS_NOVOPS (t) = 1;
13840 attr_string = ", fp, pure";
13841 }
13842 else
13843 {
13844 TREE_READONLY (t) = 1;
13845 attr_string = ", fp, const";
13846 }
13847 }
13848 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13849 gcc_unreachable ();
13850
13851 if (TARGET_DEBUG_BUILTIN)
13852 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13853 (int)code, name, attr_string);
13854 }
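/* For example (illustrative only; the function-type node name below is
   hypothetical), an AltiVec builtin might be registered as
     def_builtin ("__builtin_altivec_vmaddfp", v4sf_ftype_v4sf_v4sf_v4sf,
                  ALTIVEC_BUILTIN_VMADDFP);
   after which rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP] holds the
   decl and the RS6000_BTC_* bits above determine its attributes.  */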
13855
13856 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13857
13858 #undef RS6000_BUILTIN_0
13859 #undef RS6000_BUILTIN_1
13860 #undef RS6000_BUILTIN_2
13861 #undef RS6000_BUILTIN_3
13862 #undef RS6000_BUILTIN_A
13863 #undef RS6000_BUILTIN_D
13864 #undef RS6000_BUILTIN_H
13865 #undef RS6000_BUILTIN_P
13866 #undef RS6000_BUILTIN_Q
13867 #undef RS6000_BUILTIN_X
13868
13869 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13870 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13871 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13872 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13873 { MASK, ICODE, NAME, ENUM },
13874
13875 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13876 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13877 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13878 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13879 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13880 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13881
13882 static const struct builtin_description bdesc_3arg[] =
13883 {
13884 #include "rs6000-builtin.def"
13885 };
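/* The bdesc_* tables in this file are all built with the same "X macro"
   technique: rs6000-builtin.def invokes one RS6000_BUILTIN_<kind> macro
   per builtin, and each table redefines exactly one of those macros to
   expand to a { MASK, ICODE, NAME, ENUM } initializer while the others
   expand to nothing.  As an illustrative sketch (not the literal .def
   contents), an entry such as
     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
                       RS6000_BTM_ALTIVEC, RS6000_BTC_FP, CODE_FOR_fmav4sf4)
   contributes one row to bdesc_3arg above and vanishes from every other
   table.  */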
13886
13887 /* DST operations: void foo (void *, const int, const char). */
13888
13889 #undef RS6000_BUILTIN_0
13890 #undef RS6000_BUILTIN_1
13891 #undef RS6000_BUILTIN_2
13892 #undef RS6000_BUILTIN_3
13893 #undef RS6000_BUILTIN_A
13894 #undef RS6000_BUILTIN_D
13895 #undef RS6000_BUILTIN_H
13896 #undef RS6000_BUILTIN_P
13897 #undef RS6000_BUILTIN_Q
13898 #undef RS6000_BUILTIN_X
13899
13900 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13901 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13902 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13903 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13904 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13905 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13906 { MASK, ICODE, NAME, ENUM },
13907
13908 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13909 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13910 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13911 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13912
13913 static const struct builtin_description bdesc_dst[] =
13914 {
13915 #include "rs6000-builtin.def"
13916 };
13917
13918 /* Simple binary operations: VECc = foo (VECa, VECb). */
13919
13920 #undef RS6000_BUILTIN_0
13921 #undef RS6000_BUILTIN_1
13922 #undef RS6000_BUILTIN_2
13923 #undef RS6000_BUILTIN_3
13924 #undef RS6000_BUILTIN_A
13925 #undef RS6000_BUILTIN_D
13926 #undef RS6000_BUILTIN_H
13927 #undef RS6000_BUILTIN_P
13928 #undef RS6000_BUILTIN_Q
13929 #undef RS6000_BUILTIN_X
13930
13931 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13932 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13933 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13934 { MASK, ICODE, NAME, ENUM },
13935
13936 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13937 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13938 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13939 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13940 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13941 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13942 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13943
13944 static const struct builtin_description bdesc_2arg[] =
13945 {
13946 #include "rs6000-builtin.def"
13947 };
13948
13949 #undef RS6000_BUILTIN_0
13950 #undef RS6000_BUILTIN_1
13951 #undef RS6000_BUILTIN_2
13952 #undef RS6000_BUILTIN_3
13953 #undef RS6000_BUILTIN_A
13954 #undef RS6000_BUILTIN_D
13955 #undef RS6000_BUILTIN_H
13956 #undef RS6000_BUILTIN_P
13957 #undef RS6000_BUILTIN_Q
13958 #undef RS6000_BUILTIN_X
13959
13960 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13961 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13962 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13963 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13964 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13965 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13966 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13967 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13968 { MASK, ICODE, NAME, ENUM },
13969
13970 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13971 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13972
13973 /* AltiVec predicates. */
13974
13975 static const struct builtin_description bdesc_altivec_preds[] =
13976 {
13977 #include "rs6000-builtin.def"
13978 };
13979
13980 /* PAIRED predicates. */
13981 #undef RS6000_BUILTIN_0
13982 #undef RS6000_BUILTIN_1
13983 #undef RS6000_BUILTIN_2
13984 #undef RS6000_BUILTIN_3
13985 #undef RS6000_BUILTIN_A
13986 #undef RS6000_BUILTIN_D
13987 #undef RS6000_BUILTIN_H
13988 #undef RS6000_BUILTIN_P
13989 #undef RS6000_BUILTIN_Q
13990 #undef RS6000_BUILTIN_X
13991
13992 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13993 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13994 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13995 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13996 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13997 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13998 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13999 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14000 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
14001 { MASK, ICODE, NAME, ENUM },
14002
14003 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14004
14005 static const struct builtin_description bdesc_paired_preds[] =
14006 {
14007 #include "rs6000-builtin.def"
14008 };
14009
14010 /* ABS* operations. */
14011
14012 #undef RS6000_BUILTIN_0
14013 #undef RS6000_BUILTIN_1
14014 #undef RS6000_BUILTIN_2
14015 #undef RS6000_BUILTIN_3
14016 #undef RS6000_BUILTIN_A
14017 #undef RS6000_BUILTIN_D
14018 #undef RS6000_BUILTIN_H
14019 #undef RS6000_BUILTIN_P
14020 #undef RS6000_BUILTIN_Q
14021 #undef RS6000_BUILTIN_X
14022
14023 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14024 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14025 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14026 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14027 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
14028 { MASK, ICODE, NAME, ENUM },
14029
14030 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14031 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14032 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14033 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14034 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14035
14036 static const struct builtin_description bdesc_abs[] =
14037 {
14038 #include "rs6000-builtin.def"
14039 };
14040
14041 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
14042 foo (VECa). */
14043
14044 #undef RS6000_BUILTIN_0
14045 #undef RS6000_BUILTIN_1
14046 #undef RS6000_BUILTIN_2
14047 #undef RS6000_BUILTIN_3
14048 #undef RS6000_BUILTIN_A
14049 #undef RS6000_BUILTIN_D
14050 #undef RS6000_BUILTIN_H
14051 #undef RS6000_BUILTIN_P
14052 #undef RS6000_BUILTIN_Q
14053 #undef RS6000_BUILTIN_X
14054
14055 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14056 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
14057 { MASK, ICODE, NAME, ENUM },
14058
14059 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14060 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14061 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14062 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14063 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14064 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14065 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14066 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14067
14068 static const struct builtin_description bdesc_1arg[] =
14069 {
14070 #include "rs6000-builtin.def"
14071 };
14072
14073 /* Simple no-argument operations: result = __builtin_darn_32 (). */
14074
14075 #undef RS6000_BUILTIN_0
14076 #undef RS6000_BUILTIN_1
14077 #undef RS6000_BUILTIN_2
14078 #undef RS6000_BUILTIN_3
14079 #undef RS6000_BUILTIN_A
14080 #undef RS6000_BUILTIN_D
14081 #undef RS6000_BUILTIN_H
14082 #undef RS6000_BUILTIN_P
14083 #undef RS6000_BUILTIN_Q
14084 #undef RS6000_BUILTIN_X
14085
14086 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
14087 { MASK, ICODE, NAME, ENUM },
14088
14089 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14090 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14091 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14092 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14093 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14094 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
14095 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14096 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14097 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14098
14099 static const struct builtin_description bdesc_0arg[] =
14100 {
14101 #include "rs6000-builtin.def"
14102 };
14103
14104 /* HTM builtins. */
14105 #undef RS6000_BUILTIN_0
14106 #undef RS6000_BUILTIN_1
14107 #undef RS6000_BUILTIN_2
14108 #undef RS6000_BUILTIN_3
14109 #undef RS6000_BUILTIN_A
14110 #undef RS6000_BUILTIN_D
14111 #undef RS6000_BUILTIN_H
14112 #undef RS6000_BUILTIN_P
14113 #undef RS6000_BUILTIN_Q
14114 #undef RS6000_BUILTIN_X
14115
14116 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
14117 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
14118 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
14119 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
14120 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
14121 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
14122 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
14123 { MASK, ICODE, NAME, ENUM },
14124
14125 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
14126 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
14127 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
14128
14129 static const struct builtin_description bdesc_htm[] =
14130 {
14131 #include "rs6000-builtin.def"
14132 };
14133
14134 #undef RS6000_BUILTIN_0
14135 #undef RS6000_BUILTIN_1
14136 #undef RS6000_BUILTIN_2
14137 #undef RS6000_BUILTIN_3
14138 #undef RS6000_BUILTIN_A
14139 #undef RS6000_BUILTIN_D
14140 #undef RS6000_BUILTIN_H
14141 #undef RS6000_BUILTIN_P
14142 #undef RS6000_BUILTIN_Q
14143
14144 /* Return true if a builtin function is overloaded. */
14145 bool
14146 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
14147 {
14148 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
14149 }
14150
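/* Return the internal name of the builtin function FNCODE.  */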
14151 const char *
14152 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
14153 {
14154 return rs6000_builtin_info[(int)fncode].name;
14155 }
14156
14157 /* Expand an expression EXP that calls a builtin without arguments. */
14158 static rtx
14159 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
14160 {
14161 rtx pat;
14162 machine_mode tmode = insn_data[icode].operand[0].mode;
14163
14164 if (icode == CODE_FOR_nothing)
14165 /* Builtin not supported on this processor. */
14166 return 0;
14167
14168 if (target == 0
14169 || GET_MODE (target) != tmode
14170 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14171 target = gen_reg_rtx (tmode);
14172
14173 pat = GEN_FCN (icode) (target);
14174 if (! pat)
14175 return 0;
14176 emit_insn (pat);
14177
14178 return target;
14179 }
14180
14181
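/* Expand the __builtin_mtfsf builtin; the first argument must be a
   constant 8-bit field-mask value.  */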
14182 static rtx
14183 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
14184 {
14185 rtx pat;
14186 tree arg0 = CALL_EXPR_ARG (exp, 0);
14187 tree arg1 = CALL_EXPR_ARG (exp, 1);
14188 rtx op0 = expand_normal (arg0);
14189 rtx op1 = expand_normal (arg1);
14190 machine_mode mode0 = insn_data[icode].operand[0].mode;
14191 machine_mode mode1 = insn_data[icode].operand[1].mode;
14192
14193 if (icode == CODE_FOR_nothing)
14194 /* Builtin not supported on this processor. */
14195 return 0;
14196
14197 /* If we got invalid arguments bail out before generating bad rtl. */
14198 if (arg0 == error_mark_node || arg1 == error_mark_node)
14199 return const0_rtx;
14200
14201 if (GET_CODE (op0) != CONST_INT
14202 || INTVAL (op0) > 255
14203 || INTVAL (op0) < 0)
14204 {
14205 error ("argument 1 must be an 8-bit field value");
14206 return const0_rtx;
14207 }
14208
14209 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14210 op0 = copy_to_mode_reg (mode0, op0);
14211
14212 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14213 op1 = copy_to_mode_reg (mode1, op1);
14214
14215 pat = GEN_FCN (icode) (op0, op1);
14216 if (! pat)
14217 return const0_rtx;
14218 emit_insn (pat);
14219
14220 return NULL_RTX;
14221 }
14222
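/* Expand an expression EXP that calls a builtin with one argument.  */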
14223 static rtx
14224 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
14225 {
14226 rtx pat;
14227 tree arg0 = CALL_EXPR_ARG (exp, 0);
14228 rtx op0 = expand_normal (arg0);
14229 machine_mode tmode = insn_data[icode].operand[0].mode;
14230 machine_mode mode0 = insn_data[icode].operand[1].mode;
14231
14232 if (icode == CODE_FOR_nothing)
14233 /* Builtin not supported on this processor. */
14234 return 0;
14235
14236 /* If we got invalid arguments bail out before generating bad rtl. */
14237 if (arg0 == error_mark_node)
14238 return const0_rtx;
14239
14240 if (icode == CODE_FOR_altivec_vspltisb
14241 || icode == CODE_FOR_altivec_vspltish
14242 || icode == CODE_FOR_altivec_vspltisw)
14243 {
14244 /* Only allow 5-bit *signed* literals. */
14245 if (GET_CODE (op0) != CONST_INT
14246 || INTVAL (op0) > 15
14247 || INTVAL (op0) < -16)
14248 {
14249 error ("argument 1 must be a 5-bit signed literal");
14250 return CONST0_RTX (tmode);
14251 }
14252 }
14253
14254 if (target == 0
14255 || GET_MODE (target) != tmode
14256 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14257 target = gen_reg_rtx (tmode);
14258
14259 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14260 op0 = copy_to_mode_reg (mode0, op0);
14261
14262 pat = GEN_FCN (icode) (target, op0);
14263 if (! pat)
14264 return 0;
14265 emit_insn (pat);
14266
14267 return target;
14268 }
14269
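/* Expand an AltiVec ABS* builtin, which requires two scratch
   registers.  */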
14270 static rtx
14271 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14272 {
14273 rtx pat, scratch1, scratch2;
14274 tree arg0 = CALL_EXPR_ARG (exp, 0);
14275 rtx op0 = expand_normal (arg0);
14276 machine_mode tmode = insn_data[icode].operand[0].mode;
14277 machine_mode mode0 = insn_data[icode].operand[1].mode;
14278
14279 /* If we have invalid arguments, bail out before generating bad rtl. */
14280 if (arg0 == error_mark_node)
14281 return const0_rtx;
14282
14283 if (target == 0
14284 || GET_MODE (target) != tmode
14285 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14286 target = gen_reg_rtx (tmode);
14287
14288 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14289 op0 = copy_to_mode_reg (mode0, op0);
14290
14291 scratch1 = gen_reg_rtx (mode0);
14292 scratch2 = gen_reg_rtx (mode0);
14293
14294 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14295 if (! pat)
14296 return 0;
14297 emit_insn (pat);
14298
14299 return target;
14300 }
14301
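/* Expand an expression EXP that calls a builtin with two arguments.  */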
14302 static rtx
14303 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14304 {
14305 rtx pat;
14306 tree arg0 = CALL_EXPR_ARG (exp, 0);
14307 tree arg1 = CALL_EXPR_ARG (exp, 1);
14308 rtx op0 = expand_normal (arg0);
14309 rtx op1 = expand_normal (arg1);
14310 machine_mode tmode = insn_data[icode].operand[0].mode;
14311 machine_mode mode0 = insn_data[icode].operand[1].mode;
14312 machine_mode mode1 = insn_data[icode].operand[2].mode;
14313
14314 if (icode == CODE_FOR_nothing)
14315 /* Builtin not supported on this processor. */
14316 return 0;
14317
14318 /* If we got invalid arguments bail out before generating bad rtl. */
14319 if (arg0 == error_mark_node || arg1 == error_mark_node)
14320 return const0_rtx;
14321
14322 if (icode == CODE_FOR_altivec_vcfux
14323 || icode == CODE_FOR_altivec_vcfsx
14324 || icode == CODE_FOR_altivec_vctsxs
14325 || icode == CODE_FOR_altivec_vctuxs
14326 || icode == CODE_FOR_altivec_vspltb
14327 || icode == CODE_FOR_altivec_vsplth
14328 || icode == CODE_FOR_altivec_vspltw)
14329 {
14330 /* Only allow 5-bit unsigned literals. */
14331 STRIP_NOPS (arg1);
14332 if (TREE_CODE (arg1) != INTEGER_CST
14333 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14334 {
14335 error ("argument 2 must be a 5-bit unsigned literal");
14336 return CONST0_RTX (tmode);
14337 }
14338 }
14339 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14340 || icode == CODE_FOR_dfptstsfi_lt_dd
14341 || icode == CODE_FOR_dfptstsfi_gt_dd
14342 || icode == CODE_FOR_dfptstsfi_unordered_dd
14343 || icode == CODE_FOR_dfptstsfi_eq_td
14344 || icode == CODE_FOR_dfptstsfi_lt_td
14345 || icode == CODE_FOR_dfptstsfi_gt_td
14346 || icode == CODE_FOR_dfptstsfi_unordered_td)
14347 {
14348 /* Only allow 6-bit unsigned literals. */
14349 STRIP_NOPS (arg0);
14350 if (TREE_CODE (arg0) != INTEGER_CST
14351 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14352 {
14353 error ("argument 1 must be a 6-bit unsigned literal");
14354 return CONST0_RTX (tmode);
14355 }
14356 }
14357 else if (icode == CODE_FOR_xststdcdp
14358 || icode == CODE_FOR_xststdcsp
14359 || icode == CODE_FOR_xvtstdcdp
14360 || icode == CODE_FOR_xvtstdcsp)
14361 {
14362 /* Only allow 7-bit unsigned literals. */
14363 STRIP_NOPS (arg1);
14364 if (TREE_CODE (arg1) != INTEGER_CST
14365 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14366 {
14367 error ("argument 2 must be a 7-bit unsigned literal");
14368 return CONST0_RTX (tmode);
14369 }
14370 }
14371
14372 if (target == 0
14373 || GET_MODE (target) != tmode
14374 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14375 target = gen_reg_rtx (tmode);
14376
14377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14378 op0 = copy_to_mode_reg (mode0, op0);
14379 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14380 op1 = copy_to_mode_reg (mode1, op1);
14381
14382 pat = GEN_FCN (icode) (target, op0, op1);
14383 if (! pat)
14384 return 0;
14385 emit_insn (pat);
14386
14387 return target;
14388 }
14389
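/* Expand an AltiVec predicate builtin; the first argument selects the
   CR6 test to apply to the comparison result.  */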
14390 static rtx
14391 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14392 {
14393 rtx pat, scratch;
14394 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14395 tree arg0 = CALL_EXPR_ARG (exp, 1);
14396 tree arg1 = CALL_EXPR_ARG (exp, 2);
14397 rtx op0 = expand_normal (arg0);
14398 rtx op1 = expand_normal (arg1);
14399 machine_mode tmode = SImode;
14400 machine_mode mode0 = insn_data[icode].operand[1].mode;
14401 machine_mode mode1 = insn_data[icode].operand[2].mode;
14402 int cr6_form_int;
14403
14404 if (TREE_CODE (cr6_form) != INTEGER_CST)
14405 {
14406 error ("argument 1 of __builtin_altivec_predicate must be a constant");
14407 return const0_rtx;
14408 }
14409 else
14410 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14411
14412 gcc_assert (mode0 == mode1);
14413
14414 /* If we have invalid arguments, bail out before generating bad rtl. */
14415 if (arg0 == error_mark_node || arg1 == error_mark_node)
14416 return const0_rtx;
14417
14418 if (target == 0
14419 || GET_MODE (target) != tmode
14420 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14421 target = gen_reg_rtx (tmode);
14422
14423 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14424 op0 = copy_to_mode_reg (mode0, op0);
14425 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14426 op1 = copy_to_mode_reg (mode1, op1);
14427
14428 /* Note that for many of the relevant operations (e.g. cmpne or
14429 cmpeq) with float or double operands, it makes more sense for the
14430 mode of the allocated scratch register to be a vector of
14431 integers. But the choice to copy the mode of operand 0 was made
14432 long ago and there are no plans to change it. */
14433 scratch = gen_reg_rtx (mode0);
14434
14435 pat = GEN_FCN (icode) (scratch, op0, op1);
14436 if (! pat)
14437 return 0;
14438 emit_insn (pat);
14439
14440 /* The vec_any* and vec_all* predicates use the same opcodes for two
14441 different operations, but the bits in CR6 will be different
14442 depending on what information we want. So we have to play tricks
14443 with CR6 to get the right bits out.
14444
14445 If you think this is disgusting, look at the specs for the
14446 AltiVec predicates. */
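/* The four CR6_FORM values correspond to the __CR6_EQ (0),
   __CR6_EQ_REV (1), __CR6_LT (2) and __CR6_LT_REV (3) constants that
   altivec.h passes in for the vec_all_* and vec_any_* intrinsics.  */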
14447
14448 switch (cr6_form_int)
14449 {
14450 case 0:
14451 emit_insn (gen_cr6_test_for_zero (target));
14452 break;
14453 case 1:
14454 emit_insn (gen_cr6_test_for_zero_reverse (target));
14455 break;
14456 case 2:
14457 emit_insn (gen_cr6_test_for_lt (target));
14458 break;
14459 case 3:
14460 emit_insn (gen_cr6_test_for_lt_reverse (target));
14461 break;
14462 default:
14463 error ("argument 1 of __builtin_altivec_predicate is out of range");
14464 break;
14465 }
14466
14467 return target;
14468 }
14469
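/* Expand a paired-single load builtin; the address is formed from the
   two arguments, either (reg + reg) or (reg).  */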
14470 static rtx
14471 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14472 {
14473 rtx pat, addr;
14474 tree arg0 = CALL_EXPR_ARG (exp, 0);
14475 tree arg1 = CALL_EXPR_ARG (exp, 1);
14476 machine_mode tmode = insn_data[icode].operand[0].mode;
14477 machine_mode mode0 = Pmode;
14478 machine_mode mode1 = Pmode;
14479 rtx op0 = expand_normal (arg0);
14480 rtx op1 = expand_normal (arg1);
14481
14482 if (icode == CODE_FOR_nothing)
14483 /* Builtin not supported on this processor. */
14484 return 0;
14485
14486 /* If we got invalid arguments bail out before generating bad rtl. */
14487 if (arg0 == error_mark_node || arg1 == error_mark_node)
14488 return const0_rtx;
14489
14490 if (target == 0
14491 || GET_MODE (target) != tmode
14492 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14493 target = gen_reg_rtx (tmode);
14494
14495 op1 = copy_to_mode_reg (mode1, op1);
14496
14497 if (op0 == const0_rtx)
14498 {
14499 addr = gen_rtx_MEM (tmode, op1);
14500 }
14501 else
14502 {
14503 op0 = copy_to_mode_reg (mode0, op0);
14504 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14505 }
14506
14507 pat = GEN_FCN (icode) (target, addr);
14508
14509 if (! pat)
14510 return 0;
14511 emit_insn (pat);
14512
14513 return target;
14514 }
14515
14516 /* Return a constant vector for use as a little-endian permute control vector
14517 to reverse the order of elements of the given vector mode. */
14518 static rtx
14519 swap_selector_for_mode (machine_mode mode)
14520 {
14521 /* These are little endian vectors, so their elements are reversed
14522 from what you would normally expect for a permute control vector. */
14523 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14524 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14525 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14526 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14527 unsigned int *swaparray, i;
14528 rtx perm[16];
14529
14530 switch (mode)
14531 {
14532 case V2DFmode:
14533 case V2DImode:
14534 swaparray = swap2;
14535 break;
14536 case V4SFmode:
14537 case V4SImode:
14538 swaparray = swap4;
14539 break;
14540 case V8HImode:
14541 swaparray = swap8;
14542 break;
14543 case V16QImode:
14544 swaparray = swap16;
14545 break;
14546 default:
14547 gcc_unreachable ();
14548 }
14549
14550 for (i = 0; i < 16; ++i)
14551 perm[i] = GEN_INT (swaparray[i]);
14552
14553 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
14554 }
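/* For example, with V8HImode the swap8 selector above causes vperm to
   exchange halfword 0 with halfword 7, halfword 1 with halfword 6, and
   so on, reversing all eight elements.  */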
14555
14556 /* Generate code for an "lvxl" or "lve*x" built-in for a little endian target
14557 with -maltivec=be specified. Issue the load followed by an element-
14558 reversing permute. */
14559 void
14560 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14561 {
14562 rtx tmp = gen_reg_rtx (mode);
14563 rtx load = gen_rtx_SET (tmp, op1);
14564 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14565 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14566 rtx sel = swap_selector_for_mode (mode);
14567 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14568
14569 gcc_assert (REG_P (op0));
14570 emit_insn (par);
14571 emit_insn (gen_rtx_SET (op0, vperm));
14572 }
14573
14574 /* Generate code for a "stvxl" built-in for a little endian target with
14575 -maltivec=be specified. Issue the store preceded by an element-reversing
14576 permute. */
14577 void
14578 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14579 {
14580 rtx tmp = gen_reg_rtx (mode);
14581 rtx store = gen_rtx_SET (op0, tmp);
14582 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14583 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14584 rtx sel = swap_selector_for_mode (mode);
14585 rtx vperm;
14586
14587 gcc_assert (REG_P (op1));
14588 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14589 emit_insn (gen_rtx_SET (tmp, vperm));
14590 emit_insn (par);
14591 }
14592
14593 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14594 specified. Issue the store preceded by an element-reversing permute. */
14595 void
14596 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14597 {
14598 machine_mode inner_mode = GET_MODE_INNER (mode);
14599 rtx tmp = gen_reg_rtx (mode);
14600 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14601 rtx sel = swap_selector_for_mode (mode);
14602 rtx vperm;
14603
14604 gcc_assert (REG_P (op1));
14605 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14606 emit_insn (gen_rtx_SET (tmp, vperm));
14607 emit_insn (gen_rtx_SET (op0, stvx));
14608 }
14609
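/* Expand an AltiVec load builtin (lvx, lvxl, lve*x); BLK says whether
   to use BLKmode for the memory reference.  */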
14610 static rtx
14611 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14612 {
14613 rtx pat, addr;
14614 tree arg0 = CALL_EXPR_ARG (exp, 0);
14615 tree arg1 = CALL_EXPR_ARG (exp, 1);
14616 machine_mode tmode = insn_data[icode].operand[0].mode;
14617 machine_mode mode0 = Pmode;
14618 machine_mode mode1 = Pmode;
14619 rtx op0 = expand_normal (arg0);
14620 rtx op1 = expand_normal (arg1);
14621
14622 if (icode == CODE_FOR_nothing)
14623 /* Builtin not supported on this processor. */
14624 return 0;
14625
14626 /* If we got invalid arguments bail out before generating bad rtl. */
14627 if (arg0 == error_mark_node || arg1 == error_mark_node)
14628 return const0_rtx;
14629
14630 if (target == 0
14631 || GET_MODE (target) != tmode
14632 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14633 target = gen_reg_rtx (tmode);
14634
14635 op1 = copy_to_mode_reg (mode1, op1);
14636
14637 /* For LVX, express the RTL accurately by ANDing the address with -16.
14638 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14639 so the raw address is fine. */
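/* (The lvx instruction itself ignores the low four bits of the
   effective address, so the AND does not change which quadword is
   loaded; it only makes the 16-byte alignment explicit to the RTL
   optimizers.)  */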
14640 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14641 || icode == CODE_FOR_altivec_lvx_v2di_2op
14642 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14643 || icode == CODE_FOR_altivec_lvx_v4si_2op
14644 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14645 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14646 {
14647 rtx rawaddr;
14648 if (op0 == const0_rtx)
14649 rawaddr = op1;
14650 else
14651 {
14652 op0 = copy_to_mode_reg (mode0, op0);
14653 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14654 }
14655 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14656 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14657
14658 /* For -maltivec=be, emit the load and follow it up with a
14659 permute to swap the elements. */
14660 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14661 {
14662 rtx temp = gen_reg_rtx (tmode);
14663 emit_insn (gen_rtx_SET (temp, addr));
14664
14665 rtx sel = swap_selector_for_mode (tmode);
14666 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14667 UNSPEC_VPERM);
14668 emit_insn (gen_rtx_SET (target, vperm));
14669 }
14670 else
14671 emit_insn (gen_rtx_SET (target, addr));
14672 }
14673 else
14674 {
14675 if (op0 == const0_rtx)
14676 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14677 else
14678 {
14679 op0 = copy_to_mode_reg (mode0, op0);
14680 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14681 gen_rtx_PLUS (Pmode, op1, op0));
14682 }
14683
14684 pat = GEN_FCN (icode) (target, addr);
14685 if (! pat)
14686 return 0;
14687 emit_insn (pat);
14688 }
14689
14690 return target;
14691 }
14692
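/* Expand a paired-single store builtin.  */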
14693 static rtx
14694 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14695 {
14696 tree arg0 = CALL_EXPR_ARG (exp, 0);
14697 tree arg1 = CALL_EXPR_ARG (exp, 1);
14698 tree arg2 = CALL_EXPR_ARG (exp, 2);
14699 rtx op0 = expand_normal (arg0);
14700 rtx op1 = expand_normal (arg1);
14701 rtx op2 = expand_normal (arg2);
14702 rtx pat, addr;
14703 machine_mode tmode = insn_data[icode].operand[0].mode;
14704 machine_mode mode1 = Pmode;
14705 machine_mode mode2 = Pmode;
14706
14707 /* Invalid arguments; bail out before generating bad rtl. */
14708 if (arg0 == error_mark_node
14709 || arg1 == error_mark_node
14710 || arg2 == error_mark_node)
14711 return const0_rtx;
14712
14713 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14714 op0 = copy_to_mode_reg (tmode, op0);
14715
14716 op2 = copy_to_mode_reg (mode2, op2);
14717
14718 if (op1 == const0_rtx)
14719 {
14720 addr = gen_rtx_MEM (tmode, op2);
14721 }
14722 else
14723 {
14724 op1 = copy_to_mode_reg (mode1, op1);
14725 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14726 }
14727
14728 pat = GEN_FCN (icode) (addr, op0);
14729 if (pat)
14730 emit_insn (pat);
14731 return NULL_RTX;
14732 }
14733
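/* Expand the stxvl builtin, a vector store with an explicit length
   operand.  */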
14734 static rtx
14735 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14736 {
14737 rtx pat;
14738 tree arg0 = CALL_EXPR_ARG (exp, 0);
14739 tree arg1 = CALL_EXPR_ARG (exp, 1);
14740 tree arg2 = CALL_EXPR_ARG (exp, 2);
14741 rtx op0 = expand_normal (arg0);
14742 rtx op1 = expand_normal (arg1);
14743 rtx op2 = expand_normal (arg2);
14744 machine_mode mode0 = insn_data[icode].operand[0].mode;
14745 machine_mode mode1 = insn_data[icode].operand[1].mode;
14746 machine_mode mode2 = insn_data[icode].operand[2].mode;
14747
14748 if (icode == CODE_FOR_nothing)
14749 /* Builtin not supported on this processor. */
14750 return NULL_RTX;
14751
14752 /* If we got invalid arguments bail out before generating bad rtl. */
14753 if (arg0 == error_mark_node
14754 || arg1 == error_mark_node
14755 || arg2 == error_mark_node)
14756 return NULL_RTX;
14757
14758 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14759 op0 = copy_to_mode_reg (mode0, op0);
14760 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14761 op1 = copy_to_mode_reg (mode1, op1);
14762 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14763 op2 = copy_to_mode_reg (mode2, op2);
14764
14765 pat = GEN_FCN (icode) (op0, op1, op2);
14766 if (pat)
14767 emit_insn (pat);
14768
14769 return NULL_RTX;
14770 }
14771
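/* Expand an AltiVec store builtin (stvx, stvxl, stve*x).  */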
14772 static rtx
14773 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14774 {
14775 tree arg0 = CALL_EXPR_ARG (exp, 0);
14776 tree arg1 = CALL_EXPR_ARG (exp, 1);
14777 tree arg2 = CALL_EXPR_ARG (exp, 2);
14778 rtx op0 = expand_normal (arg0);
14779 rtx op1 = expand_normal (arg1);
14780 rtx op2 = expand_normal (arg2);
14781 rtx pat, addr, rawaddr;
14782 machine_mode tmode = insn_data[icode].operand[0].mode;
14783 machine_mode smode = insn_data[icode].operand[1].mode;
14784 machine_mode mode1 = Pmode;
14785 machine_mode mode2 = Pmode;
14786
14787 /* Invalid arguments; bail out before generating bad rtl. */
14788 if (arg0 == error_mark_node
14789 || arg1 == error_mark_node
14790 || arg2 == error_mark_node)
14791 return const0_rtx;
14792
14793 op2 = copy_to_mode_reg (mode2, op2);
14794
14795 /* For STVX, express the RTL accurately by ANDing the address with -16.
14796 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14797 so the raw address is fine. */
14798 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14799 || icode == CODE_FOR_altivec_stvx_v2di_2op
14800 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14801 || icode == CODE_FOR_altivec_stvx_v4si_2op
14802 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14803 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14804 {
14805 if (op1 == const0_rtx)
14806 rawaddr = op2;
14807 else
14808 {
14809 op1 = copy_to_mode_reg (mode1, op1);
14810 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14811 }
14812
14813 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14814 addr = gen_rtx_MEM (tmode, addr);
14815
14816 op0 = copy_to_mode_reg (tmode, op0);
14817
14818 /* For -maltivec=be, emit a permute to swap the elements, followed
14819 by the store. */
14820 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14821 {
14822 rtx temp = gen_reg_rtx (tmode);
14823 rtx sel = swap_selector_for_mode (tmode);
14824 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14825 UNSPEC_VPERM);
14826 emit_insn (gen_rtx_SET (temp, vperm));
14827 emit_insn (gen_rtx_SET (addr, temp));
14828 }
14829 else
14830 emit_insn (gen_rtx_SET (addr, op0));
14831 }
14832 else
14833 {
14834 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14835 op0 = copy_to_mode_reg (smode, op0);
14836
14837 if (op1 == const0_rtx)
14838 addr = gen_rtx_MEM (tmode, op2);
14839 else
14840 {
14841 op1 = copy_to_mode_reg (mode1, op1);
14842 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14843 }
14844
14845 pat = GEN_FCN (icode) (addr, op0);
14846 if (pat)
14847 emit_insn (pat);
14848 }
14849
14850 return NULL_RTX;
14851 }
14852
14853 /* Return the appropriate SPR number associated with the given builtin. */
14854 static inline HOST_WIDE_INT
14855 htm_spr_num (enum rs6000_builtins code)
14856 {
14857 if (code == HTM_BUILTIN_GET_TFHAR
14858 || code == HTM_BUILTIN_SET_TFHAR)
14859 return TFHAR_SPR;
14860 else if (code == HTM_BUILTIN_GET_TFIAR
14861 || code == HTM_BUILTIN_SET_TFIAR)
14862 return TFIAR_SPR;
14863 else if (code == HTM_BUILTIN_GET_TEXASR
14864 || code == HTM_BUILTIN_SET_TEXASR)
14865 return TEXASR_SPR;
14866 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14867 || code == HTM_BUILTIN_SET_TEXASRU);
14868 return TEXASRU_SPR;
14869 }
14870
14871 /* Return the appropriate SPR regno associated with the given builtin. */
14872 static inline HOST_WIDE_INT
14873 htm_spr_regno (enum rs6000_builtins code)
14874 {
14875 if (code == HTM_BUILTIN_GET_TFHAR
14876 || code == HTM_BUILTIN_SET_TFHAR)
14877 return TFHAR_REGNO;
14878 else if (code == HTM_BUILTIN_GET_TFIAR
14879 || code == HTM_BUILTIN_SET_TFIAR)
14880 return TFIAR_REGNO;
14881 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14882 || code == HTM_BUILTIN_SET_TEXASR
14883 || code == HTM_BUILTIN_GET_TEXASRU
14884 || code == HTM_BUILTIN_SET_TEXASRU);
14885 return TEXASR_REGNO;
14886 }
14887
14888 /* Return the correct ICODE value depending on whether we are
14889 setting or reading the HTM SPRs. */
14890 static inline enum insn_code
14891 rs6000_htm_spr_icode (bool nonvoid)
14892 {
14893 if (nonvoid)
14894 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14895 else
14896 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14897 }
14898
14899 /* Expand the HTM builtin in EXP and store the result in TARGET.
14900 Store true in *EXPANDEDP if we found a builtin to expand. */
14901 static rtx
14902 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14903 {
14904 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14905 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14906 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14907 const struct builtin_description *d;
14908 size_t i;
14909
14910 *expandedp = true;
14911
14912 if (!TARGET_POWERPC64
14913 && (fcode == HTM_BUILTIN_TABORTDC
14914 || fcode == HTM_BUILTIN_TABORTDCI))
14915 {
14916 size_t uns_fcode = (size_t)fcode;
14917 const char *name = rs6000_builtin_info[uns_fcode].name;
14918 error ("builtin %s is only valid in 64-bit mode", name);
14919 return const0_rtx;
14920 }
14921
14922 /* Expand the HTM builtins. */
14923 d = bdesc_htm;
14924 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14925 if (d->code == fcode)
14926 {
14927 rtx op[MAX_HTM_OPERANDS], pat;
14928 int nopnds = 0;
14929 tree arg;
14930 call_expr_arg_iterator iter;
14931 unsigned attr = rs6000_builtin_info[fcode].attr;
14932 enum insn_code icode = d->icode;
14933 const struct insn_operand_data *insn_op;
14934 bool uses_spr = (attr & RS6000_BTC_SPR);
14935 rtx cr = NULL_RTX;
14936
14937 if (uses_spr)
14938 icode = rs6000_htm_spr_icode (nonvoid);
14939 insn_op = &insn_data[icode].operand[0];
14940
14941 if (nonvoid)
14942 {
14943 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
14944 if (!target
14945 || GET_MODE (target) != tmode
14946 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14947 target = gen_reg_rtx (tmode);
14948 if (uses_spr)
14949 op[nopnds++] = target;
14950 }
14951
14952 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14953 {
14954 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14955 return const0_rtx;
14956
14957 insn_op = &insn_data[icode].operand[nopnds];
14958
14959 op[nopnds] = expand_normal (arg);
14960
14961 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14962 {
14963 if (!strcmp (insn_op->constraint, "n"))
14964 {
14965 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14966 if (!CONST_INT_P (op[nopnds]))
14967 error ("argument %d must be an unsigned literal", arg_num);
14968 else
14969 error ("argument %d is an unsigned literal that is "
14970 "out of range", arg_num);
14971 return const0_rtx;
14972 }
14973 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14974 }
14975
14976 nopnds++;
14977 }
14978
14979 /* Handle the builtins for extended mnemonics. These accept
14980 no arguments, but map to builtins that take arguments. */
14981 switch (fcode)
14982 {
14983 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14984 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14985 op[nopnds++] = GEN_INT (1);
14986 if (flag_checking)
14987 attr |= RS6000_BTC_UNARY;
14988 break;
14989 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14990 op[nopnds++] = GEN_INT (0);
14991 if (flag_checking)
14992 attr |= RS6000_BTC_UNARY;
14993 break;
14994 default:
14995 break;
14996 }
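/* Thus __builtin_tendall () is expanded exactly like __builtin_tend (1),
   __builtin_tresume () like __builtin_tsr (1), and __builtin_tsuspend ()
   like __builtin_tsr (0).  */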
14997
14998 /* If this builtin accesses SPRs, then pass in the appropriate
14999 SPR number and SPR regno as the last two operands. */
15000 if (uses_spr)
15001 {
15002 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
15003 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
15004 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
15005 }
15006 /* If this builtin accesses a CR, then pass in a scratch
15007 CR as the last operand. */
15008 else if (attr & RS6000_BTC_CR)
15009 {
15010 cr = gen_reg_rtx (CCmode);
15011 op[nopnds++] = cr;
15012 }
15012
15013 if (flag_checking)
15014 {
15015 int expected_nopnds = 0;
15016 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
15017 expected_nopnds = 1;
15018 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
15019 expected_nopnds = 2;
15020 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
15021 expected_nopnds = 3;
15022 if (!(attr & RS6000_BTC_VOID))
15023 expected_nopnds += 1;
15024 if (uses_spr)
15025 expected_nopnds += 2;
15026
15027 gcc_assert (nopnds == expected_nopnds
15028 && nopnds <= MAX_HTM_OPERANDS);
15029 }
15030
15031 switch (nopnds)
15032 {
15033 case 1:
15034 pat = GEN_FCN (icode) (op[0]);
15035 break;
15036 case 2:
15037 pat = GEN_FCN (icode) (op[0], op[1]);
15038 break;
15039 case 3:
15040 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15041 break;
15042 case 4:
15043 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15044 break;
15045 default:
15046 gcc_unreachable ();
15047 }
15048 if (!pat)
15049 return NULL_RTX;
15050 emit_insn (pat);
15051
15052 if (attr & RS6000_BTC_CR)
15053 {
15054 if (fcode == HTM_BUILTIN_TBEGIN)
15055 {
15056 /* Emit code to set TARGET to true or false depending on
15057 whether the tbegin. instruction succeeded or failed
15058 to start a transaction. We do this by placing the 1's
15059 complement of CR's EQ bit into TARGET. */
15060 rtx scratch = gen_reg_rtx (SImode);
15061 emit_insn (gen_rtx_SET (scratch,
15062 gen_rtx_EQ (SImode, cr,
15063 const0_rtx)));
15064 emit_insn (gen_rtx_SET (target,
15065 gen_rtx_XOR (SImode, scratch,
15066 GEN_INT (1))));
15067 }
15068 else
15069 {
15070 /* Emit code to copy the 4-bit condition register field
15071 CR into the least significant end of register TARGET. */
15072 rtx scratch1 = gen_reg_rtx (SImode);
15073 rtx scratch2 = gen_reg_rtx (SImode);
15074 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
15075 emit_insn (gen_movcc (subreg, cr));
15076 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
15077 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
15078 }
15079 }
15080
15081 if (nonvoid)
15082 return target;
15083 return const0_rtx;
15084 }
15085
15086 *expandedp = false;
15087 return NULL_RTX;
15088 }
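/* Illustrative use of the RS6000_BTC_CR path above: a call such as
     if (__builtin_tbegin (0)) ...
   emits a tbegin. instruction followed by code that places the one's
   complement of the CR EQ bit into the result register.  */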
15089
15090 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
15091
15092 static rtx
15093 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
15094 rtx target)
15095 {
15096 /* __builtin_cpu_init () is a nop, so expand to nothing. */
15097 if (fcode == RS6000_BUILTIN_CPU_INIT)
15098 return const0_rtx;
15099
15100 if (target == 0 || GET_MODE (target) != SImode)
15101 target = gen_reg_rtx (SImode);
15102
15103 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
15104 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
15105 /* The target_clones pass creates an ARRAY_REF instead of a STRING_CST;
15106 convert it back to a STRING_CST. */
15107 if (TREE_CODE (arg) == ARRAY_REF
15108 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
15109 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
15110 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
15111 arg = TREE_OPERAND (arg, 0);
15112
15113 if (TREE_CODE (arg) != STRING_CST)
15114 {
15115 error ("builtin %s only accepts a string argument",
15116 rs6000_builtin_info[(size_t) fcode].name);
15117 return const0_rtx;
15118 }
15119
15120 if (fcode == RS6000_BUILTIN_CPU_IS)
15121 {
15122 const char *cpu = TREE_STRING_POINTER (arg);
15123 rtx cpuid = NULL_RTX;
15124 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
15125 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
15126 {
15127 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
15128 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
15129 break;
15130 }
15131 if (cpuid == NULL_RTX)
15132 {
15133 /* Invalid CPU argument. */
15134 error ("cpu %s is an invalid argument to builtin %s",
15135 cpu, rs6000_builtin_info[(size_t) fcode].name);
15136 return const0_rtx;
15137 }
15138
15139 rtx platform = gen_reg_rtx (SImode);
15140 rtx tcbmem = gen_const_mem (SImode,
15141 gen_rtx_PLUS (Pmode,
15142 gen_rtx_REG (Pmode, TLS_REGNUM),
15143 GEN_INT (TCB_PLATFORM_OFFSET)));
15144 emit_move_insn (platform, tcbmem);
15145 emit_insn (gen_eqsi3 (target, platform, cpuid));
15146 }
15147 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
15148 {
15149 const char *hwcap = TREE_STRING_POINTER (arg);
15150 rtx mask = NULL_RTX;
15151 int hwcap_offset;
15152 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
15153 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
15154 {
15155 mask = GEN_INT (cpu_supports_info[i].mask);
15156 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
15157 break;
15158 }
15159 if (mask == NULL_RTX)
15160 {
15161 /* Invalid HWCAP argument. */
15162 error ("hwcap %s is an invalid argument to builtin %s",
15163 hwcap, rs6000_builtin_info[(size_t) fcode].name);
15164 return const0_rtx;
15165 }
15166
15167 rtx tcb_hwcap = gen_reg_rtx (SImode);
15168 rtx tcbmem = gen_const_mem (SImode,
15169 gen_rtx_PLUS (Pmode,
15170 gen_rtx_REG (Pmode, TLS_REGNUM),
15171 GEN_INT (hwcap_offset)));
15172 emit_move_insn (tcb_hwcap, tcbmem);
15173 rtx scratch1 = gen_reg_rtx (SImode);
15174 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
15175 rtx scratch2 = gen_reg_rtx (SImode);
15176 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
15177 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
15178 }
15179
15180 /* Record that we have expanded a CPU builtin, so that we can later
15181 emit a reference to the special symbol exported by LIBC to ensure we
15182 do not link against an old LIBC that doesn't support this feature. */
15183 cpu_builtin_p = true;
15184
15185 #else
15186 /* For old LIBCs, always return FALSE. */
15187 emit_move_insn (target, GEN_INT (0));
15188 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
15189
15190 return target;
15191 }
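/* Illustrative use: if (__builtin_cpu_is ("power9")) ... compiles to a
   load of the platform word from the TCB and a comparison against the
   matching cpuid constant; __builtin_cpu_supports similarly tests a
   HWCAP mask word held in the TCB.  */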
15192
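/* Expand an expression EXP that calls a builtin with three
   arguments.  */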
15193 static rtx
15194 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
15195 {
15196 rtx pat;
15197 tree arg0 = CALL_EXPR_ARG (exp, 0);
15198 tree arg1 = CALL_EXPR_ARG (exp, 1);
15199 tree arg2 = CALL_EXPR_ARG (exp, 2);
15200 rtx op0 = expand_normal (arg0);
15201 rtx op1 = expand_normal (arg1);
15202 rtx op2 = expand_normal (arg2);
15203 machine_mode tmode = insn_data[icode].operand[0].mode;
15204 machine_mode mode0 = insn_data[icode].operand[1].mode;
15205 machine_mode mode1 = insn_data[icode].operand[2].mode;
15206 machine_mode mode2 = insn_data[icode].operand[3].mode;
15207
15208 if (icode == CODE_FOR_nothing)
15209 /* Builtin not supported on this processor. */
15210 return 0;
15211
15212 /* If we got invalid arguments bail out before generating bad rtl. */
15213 if (arg0 == error_mark_node
15214 || arg1 == error_mark_node
15215 || arg2 == error_mark_node)
15216 return const0_rtx;
15217
15218 /* Check and prepare arguments depending on the instruction code.
15219
15220 Note that a switch statement instead of the sequence of tests
15221 would be incorrect as many of the CODE_FOR values could be
15222 CODE_FOR_nothing and that would yield multiple alternatives
15223 with identical values. We'd never reach here at runtime in
15224 this case. */
15225 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15226 || icode == CODE_FOR_altivec_vsldoi_v2df
15227 || icode == CODE_FOR_altivec_vsldoi_v4si
15228 || icode == CODE_FOR_altivec_vsldoi_v8hi
15229 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15230 {
15231 /* Only allow 4-bit unsigned literals. */
15232 STRIP_NOPS (arg2);
15233 if (TREE_CODE (arg2) != INTEGER_CST
15234 || TREE_INT_CST_LOW (arg2) & ~0xf)
15235 {
15236 error ("argument 3 must be a 4-bit unsigned literal");
15237 return CONST0_RTX (tmode);
15238 }
15239 }
15240 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15241 || icode == CODE_FOR_vsx_xxpermdi_v2di
15242 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15243 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15244 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15245 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15246 || icode == CODE_FOR_vsx_xxpermdi_v4si
15247 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15248 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15249 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15250 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15251 || icode == CODE_FOR_vsx_xxsldwi_v4si
15252 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15253 || icode == CODE_FOR_vsx_xxsldwi_v2di
15254 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15255 {
15256 /* Only allow 2-bit unsigned literals. */
15257 STRIP_NOPS (arg2);
15258 if (TREE_CODE (arg2) != INTEGER_CST
15259 || TREE_INT_CST_LOW (arg2) & ~0x3)
15260 {
15261 error ("argument 3 must be a 2-bit unsigned literal");
15262 return CONST0_RTX (tmode);
15263 }
15264 }
15265 else if (icode == CODE_FOR_vsx_set_v2df
15266 || icode == CODE_FOR_vsx_set_v2di
15267 || icode == CODE_FOR_bcdadd
15268 || icode == CODE_FOR_bcdadd_lt
15269 || icode == CODE_FOR_bcdadd_eq
15270 || icode == CODE_FOR_bcdadd_gt
15271 || icode == CODE_FOR_bcdsub
15272 || icode == CODE_FOR_bcdsub_lt
15273 || icode == CODE_FOR_bcdsub_eq
15274 || icode == CODE_FOR_bcdsub_gt)
15275 {
15276 /* Only allow 1-bit unsigned literals. */
15277 STRIP_NOPS (arg2);
15278 if (TREE_CODE (arg2) != INTEGER_CST
15279 || TREE_INT_CST_LOW (arg2) & ~0x1)
15280 {
15281 error ("argument 3 must be a 1-bit unsigned literal");
15282 return CONST0_RTX (tmode);
15283 }
15284 }
15285 else if (icode == CODE_FOR_dfp_ddedpd_dd
15286 || icode == CODE_FOR_dfp_ddedpd_td)
15287 {
15288 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15289 STRIP_NOPS (arg0);
15290 if (TREE_CODE (arg0) != INTEGER_CST
15291 || TREE_INT_CST_LOW (arg0) & ~0x3)
15292 {
15293 error ("argument 1 must be 0 or 2");
15294 return CONST0_RTX (tmode);
15295 }
15296 }
15297 else if (icode == CODE_FOR_dfp_denbcd_dd
15298 || icode == CODE_FOR_dfp_denbcd_td)
15299 {
15300 /* Only allow 1-bit unsigned literals. */
15301 STRIP_NOPS (arg0);
15302 if (TREE_CODE (arg0) != INTEGER_CST
15303 || TREE_INT_CST_LOW (arg0) & ~0x1)
15304 {
15305 error ("argument 1 must be a 1-bit unsigned literal");
15306 return CONST0_RTX (tmode);
15307 }
15308 }
15309 else if (icode == CODE_FOR_dfp_dscli_dd
15310 || icode == CODE_FOR_dfp_dscli_td
15311 || icode == CODE_FOR_dfp_dscri_dd
15312 || icode == CODE_FOR_dfp_dscri_td)
15313 {
15314 /* Only allow 6-bit unsigned literals. */
15315 STRIP_NOPS (arg1);
15316 if (TREE_CODE (arg1) != INTEGER_CST
15317 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15318 {
15319 error ("argument 2 must be a 6-bit unsigned literal");
15320 return CONST0_RTX (tmode);
15321 }
15322 }
15323 else if (icode == CODE_FOR_crypto_vshasigmaw
15324 || icode == CODE_FOR_crypto_vshasigmad)
15325 {
15326 /* Check whether the 2nd and 3rd arguments are integer constants in
15327 range, and prepare the arguments. */
15328 STRIP_NOPS (arg1);
15329 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
15330 {
15331 error ("argument 2 must be 0 or 1");
15332 return CONST0_RTX (tmode);
15333 }
15334
15335 STRIP_NOPS (arg2);
15336 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
15337 {
15338 error ("argument 3 must be in the range 0..15");
15339 return CONST0_RTX (tmode);
15340 }
15341 }
15342
15343 if (target == 0
15344 || GET_MODE (target) != tmode
15345 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15346 target = gen_reg_rtx (tmode);
15347
15348 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15349 op0 = copy_to_mode_reg (mode0, op0);
15350 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15351 op1 = copy_to_mode_reg (mode1, op1);
15352 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15353 op2 = copy_to_mode_reg (mode2, op2);
15354
15355 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15356 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15357 else
15358 pat = GEN_FCN (icode) (target, op0, op1, op2);
15359 if (! pat)
15360 return 0;
15361 emit_insn (pat);
15362
15363 return target;
15364 }
15365
15366 /* Expand the lvx builtins. */
15367 static rtx
15368 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15369 {
15370 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15371 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15372 tree arg0;
15373 machine_mode tmode, mode0;
15374 rtx pat, op0;
15375 enum insn_code icode;
15376
15377 switch (fcode)
15378 {
15379 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15380 icode = CODE_FOR_vector_altivec_load_v16qi;
15381 break;
15382 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15383 icode = CODE_FOR_vector_altivec_load_v8hi;
15384 break;
15385 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15386 icode = CODE_FOR_vector_altivec_load_v4si;
15387 break;
15388 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15389 icode = CODE_FOR_vector_altivec_load_v4sf;
15390 break;
15391 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15392 icode = CODE_FOR_vector_altivec_load_v2df;
15393 break;
15394 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15395 icode = CODE_FOR_vector_altivec_load_v2di;
15396 break;
15397 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15398 icode = CODE_FOR_vector_altivec_load_v1ti;
15399 break;
15400 default:
15401 *expandedp = false;
15402 return NULL_RTX;
15403 }
15404
15405 *expandedp = true;
15406
15407 arg0 = CALL_EXPR_ARG (exp, 0);
15408 op0 = expand_normal (arg0);
15409 tmode = insn_data[icode].operand[0].mode;
15410 mode0 = insn_data[icode].operand[1].mode;
15411
15412 if (target == 0
15413 || GET_MODE (target) != tmode
15414 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15415 target = gen_reg_rtx (tmode);
15416
15417 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15418 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15419
15420 pat = GEN_FCN (icode) (target, op0);
15421 if (! pat)
15422 return 0;
15423 emit_insn (pat);
15424 return target;
15425 }
15426
15427 /* Expand the stvx builtins. */
15428 static rtx
15429 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15430 bool *expandedp)
15431 {
15432 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15433 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15434 tree arg0, arg1;
15435 machine_mode mode0, mode1;
15436 rtx pat, op0, op1;
15437 enum insn_code icode;
15438
15439 switch (fcode)
15440 {
15441 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15442 icode = CODE_FOR_vector_altivec_store_v16qi;
15443 break;
15444 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15445 icode = CODE_FOR_vector_altivec_store_v8hi;
15446 break;
15447 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15448 icode = CODE_FOR_vector_altivec_store_v4si;
15449 break;
15450 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15451 icode = CODE_FOR_vector_altivec_store_v4sf;
15452 break;
15453 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15454 icode = CODE_FOR_vector_altivec_store_v2df;
15455 break;
15456 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15457 icode = CODE_FOR_vector_altivec_store_v2di;
15458 break;
15459 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15460 icode = CODE_FOR_vector_altivec_store_v1ti;
15461 break;
15462 default:
15463 *expandedp = false;
15464 return NULL_RTX;
15465 }
15466
15467 arg0 = CALL_EXPR_ARG (exp, 0);
15468 arg1 = CALL_EXPR_ARG (exp, 1);
15469 op0 = expand_normal (arg0);
15470 op1 = expand_normal (arg1);
15471 mode0 = insn_data[icode].operand[0].mode;
15472 mode1 = insn_data[icode].operand[1].mode;
15473
15474 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15475 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15476 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15477 op1 = copy_to_mode_reg (mode1, op1);
15478
15479 pat = GEN_FCN (icode) (op0, op1);
15480 if (pat)
15481 emit_insn (pat);
15482
15483 *expandedp = true;
15484 return NULL_RTX;
15485 }
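
/* The store expander mirrors the load case above.  As a sketch (builtin
   spelling assumed as before):

     void
     store_v4si (int *p, vector signed int v)
     {
       __builtin_altivec_st_internal_4si (p, v);
     }

   expands through CODE_FOR_vector_altivec_store_v4si: P becomes the
   V4SImode MEM destination (operand 0), V is copied into a register if
   the operand predicate rejects it, and no value is returned.  */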
15486
15487 /* Expand the dst builtins. */
15488 static rtx
15489 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15490 bool *expandedp)
15491 {
15492 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15493 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15494 tree arg0, arg1, arg2;
15495 machine_mode mode0, mode1;
15496 rtx pat, op0, op1, op2;
15497 const struct builtin_description *d;
15498 size_t i;
15499
15500 *expandedp = false;
15501
15502 /* Handle DST variants. */
15503 d = bdesc_dst;
15504 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15505 if (d->code == fcode)
15506 {
15507 arg0 = CALL_EXPR_ARG (exp, 0);
15508 arg1 = CALL_EXPR_ARG (exp, 1);
15509 arg2 = CALL_EXPR_ARG (exp, 2);
15510 op0 = expand_normal (arg0);
15511 op1 = expand_normal (arg1);
15512 op2 = expand_normal (arg2);
15513 mode0 = insn_data[d->icode].operand[0].mode;
15514 mode1 = insn_data[d->icode].operand[1].mode;
15515
15516 /* Invalid arguments; bail out before generating bad rtl. */
15517 if (arg0 == error_mark_node
15518 || arg1 == error_mark_node
15519 || arg2 == error_mark_node)
15520 return const0_rtx;
15521
15522 *expandedp = true;
15523 STRIP_NOPS (arg2);
15524 if (TREE_CODE (arg2) != INTEGER_CST
15525 || TREE_INT_CST_LOW (arg2) & ~0x3)
15526 {
15527 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15528 return const0_rtx;
15529 }
15530
15531 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15532 op0 = copy_to_mode_reg (Pmode, op0);
15533 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15534 op1 = copy_to_mode_reg (mode1, op1);
15535
15536 pat = GEN_FCN (d->icode) (op0, op1, op2);
15537 if (pat != 0)
15538 emit_insn (pat);
15539
15540 return NULL_RTX;
15541 }
15542
15543 return NULL_RTX;
15544 }
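
/* As an example of the 2-bit literal check above (vec_dst is the
   standard AltiVec spelling; ADDR and CONTROL are illustrative):

     vec_dst (addr, control, 3);   -- accepted: tag 3 fits in two bits
     vec_dst (addr, control, 4);   -- rejected: 4 & ~0x3 is non-zero

   The last argument selects one of the four data-stream tags, so any
   value outside 0..3 draws the "must be a 2-bit unsigned literal"
   diagnostic.  */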
15545
15546 /* Expand vec_init builtin. */
15547 static rtx
15548 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15549 {
15550 machine_mode tmode = TYPE_MODE (type);
15551 machine_mode inner_mode = GET_MODE_INNER (tmode);
15552 int i, n_elt = GET_MODE_NUNITS (tmode);
15553
15554 gcc_assert (VECTOR_MODE_P (tmode));
15555 gcc_assert (n_elt == call_expr_nargs (exp));
15556
15557 if (!target || !register_operand (target, tmode))
15558 target = gen_reg_rtx (tmode);
15559
15560 /* If we have a vector comprised of a single element, such as V1TImode, do
15561 the initialization directly. */
15562 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15563 {
15564 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15565 emit_move_insn (target, gen_lowpart (tmode, x));
15566 }
15567 else
15568 {
15569 rtvec v = rtvec_alloc (n_elt);
15570
15571 for (i = 0; i < n_elt; ++i)
15572 {
15573 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15574 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15575 }
15576
15577 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15578 }
15579
15580 return target;
15581 }
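
/* Sketch of the two paths above (the __builtin_vec_init_v4si spelling
   is assumed):

     vector int v = __builtin_vec_init_v4si (a, b, c, d);

   takes the rtvec branch: each scalar argument is expanded, narrowed to
   the inner mode with gen_lowpart, collected into a PARALLEL, and handed
   to rs6000_expand_vector_init.  A one-element vector such as V1TImode
   skips all of that and becomes a single gen_lowpart move.  */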
15582
15583 /* Return the integer constant in ARG. Verify it is in range for the
15584 subparts of VEC_TYPE; if not, issue an error and return zero. */
15585
15586 static int
15587 get_element_number (tree vec_type, tree arg)
15588 {
15589 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15590
15591 if (!tree_fits_uhwi_p (arg)
15592 || (elt = tree_to_uhwi (arg), elt > max))
15593 {
15594 error ("selector must be an integer constant in the range 0..%wi", max);
15595 return 0;
15596 }
15597
15598 return elt;
15599 }
15600
15601 /* Expand vec_set builtin. */
15602 static rtx
15603 altivec_expand_vec_set_builtin (tree exp)
15604 {
15605 machine_mode tmode, mode1;
15606 tree arg0, arg1, arg2;
15607 int elt;
15608 rtx op0, op1;
15609
15610 arg0 = CALL_EXPR_ARG (exp, 0);
15611 arg1 = CALL_EXPR_ARG (exp, 1);
15612 arg2 = CALL_EXPR_ARG (exp, 2);
15613
15614 tmode = TYPE_MODE (TREE_TYPE (arg0));
15615 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15616 gcc_assert (VECTOR_MODE_P (tmode));
15617
15618 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15619 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15620 elt = get_element_number (TREE_TYPE (arg0), arg2);
15621
15622 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15623 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15624
15625 op0 = force_reg (tmode, op0);
15626 op1 = force_reg (mode1, op1);
15627
15628 rs6000_expand_vector_set (op0, op1, elt);
15629
15630 return op0;
15631 }
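
/* Sketch of the flow (the __builtin_vec_set_v4si spelling is assumed):

     v = __builtin_vec_set_v4si (v, 42, 1);

   forces V and the scalar 42 into registers, validates the selector 1
   against the four subparts via get_element_number, and calls
   rs6000_expand_vector_set; the updated vector register OP0 is the
   value of the builtin.  */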
15632
15633 /* Expand vec_ext builtin. */
15634 static rtx
15635 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15636 {
15637 machine_mode tmode, mode0;
15638 tree arg0, arg1;
15639 rtx op0;
15640 rtx op1;
15641
15642 arg0 = CALL_EXPR_ARG (exp, 0);
15643 arg1 = CALL_EXPR_ARG (exp, 1);
15644
15645 op0 = expand_normal (arg0);
15646 op1 = expand_normal (arg1);
15647
15648 /* Call get_element_number to validate arg1 if it is a constant. */
15649 if (TREE_CODE (arg1) == INTEGER_CST)
15650 (void) get_element_number (TREE_TYPE (arg0), arg1);
15651
15652 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15653 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15654 gcc_assert (VECTOR_MODE_P (mode0));
15655
15656 op0 = force_reg (mode0, op0);
15657
15658 if (optimize || !target || !register_operand (target, tmode))
15659 target = gen_reg_rtx (tmode);
15660
15661 rs6000_expand_vector_extract (target, op0, op1);
15662
15663 return target;
15664 }
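
/* Sketch (the __builtin_vec_ext_v4si spelling is assumed):

     int x = __builtin_vec_ext_v4si (v, 2);

   validates the constant selector against the vector's subparts, forces
   V into a V4SImode register, and lets rs6000_expand_vector_extract put
   element 2 into a fresh SImode target.  A non-constant selector is
   passed through unchecked for the extract expander to handle.  */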
15665
15666 /* Expand the builtin in EXP and store the result in TARGET. Store
15667 true in *EXPANDEDP if we found a builtin to expand. */
15668 static rtx
15669 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15670 {
15671 const struct builtin_description *d;
15672 size_t i;
15673 enum insn_code icode;
15674 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15675 tree arg0, arg1, arg2;
15676 rtx op0, pat;
15677 machine_mode tmode, mode0;
15678 enum rs6000_builtins fcode
15679 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15680
15681 if (rs6000_overloaded_builtin_p (fcode))
15682 {
15683 *expandedp = true;
15684 error ("unresolved overload for Altivec builtin %qF", fndecl);
15685
15686 /* Given it is invalid, just generate a normal call. */
15687 return expand_call (exp, target, false);
15688 }
15689
15690 target = altivec_expand_ld_builtin (exp, target, expandedp);
15691 if (*expandedp)
15692 return target;
15693
15694 target = altivec_expand_st_builtin (exp, target, expandedp);
15695 if (*expandedp)
15696 return target;
15697
15698 target = altivec_expand_dst_builtin (exp, target, expandedp);
15699 if (*expandedp)
15700 return target;
15701
15702 *expandedp = true;
15703
15704 switch (fcode)
15705 {
15706 case ALTIVEC_BUILTIN_STVX_V2DF:
15707 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15708 case ALTIVEC_BUILTIN_STVX_V2DI:
15709 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15710 case ALTIVEC_BUILTIN_STVX_V4SF:
15711 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15712 case ALTIVEC_BUILTIN_STVX:
15713 case ALTIVEC_BUILTIN_STVX_V4SI:
15714 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15715 case ALTIVEC_BUILTIN_STVX_V8HI:
15716 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15717 case ALTIVEC_BUILTIN_STVX_V16QI:
15718 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15719 case ALTIVEC_BUILTIN_STVEBX:
15720 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15721 case ALTIVEC_BUILTIN_STVEHX:
15722 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15723 case ALTIVEC_BUILTIN_STVEWX:
15724 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15725 case ALTIVEC_BUILTIN_STVXL_V2DF:
15726 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15727 case ALTIVEC_BUILTIN_STVXL_V2DI:
15728 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15729 case ALTIVEC_BUILTIN_STVXL_V4SF:
15730 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15731 case ALTIVEC_BUILTIN_STVXL:
15732 case ALTIVEC_BUILTIN_STVXL_V4SI:
15733 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15734 case ALTIVEC_BUILTIN_STVXL_V8HI:
15735 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15736 case ALTIVEC_BUILTIN_STVXL_V16QI:
15737 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15738
15739 case ALTIVEC_BUILTIN_STVLX:
15740 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15741 case ALTIVEC_BUILTIN_STVLXL:
15742 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15743 case ALTIVEC_BUILTIN_STVRX:
15744 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15745 case ALTIVEC_BUILTIN_STVRXL:
15746 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15747
15748 case P9V_BUILTIN_STXVL:
15749 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15750
15751 case VSX_BUILTIN_STXVD2X_V1TI:
15752 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15753 case VSX_BUILTIN_STXVD2X_V2DF:
15754 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15755 case VSX_BUILTIN_STXVD2X_V2DI:
15756 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15757 case VSX_BUILTIN_STXVW4X_V4SF:
15758 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15759 case VSX_BUILTIN_STXVW4X_V4SI:
15760 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15761 case VSX_BUILTIN_STXVW4X_V8HI:
15762 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15763 case VSX_BUILTIN_STXVW4X_V16QI:
15764 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15765
15766 /* For the following on big endian, it's ok to use any appropriate
15767 unaligned-supporting store, so use a generic expander. For
15768 little-endian, the exact element-reversing instruction must
15769 be used. */
15770 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15771 {
15772 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15773 : CODE_FOR_vsx_st_elemrev_v2df);
15774 return altivec_expand_stv_builtin (code, exp);
15775 }
15776 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15777 {
15778 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15779 : CODE_FOR_vsx_st_elemrev_v2di);
15780 return altivec_expand_stv_builtin (code, exp);
15781 }
15782 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15783 {
15784 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15785 : CODE_FOR_vsx_st_elemrev_v4sf);
15786 return altivec_expand_stv_builtin (code, exp);
15787 }
15788 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15789 {
15790 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15791 : CODE_FOR_vsx_st_elemrev_v4si);
15792 return altivec_expand_stv_builtin (code, exp);
15793 }
15794 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15795 {
15796 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15797 : CODE_FOR_vsx_st_elemrev_v8hi);
15798 return altivec_expand_stv_builtin (code, exp);
15799 }
15800 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15801 {
15802 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15803 : CODE_FOR_vsx_st_elemrev_v16qi);
15804 return altivec_expand_stv_builtin (code, exp);
15805 }
15806
15807 case ALTIVEC_BUILTIN_MFVSCR:
15808 icode = CODE_FOR_altivec_mfvscr;
15809 tmode = insn_data[icode].operand[0].mode;
15810
15811 if (target == 0
15812 || GET_MODE (target) != tmode
15813 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15814 target = gen_reg_rtx (tmode);
15815
15816 pat = GEN_FCN (icode) (target);
15817 if (! pat)
15818 return 0;
15819 emit_insn (pat);
15820 return target;
15821
15822 case ALTIVEC_BUILTIN_MTVSCR:
15823 icode = CODE_FOR_altivec_mtvscr;
15824 arg0 = CALL_EXPR_ARG (exp, 0);
15825 op0 = expand_normal (arg0);
15826 mode0 = insn_data[icode].operand[0].mode;
15827
15828 /* If we got invalid arguments, bail out before generating bad rtl. */
15829 if (arg0 == error_mark_node)
15830 return const0_rtx;
15831
15832 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15833 op0 = copy_to_mode_reg (mode0, op0);
15834
15835 pat = GEN_FCN (icode) (op0);
15836 if (pat)
15837 emit_insn (pat);
15838 return NULL_RTX;
15839
15840 case ALTIVEC_BUILTIN_DSSALL:
15841 emit_insn (gen_altivec_dssall ());
15842 return NULL_RTX;
15843
15844 case ALTIVEC_BUILTIN_DSS:
15845 icode = CODE_FOR_altivec_dss;
15846 arg0 = CALL_EXPR_ARG (exp, 0);
15847 STRIP_NOPS (arg0);
15848 op0 = expand_normal (arg0);
15849 mode0 = insn_data[icode].operand[0].mode;
15850
15851 /* If we got invalid arguments, bail out before generating bad rtl. */
15852 if (arg0 == error_mark_node)
15853 return const0_rtx;
15854
15855 if (TREE_CODE (arg0) != INTEGER_CST
15856 || TREE_INT_CST_LOW (arg0) & ~0x3)
15857 {
15858 error ("argument to dss must be a 2-bit unsigned literal");
15859 return const0_rtx;
15860 }
15861
15862 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15863 op0 = copy_to_mode_reg (mode0, op0);
15864
15865 emit_insn (gen_altivec_dss (op0));
15866 return NULL_RTX;
15867
15868 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15869 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15870 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15871 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15872 case VSX_BUILTIN_VEC_INIT_V2DF:
15873 case VSX_BUILTIN_VEC_INIT_V2DI:
15874 case VSX_BUILTIN_VEC_INIT_V1TI:
15875 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15876
15877 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15878 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15879 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15880 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15881 case VSX_BUILTIN_VEC_SET_V2DF:
15882 case VSX_BUILTIN_VEC_SET_V2DI:
15883 case VSX_BUILTIN_VEC_SET_V1TI:
15884 return altivec_expand_vec_set_builtin (exp);
15885
15886 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15887 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15888 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15889 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15890 case VSX_BUILTIN_VEC_EXT_V2DF:
15891 case VSX_BUILTIN_VEC_EXT_V2DI:
15892 case VSX_BUILTIN_VEC_EXT_V1TI:
15893 return altivec_expand_vec_ext_builtin (exp, target);
15894
15895 case P9V_BUILTIN_VEXTRACT4B:
15896 case P9V_BUILTIN_VEC_VEXTRACT4B:
15897 arg1 = CALL_EXPR_ARG (exp, 1);
15898 STRIP_NOPS (arg1);
15899
15900 /* If the argument is invalid, just generate a normal call. */
15901 if (arg1 == error_mark_node)
15902 return expand_call (exp, target, false);
15903
15904 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
15905 {
15906 error ("second argument to vec_vextract4b must be in the range 0..12");
15907 return expand_call (exp, target, false);
15908 }
15909 break;
15910
15911 case P9V_BUILTIN_VINSERT4B:
15912 case P9V_BUILTIN_VINSERT4B_DI:
15913 case P9V_BUILTIN_VEC_VINSERT4B:
15914 arg2 = CALL_EXPR_ARG (exp, 2);
15915 STRIP_NOPS (arg2);
15916
15917 /* If the argument is invalid, just generate a normal call. */
15918 if (arg2 == error_mark_node)
15919 return expand_call (exp, target, false);
15920
15921 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
15922 {
15923 error ("third argument to vec_vinsert4b must be in the range 0..12");
15924 return expand_call (exp, target, false);
15925 }
15926 break;
15927
15928 default:
15929 /* Not handled in this switch; fall through to the table-driven
15930 expansion below. */
15931 break;
15931 }
15932
15933 /* Expand abs* operations. */
15934 d = bdesc_abs;
15935 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15936 if (d->code == fcode)
15937 return altivec_expand_abs_builtin (d->icode, exp, target);
15938
15939 /* Expand the AltiVec predicates. */
15940 d = bdesc_altivec_preds;
15941 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15942 if (d->code == fcode)
15943 return altivec_expand_predicate_builtin (d->icode, exp, target);
15944
15945 /* LV* are funky: they were initialized differently, so dispatch them here. */
15946 switch (fcode)
15947 {
15948 case ALTIVEC_BUILTIN_LVSL:
15949 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15950 exp, target, false);
15951 case ALTIVEC_BUILTIN_LVSR:
15952 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15953 exp, target, false);
15954 case ALTIVEC_BUILTIN_LVEBX:
15955 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15956 exp, target, false);
15957 case ALTIVEC_BUILTIN_LVEHX:
15958 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15959 exp, target, false);
15960 case ALTIVEC_BUILTIN_LVEWX:
15961 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15962 exp, target, false);
15963 case ALTIVEC_BUILTIN_LVXL_V2DF:
15964 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15965 exp, target, false);
15966 case ALTIVEC_BUILTIN_LVXL_V2DI:
15967 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15968 exp, target, false);
15969 case ALTIVEC_BUILTIN_LVXL_V4SF:
15970 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15971 exp, target, false);
15972 case ALTIVEC_BUILTIN_LVXL:
15973 case ALTIVEC_BUILTIN_LVXL_V4SI:
15974 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15975 exp, target, false);
15976 case ALTIVEC_BUILTIN_LVXL_V8HI:
15977 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15978 exp, target, false);
15979 case ALTIVEC_BUILTIN_LVXL_V16QI:
15980 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15981 exp, target, false);
15982 case ALTIVEC_BUILTIN_LVX_V2DF:
15983 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15984 exp, target, false);
15985 case ALTIVEC_BUILTIN_LVX_V2DI:
15986 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15987 exp, target, false);
15988 case ALTIVEC_BUILTIN_LVX_V4SF:
15989 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15990 exp, target, false);
15991 case ALTIVEC_BUILTIN_LVX:
15992 case ALTIVEC_BUILTIN_LVX_V4SI:
15993 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15994 exp, target, false);
15995 case ALTIVEC_BUILTIN_LVX_V8HI:
15996 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15997 exp, target, false);
15998 case ALTIVEC_BUILTIN_LVX_V16QI:
15999 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
16000 exp, target, false);
16001 case ALTIVEC_BUILTIN_LVLX:
16002 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
16003 exp, target, true);
16004 case ALTIVEC_BUILTIN_LVLXL:
16005 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
16006 exp, target, true);
16007 case ALTIVEC_BUILTIN_LVRX:
16008 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
16009 exp, target, true);
16010 case ALTIVEC_BUILTIN_LVRXL:
16011 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
16012 exp, target, true);
16013 case VSX_BUILTIN_LXVD2X_V1TI:
16014 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
16015 exp, target, false);
16016 case VSX_BUILTIN_LXVD2X_V2DF:
16017 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
16018 exp, target, false);
16019 case VSX_BUILTIN_LXVD2X_V2DI:
16020 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
16021 exp, target, false);
16022 case VSX_BUILTIN_LXVW4X_V4SF:
16023 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
16024 exp, target, false);
16025 case VSX_BUILTIN_LXVW4X_V4SI:
16026 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
16027 exp, target, false);
16028 case VSX_BUILTIN_LXVW4X_V8HI:
16029 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
16030 exp, target, false);
16031 case VSX_BUILTIN_LXVW4X_V16QI:
16032 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
16033 exp, target, false);
16034 /* For the following on big endian, it's ok to use any appropriate
16035 unaligned-supporting load, so use a generic expander. For
16036 little-endian, the exact element-reversing instruction must
16037 be used. */
16038 case VSX_BUILTIN_LD_ELEMREV_V2DF:
16039 {
16040 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
16041 : CODE_FOR_vsx_ld_elemrev_v2df);
16042 return altivec_expand_lv_builtin (code, exp, target, false);
16043 }
16044 case VSX_BUILTIN_LD_ELEMREV_V2DI:
16045 {
16046 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
16047 : CODE_FOR_vsx_ld_elemrev_v2di);
16048 return altivec_expand_lv_builtin (code, exp, target, false);
16049 }
16050 case VSX_BUILTIN_LD_ELEMREV_V4SF:
16051 {
16052 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
16053 : CODE_FOR_vsx_ld_elemrev_v4sf);
16054 return altivec_expand_lv_builtin (code, exp, target, false);
16055 }
16056 case VSX_BUILTIN_LD_ELEMREV_V4SI:
16057 {
16058 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
16059 : CODE_FOR_vsx_ld_elemrev_v4si);
16060 return altivec_expand_lv_builtin (code, exp, target, false);
16061 }
16062 case VSX_BUILTIN_LD_ELEMREV_V8HI:
16063 {
16064 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
16065 : CODE_FOR_vsx_ld_elemrev_v8hi);
16066 return altivec_expand_lv_builtin (code, exp, target, false);
16067 }
16068 case VSX_BUILTIN_LD_ELEMREV_V16QI:
16069 {
16070 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
16071 : CODE_FOR_vsx_ld_elemrev_v16qi);
16072 return altivec_expand_lv_builtin (code, exp, target, false);
16073 }
16074 break;
16075 default:
16076 /* Not handled here; fall through to the failure return below. */
16077 break;
16078 }
16079
16080 *expandedp = false;
16081 return NULL_RTX;
16082 }
16083
16084 /* Expand the builtin in EXP and store the result in TARGET. Store
16085 true in *EXPANDEDP if we found a builtin to expand. */
16086 static rtx
16087 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
16088 {
16089 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16090 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16091 const struct builtin_description *d;
16092 size_t i;
16093
16094 *expandedp = true;
16095
16096 switch (fcode)
16097 {
16098 case PAIRED_BUILTIN_STX:
16099 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
16100 case PAIRED_BUILTIN_LX:
16101 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
16102 default:
16103 /* Not handled here; fall through to the predicate table below. */
16104 break;
16105 }
16106
16107 /* Expand the paired predicates. */
16108 d = bdesc_paired_preds;
16109 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
16110 if (d->code == fcode)
16111 return paired_expand_predicate_builtin (d->icode, exp, target);
16112
16113 *expandedp = false;
16114 return NULL_RTX;
16115 }
16116
16117 static rtx
16118 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
16119 {
16120 rtx pat, scratch, tmp;
16121 tree form = CALL_EXPR_ARG (exp, 0);
16122 tree arg0 = CALL_EXPR_ARG (exp, 1);
16123 tree arg1 = CALL_EXPR_ARG (exp, 2);
16124 rtx op0 = expand_normal (arg0);
16125 rtx op1 = expand_normal (arg1);
16126 machine_mode mode0 = insn_data[icode].operand[1].mode;
16127 machine_mode mode1 = insn_data[icode].operand[2].mode;
16128 int form_int;
16129 enum rtx_code code;
16130
16131 if (TREE_CODE (form) != INTEGER_CST)
16132 {
16133 error ("argument 1 of __builtin_paired_predicate must be a constant");
16134 return const0_rtx;
16135 }
16136 else
16137 form_int = TREE_INT_CST_LOW (form);
16138
16139 gcc_assert (mode0 == mode1);
16140
16141 if (arg0 == error_mark_node || arg1 == error_mark_node)
16142 return const0_rtx;
16143
16144 if (target == 0
16145 || GET_MODE (target) != SImode
16146 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
16147 target = gen_reg_rtx (SImode);
16148 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16149 op0 = copy_to_mode_reg (mode0, op0);
16150 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16151 op1 = copy_to_mode_reg (mode1, op1);
16152
16153 scratch = gen_reg_rtx (CCFPmode);
16154
16155 pat = GEN_FCN (icode) (scratch, op0, op1);
16156 if (!pat)
16157 return const0_rtx;
16158
16159 emit_insn (pat);
16160
16161 switch (form_int)
16162 {
16163 /* LT bit. */
16164 case 0:
16165 code = LT;
16166 break;
16167 /* GT bit. */
16168 case 1:
16169 code = GT;
16170 break;
16171 /* EQ bit. */
16172 case 2:
16173 code = EQ;
16174 break;
16175 /* UN bit. */
16176 case 3:
16177 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
16178 return target;
16179 default:
16180 error ("argument 1 of __builtin_paired_predicate is out of range");
16181 return const0_rtx;
16182 }
16183
16184 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
16185 emit_move_insn (target, tmp);
16186 return target;
16187 }
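
/* The FORM argument selects which CR bit of the comparison is read back.
   As a sketch (the builtin spelling follows the error text above):

     r = __builtin_paired_predicate (0, a, b);   -- LT bit
     r = __builtin_paired_predicate (2, a, b);   -- EQ bit
     r = __builtin_paired_predicate (3, a, b);   -- UN bit, read via the
        CR overflow move

   Forms 0..2 materialize the bit by comparing the CCFP scratch result
   against zero with the matching rtx code; any form above 3 is
   diagnosed as out of range.  */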
16188
16189 /* Raise an error message for a builtin function that is called without the
16190 appropriate target options being set. */
16191
16192 static void
16193 rs6000_invalid_builtin (enum rs6000_builtins fncode)
16194 {
16195 size_t uns_fncode = (size_t)fncode;
16196 const char *name = rs6000_builtin_info[uns_fncode].name;
16197 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16198
16199 gcc_assert (name != NULL);
16200 if ((fnmask & RS6000_BTM_CELL) != 0)
16201 error ("Builtin function %s is only valid for the cell processor", name);
16202 else if ((fnmask & RS6000_BTM_VSX) != 0)
16203 error ("Builtin function %s requires the -mvsx option", name);
16204 else if ((fnmask & RS6000_BTM_HTM) != 0)
16205 error ("Builtin function %s requires the -mhtm option", name);
16206 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16207 error ("Builtin function %s requires the -maltivec option", name);
16208 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16209 error ("Builtin function %s requires the -mpaired option", name);
16210 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16211 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16212 error ("Builtin function %s requires the -mhard-dfp and"
16213 " -mpower8-vector options", name);
16214 else if ((fnmask & RS6000_BTM_DFP) != 0)
16215 error ("Builtin function %s requires the -mhard-dfp option", name);
16216 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16217 error ("Builtin function %s requires the -mpower8-vector option", name);
16218 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16219 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16220 error ("Builtin function %s requires the -mcpu=power9 and"
16221 " -m64 options", name);
16222 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16223 error ("Builtin function %s requires the -mcpu=power9 option", name);
16224 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16225 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16226 error ("Builtin function %s requires the -mcpu=power9 and"
16227 " -m64 options", name);
16228 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16229 error ("Builtin function %s requires the -mcpu=power9 option", name);
16230 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16231 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16232 error ("Builtin function %s requires the -mhard-float and"
16233 " -mlong-double-128 options", name);
16234 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16235 error ("Builtin function %s requires the -mhard-float option", name);
16236 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16237 error ("Builtin function %s requires the -mfloat128 option", name);
16238 else
16239 error ("Builtin function %s is not supported with the current options",
16240 name);
16241 }
16242
16243 /* Target hook for early folding of built-ins, shamelessly stolen
16244 from ia64.c. */
16245
16246 static tree
16247 rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
16248 tree *args, bool ignore ATTRIBUTE_UNUSED)
16249 {
16250 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
16251 {
16252 enum rs6000_builtins fn_code
16253 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16254 switch (fn_code)
16255 {
16256 case RS6000_BUILTIN_NANQ:
16257 case RS6000_BUILTIN_NANSQ:
16258 {
16259 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16260 const char *str = c_getstr (*args);
16261 int quiet = fn_code == RS6000_BUILTIN_NANQ;
16262 REAL_VALUE_TYPE real;
16263
16264 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
16265 return build_real (type, real);
16266 return NULL_TREE;
16267 }
16268 case RS6000_BUILTIN_INFQ:
16269 case RS6000_BUILTIN_HUGE_VALQ:
16270 {
16271 tree type = TREE_TYPE (TREE_TYPE (fndecl));
16272 REAL_VALUE_TYPE inf;
16273 real_inf (&inf);
16274 return build_real (type, inf);
16275 }
16276 default:
16277 break;
16278 }
16279 }
16280 #ifdef SUBTARGET_FOLD_BUILTIN
16281 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16282 #else
16283 return NULL_TREE;
16284 #endif
16285 }
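
/* For example, the NaN cases let calls with literal strings fold to
   constants at compile time (a sketch; __builtin_nanq and
   __builtin_nansq are defined in rs6000_init_builtins below):

     __float128 q = __builtin_nanq ("");      -- folds to a quiet NaN
     __float128 s = __builtin_nansq ("123");  -- folds to a signaling
        NaN carrying payload 123

   A non-literal argument makes c_getstr return NULL, so the call is
   left alone and expanded at RTL time instead.  */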
16286
16287 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16288 a constant, use rs6000_fold_builtin.) */
16289
16290 bool
16291 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16292 {
16293 gimple *stmt = gsi_stmt (*gsi);
16294 tree fndecl = gimple_call_fndecl (stmt);
16295 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16296 enum rs6000_builtins fn_code
16297 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16298 tree arg0, arg1, lhs;
16299
16300 switch (fn_code)
16301 {
16302 /* Flavors of vec_add. We deliberately don't expand
16303 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16304 TImode, resulting in much poorer code generation. */
16305 case ALTIVEC_BUILTIN_VADDUBM:
16306 case ALTIVEC_BUILTIN_VADDUHM:
16307 case ALTIVEC_BUILTIN_VADDUWM:
16308 case P8V_BUILTIN_VADDUDM:
16309 case ALTIVEC_BUILTIN_VADDFP:
16310 case VSX_BUILTIN_XVADDDP:
16311 {
16312 arg0 = gimple_call_arg (stmt, 0);
16313 arg1 = gimple_call_arg (stmt, 1);
16314 lhs = gimple_call_lhs (stmt);
16315 gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
16316 gimple_set_location (g, gimple_location (stmt));
16317 gsi_replace (gsi, g, true);
16318 return true;
16319 }
16320 /* Flavors of vec_sub. We deliberately don't expand
16321 P8V_BUILTIN_VSUBUQM. */
16322 case ALTIVEC_BUILTIN_VSUBUBM:
16323 case ALTIVEC_BUILTIN_VSUBUHM:
16324 case ALTIVEC_BUILTIN_VSUBUWM:
16325 case P8V_BUILTIN_VSUBUDM:
16326 case ALTIVEC_BUILTIN_VSUBFP:
16327 case VSX_BUILTIN_XVSUBDP:
16328 {
16329 arg0 = gimple_call_arg (stmt, 0);
16330 arg1 = gimple_call_arg (stmt, 1);
16331 lhs = gimple_call_lhs (stmt);
16332 gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
16333 gimple_set_location (g, gimple_location (stmt));
16334 gsi_replace (gsi, g, true);
16335 return true;
16336 }
16337 case VSX_BUILTIN_XVMULSP:
16338 case VSX_BUILTIN_XVMULDP:
16339 {
16340 arg0 = gimple_call_arg (stmt, 0);
16341 arg1 = gimple_call_arg (stmt, 1);
16342 lhs = gimple_call_lhs (stmt);
16343 gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
16344 gimple_set_location (g, gimple_location (stmt));
16345 gsi_replace (gsi, g, true);
16346 return true;
16347 }
16348 /* Even element flavors of vec_mul (signed). */
16349 case ALTIVEC_BUILTIN_VMULESB:
16350 case ALTIVEC_BUILTIN_VMULESH:
16351 case ALTIVEC_BUILTIN_VMULESW:
16352 /* Even element flavors of vec_mul (unsigned). */
16353 case ALTIVEC_BUILTIN_VMULEUB:
16354 case ALTIVEC_BUILTIN_VMULEUH:
16355 case ALTIVEC_BUILTIN_VMULEUW:
16356 {
16357 arg0 = gimple_call_arg (stmt, 0);
16358 arg1 = gimple_call_arg (stmt, 1);
16359 lhs = gimple_call_lhs (stmt);
16360 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
16361 gimple_set_location (g, gimple_location (stmt));
16362 gsi_replace (gsi, g, true);
16363 return true;
16364 }
16365 /* Odd element flavors of vec_mul (signed). */
16366 case ALTIVEC_BUILTIN_VMULOSB:
16367 case ALTIVEC_BUILTIN_VMULOSH:
16368 case ALTIVEC_BUILTIN_VMULOSW:
16369 /* Odd element flavors of vec_mul (unsigned). */
16370 case ALTIVEC_BUILTIN_VMULOUB:
16371 case ALTIVEC_BUILTIN_VMULOUH:
16372 case ALTIVEC_BUILTIN_VMULOUW:
16373 {
16374 arg0 = gimple_call_arg (stmt, 0);
16375 arg1 = gimple_call_arg (stmt, 1);
16376 lhs = gimple_call_lhs (stmt);
16377 gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
16378 gimple_set_location (g, gimple_location (stmt));
16379 gsi_replace (gsi, g, true);
16380 return true;
16381 }
16382 /* Flavors of vec_div (Integer). */
16383 case VSX_BUILTIN_DIV_V2DI:
16384 case VSX_BUILTIN_UDIV_V2DI:
16385 {
16386 arg0 = gimple_call_arg (stmt, 0);
16387 arg1 = gimple_call_arg (stmt, 1);
16388 lhs = gimple_call_lhs (stmt);
16389 gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
16390 gimple_set_location (g, gimple_location (stmt));
16391 gsi_replace (gsi, g, true);
16392 return true;
16393 }
16394 /* Flavors of vec_div (Float). */
16395 case VSX_BUILTIN_XVDIVSP:
16396 case VSX_BUILTIN_XVDIVDP:
16397 {
16398 arg0 = gimple_call_arg (stmt, 0);
16399 arg1 = gimple_call_arg (stmt, 1);
16400 lhs = gimple_call_lhs (stmt);
16401 gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
16402 gimple_set_location (g, gimple_location (stmt));
16403 gsi_replace (gsi, g, true);
16404 return true;
16405 }
16406 /* Flavors of vec_and. */
16407 case ALTIVEC_BUILTIN_VAND:
16408 {
16409 arg0 = gimple_call_arg (stmt, 0);
16410 arg1 = gimple_call_arg (stmt, 1);
16411 lhs = gimple_call_lhs (stmt);
16412 gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
16413 gimple_set_location (g, gimple_location (stmt));
16414 gsi_replace (gsi, g, true);
16415 return true;
16416 }
16417 /* Flavors of vec_andc. */
16418 case ALTIVEC_BUILTIN_VANDC:
16419 {
16420 arg0 = gimple_call_arg (stmt, 0);
16421 arg1 = gimple_call_arg (stmt, 1);
16422 lhs = gimple_call_lhs (stmt);
16423 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16424 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16425 gimple_set_location (g, gimple_location (stmt));
16426 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16427 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
16428 gimple_set_location (g, gimple_location (stmt));
16429 gsi_replace (gsi, g, true);
16430 return true;
16431 }
16432 /* Flavors of vec_nand. */
16433 case P8V_BUILTIN_VEC_NAND:
16434 case P8V_BUILTIN_NAND_V16QI:
16435 case P8V_BUILTIN_NAND_V8HI:
16436 case P8V_BUILTIN_NAND_V4SI:
16437 case P8V_BUILTIN_NAND_V4SF:
16438 case P8V_BUILTIN_NAND_V2DF:
16439 case P8V_BUILTIN_NAND_V2DI:
16440 {
16441 arg0 = gimple_call_arg (stmt, 0);
16442 arg1 = gimple_call_arg (stmt, 1);
16443 lhs = gimple_call_lhs (stmt);
16444 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16445 gimple *g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
16446 gimple_set_location (g, gimple_location (stmt));
16447 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16448 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16449 gimple_set_location (g, gimple_location (stmt));
16450 gsi_replace (gsi, g, true);
16451 return true;
16452 }
16453 /* Flavors of vec_or. */
16454 case ALTIVEC_BUILTIN_VOR:
16455 {
16456 arg0 = gimple_call_arg (stmt, 0);
16457 arg1 = gimple_call_arg (stmt, 1);
16458 lhs = gimple_call_lhs (stmt);
16459 gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
16460 gimple_set_location (g, gimple_location (stmt));
16461 gsi_replace (gsi, g, true);
16462 return true;
16463 }
16464 /* Flavors of vec_orc. */
16465 case P8V_BUILTIN_ORC_V16QI:
16466 case P8V_BUILTIN_ORC_V8HI:
16467 case P8V_BUILTIN_ORC_V4SI:
16468 case P8V_BUILTIN_ORC_V4SF:
16469 case P8V_BUILTIN_ORC_V2DF:
16470 case P8V_BUILTIN_ORC_V2DI:
16471 {
16472 arg0 = gimple_call_arg (stmt, 0);
16473 arg1 = gimple_call_arg (stmt, 1);
16474 lhs = gimple_call_lhs (stmt);
16475 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16476 gimple *g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16477 gimple_set_location (g, gimple_location (stmt));
16478 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16479 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
16480 gimple_set_location (g, gimple_location (stmt));
16481 gsi_replace (gsi, g, true);
16482 return true;
16483 }
16484 /* Flavors of vec_xor. */
16485 case ALTIVEC_BUILTIN_VXOR:
16486 {
16487 arg0 = gimple_call_arg (stmt, 0);
16488 arg1 = gimple_call_arg (stmt, 1);
16489 lhs = gimple_call_lhs (stmt);
16490 gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
16491 gimple_set_location (g, gimple_location (stmt));
16492 gsi_replace (gsi, g, true);
16493 return true;
16494 }
16495 /* Flavors of vec_nor. */
16496 case ALTIVEC_BUILTIN_VNOR:
16497 {
16498 arg0 = gimple_call_arg (stmt, 0);
16499 arg1 = gimple_call_arg (stmt, 1);
16500 lhs = gimple_call_lhs (stmt);
16501 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16502 gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
16503 gimple_set_location (g, gimple_location (stmt));
16504 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16505 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16506 gimple_set_location (g, gimple_location (stmt));
16507 gsi_replace (gsi, g, true);
16508 return true;
16509 }
16510 /* Flavors of vec_abs. */
16511 case ALTIVEC_BUILTIN_ABS_V16QI:
16512 case ALTIVEC_BUILTIN_ABS_V8HI:
16513 case ALTIVEC_BUILTIN_ABS_V4SI:
16514 case ALTIVEC_BUILTIN_ABS_V4SF:
16515 case P8V_BUILTIN_ABS_V2DI:
16516 case VSX_BUILTIN_XVABSDP:
16517 {
16518 arg0 = gimple_call_arg (stmt, 0);
16519 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16520 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16521 return false;
16522 lhs = gimple_call_lhs (stmt);
16523 gimple *g = gimple_build_assign (lhs, ABS_EXPR, arg0);
16524 gimple_set_location (g, gimple_location (stmt));
16525 gsi_replace (gsi, g, true);
16526 return true;
16527 }
16528 /* Flavors of vec_min. */
16529 case VSX_BUILTIN_XVMINDP:
16530 case P8V_BUILTIN_VMINSD:
16531 case P8V_BUILTIN_VMINUD:
16532 case ALTIVEC_BUILTIN_VMINSB:
16533 case ALTIVEC_BUILTIN_VMINSH:
16534 case ALTIVEC_BUILTIN_VMINSW:
16535 case ALTIVEC_BUILTIN_VMINUB:
16536 case ALTIVEC_BUILTIN_VMINUH:
16537 case ALTIVEC_BUILTIN_VMINUW:
16538 case ALTIVEC_BUILTIN_VMINFP:
16539 {
16540 arg0 = gimple_call_arg (stmt, 0);
16541 arg1 = gimple_call_arg (stmt, 1);
16542 lhs = gimple_call_lhs (stmt);
16543 gimple *g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
16544 gimple_set_location (g, gimple_location (stmt));
16545 gsi_replace (gsi, g, true);
16546 return true;
16547 }
16548 /* Flavors of vec_max. */
16549 case VSX_BUILTIN_XVMAXDP:
16550 case P8V_BUILTIN_VMAXSD:
16551 case P8V_BUILTIN_VMAXUD:
16552 case ALTIVEC_BUILTIN_VMAXSB:
16553 case ALTIVEC_BUILTIN_VMAXSH:
16554 case ALTIVEC_BUILTIN_VMAXSW:
16555 case ALTIVEC_BUILTIN_VMAXUB:
16556 case ALTIVEC_BUILTIN_VMAXUH:
16557 case ALTIVEC_BUILTIN_VMAXUW:
16558 case ALTIVEC_BUILTIN_VMAXFP:
16559 {
16560 arg0 = gimple_call_arg (stmt, 0);
16561 arg1 = gimple_call_arg (stmt, 1);
16562 lhs = gimple_call_lhs (stmt);
16563 gimple *g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
16564 gimple_set_location (g, gimple_location (stmt));
16565 gsi_replace (gsi, g, true);
16566 return true;
16567 }
16568 /* Flavors of vec_eqv. */
16569 case P8V_BUILTIN_EQV_V16QI:
16570 case P8V_BUILTIN_EQV_V8HI:
16571 case P8V_BUILTIN_EQV_V4SI:
16572 case P8V_BUILTIN_EQV_V4SF:
16573 case P8V_BUILTIN_EQV_V2DF:
16574 case P8V_BUILTIN_EQV_V2DI:
16575 {
16576 arg0 = gimple_call_arg (stmt, 0);
16577 arg1 = gimple_call_arg (stmt, 1);
16578 lhs = gimple_call_lhs (stmt);
16579 tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16580 gimple *g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
16581 gimple_set_location (g, gimple_location (stmt));
16582 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16583 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16584 gimple_set_location (g, gimple_location (stmt));
16585 gsi_replace (gsi, g, true);
16586 return true;
16587 }
16588 /* Flavors of vec_rotate_left. */
16589 case ALTIVEC_BUILTIN_VRLB:
16590 case ALTIVEC_BUILTIN_VRLH:
16591 case ALTIVEC_BUILTIN_VRLW:
16592 case P8V_BUILTIN_VRLD:
16593 {
16594 arg0 = gimple_call_arg (stmt, 0);
16595 arg1 = gimple_call_arg (stmt, 1);
16596 lhs = gimple_call_lhs (stmt);
16597 gimple *g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
16598 gimple_set_location (g, gimple_location (stmt));
16599 gsi_replace (gsi, g, true);
16600 return true;
16601 }
16602 /* Flavors of vector shift right algebraic.
16603 vec_sra{b,h,w} -> vsra{b,h,w}. */
16604 case ALTIVEC_BUILTIN_VSRAB:
16605 case ALTIVEC_BUILTIN_VSRAH:
16606 case ALTIVEC_BUILTIN_VSRAW:
16607 case P8V_BUILTIN_VSRAD:
16608 {
16609 arg0 = gimple_call_arg (stmt, 0);
16610 arg1 = gimple_call_arg (stmt, 1);
16611 lhs = gimple_call_lhs (stmt);
16612 gimple *g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
16613 gimple_set_location (g, gimple_location (stmt));
16614 gsi_replace (gsi, g, true);
16615 return true;
16616 }
16617 /* Flavors of vector shift left.
16618 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
16619 case ALTIVEC_BUILTIN_VSLB:
16620 case ALTIVEC_BUILTIN_VSLH:
16621 case ALTIVEC_BUILTIN_VSLW:
16622 case P8V_BUILTIN_VSLD:
16623 {
16624 arg0 = gimple_call_arg (stmt, 0);
16625 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16626 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16627 return false;
16628 arg1 = gimple_call_arg (stmt, 1);
16629 lhs = gimple_call_lhs (stmt);
16630 gimple *g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
16631 gimple_set_location (g, gimple_location (stmt));
16632 gsi_replace (gsi, g, true);
16633 return true;
16634 }
16635 /* Flavors of vector shift right. */
16636 case ALTIVEC_BUILTIN_VSRB:
16637 case ALTIVEC_BUILTIN_VSRH:
16638 case ALTIVEC_BUILTIN_VSRW:
16639 case P8V_BUILTIN_VSRD:
16640 {
16641 arg0 = gimple_call_arg (stmt, 0);
16642 arg1 = gimple_call_arg (stmt, 1);
16643 lhs = gimple_call_lhs (stmt);
16644 gimple_seq stmts = NULL;
16645 /* Convert arg0 to unsigned. */
16646 tree arg0_unsigned
16647 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
16648 unsigned_type_for (TREE_TYPE (arg0)), arg0);
16649 tree res
16650 = gimple_build (&stmts, RSHIFT_EXPR,
16651 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
16652 /* Convert result back to the lhs type. */
16653 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
16654 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16655 update_call_from_tree (gsi, res);
16656 return true;
16657 }
16658 default:
16659 break;
16660 }
16661
16662 return false;
16663 }
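
/* As a concrete example of the folding above: with -maltivec,

     vector signed int x = vec_add (a, b);

   resolves to ALTIVEC_BUILTIN_VADDUWM and is replaced in the GIMPLE
   stream by the plain assignment x = a + b (PLUS_EXPR), which later
   optimizers understand far better than an opaque builtin call.  The
   vector shift-right case is the one that needs extra statements: it
   VIEW_CONVERTs to the unsigned type first so the shift is logical,
   then converts the result back to the type of the lhs.  */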
16664
16665 /* Expand an expression EXP that calls a built-in function,
16666 with result going to TARGET if that's convenient
16667 (and in mode MODE if that's convenient).
16668 SUBTARGET may be used as the target for computing one of EXP's operands.
16669 IGNORE is nonzero if the value is to be ignored. */
16670
16671 static rtx
16672 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16673 machine_mode mode ATTRIBUTE_UNUSED,
16674 int ignore ATTRIBUTE_UNUSED)
16675 {
16676 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16677 enum rs6000_builtins fcode
16678 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16679 size_t uns_fcode = (size_t)fcode;
16680 const struct builtin_description *d;
16681 size_t i;
16682 rtx ret;
16683 bool success;
16684 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16685 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16686
16687 if (TARGET_DEBUG_BUILTIN)
16688 {
16689 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16690 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16691 const char *name2 = ((icode != CODE_FOR_nothing)
16692 ? get_insn_name ((int)icode)
16693 : "nothing");
16694 const char *name3;
16695
16696 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16697 {
16698 default: name3 = "unknown"; break;
16699 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16700 case RS6000_BTC_UNARY: name3 = "unary"; break;
16701 case RS6000_BTC_BINARY: name3 = "binary"; break;
16702 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16703 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16704 case RS6000_BTC_ABS: name3 = "abs"; break;
16705 case RS6000_BTC_DST: name3 = "dst"; break;
16706 }
16707
16708
16709 fprintf (stderr,
16710 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16711 (name1) ? name1 : "---", fcode,
16712 (name2) ? name2 : "---", (int)icode,
16713 name3,
16714 func_valid_p ? "" : ", not valid");
16715 }
16716
16717 if (!func_valid_p)
16718 {
16719 rs6000_invalid_builtin (fcode);
16720
16721 /* Given it is invalid, just generate a normal call. */
16722 return expand_call (exp, target, ignore);
16723 }
16724
16725 switch (fcode)
16726 {
16727 case RS6000_BUILTIN_RECIP:
16728 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16729
16730 case RS6000_BUILTIN_RECIPF:
16731 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16732
16733 case RS6000_BUILTIN_RSQRTF:
16734 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16735
16736 case RS6000_BUILTIN_RSQRT:
16737 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16738
16739 case POWER7_BUILTIN_BPERMD:
16740 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16741 ? CODE_FOR_bpermd_di
16742 : CODE_FOR_bpermd_si), exp, target);
16743
16744 case RS6000_BUILTIN_GET_TB:
16745 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16746 target);
16747
16748 case RS6000_BUILTIN_MFTB:
16749 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16750 ? CODE_FOR_rs6000_mftb_di
16751 : CODE_FOR_rs6000_mftb_si),
16752 target);
16753
16754 case RS6000_BUILTIN_MFFS:
16755 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16756
16757 case RS6000_BUILTIN_MTFSF:
16758 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16759
16760 case RS6000_BUILTIN_CPU_INIT:
16761 case RS6000_BUILTIN_CPU_IS:
16762 case RS6000_BUILTIN_CPU_SUPPORTS:
16763 return cpu_expand_builtin (fcode, exp, target);
16764
16765 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16766 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16767 {
16768 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16769 : (int) CODE_FOR_altivec_lvsl_direct);
16770 machine_mode tmode = insn_data[icode].operand[0].mode;
16771 machine_mode mode = insn_data[icode].operand[1].mode;
16772 tree arg;
16773 rtx op, addr, pat;
16774
16775 gcc_assert (TARGET_ALTIVEC);
16776
16777 arg = CALL_EXPR_ARG (exp, 0);
16778 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16779 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16780 addr = memory_address (mode, op);
16781 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16782 op = addr;
16783 else
16784 {
16785 /* For the load case we need to negate the address. */
16786 op = gen_reg_rtx (GET_MODE (addr));
16787 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16788 }
16789 op = gen_rtx_MEM (mode, op);
16790
16791 if (target == 0
16792 || GET_MODE (target) != tmode
16793 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16794 target = gen_reg_rtx (tmode);
16795
16796 pat = GEN_FCN (icode) (target, op);
16797 if (!pat)
16798 return 0;
16799 emit_insn (pat);
16800
16801 return target;
16802 }
16803
16804 case ALTIVEC_BUILTIN_VCFUX:
16805 case ALTIVEC_BUILTIN_VCFSX:
16806 case ALTIVEC_BUILTIN_VCTUXS:
16807 case ALTIVEC_BUILTIN_VCTSXS:
16808 /* FIXME: There's got to be a nicer way to handle this case than
16809 constructing a new CALL_EXPR. */
16810 if (call_expr_nargs (exp) == 1)
16811 {
16812 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16813 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16814 }
16815 break;
16816
16817 default:
16818 break;
16819 }
16820
16821 if (TARGET_ALTIVEC)
16822 {
16823 ret = altivec_expand_builtin (exp, target, &success);
16824
16825 if (success)
16826 return ret;
16827 }
16828 if (TARGET_PAIRED_FLOAT)
16829 {
16830 ret = paired_expand_builtin (exp, target, &success);
16831
16832 if (success)
16833 return ret;
16834 }
16835 if (TARGET_HTM)
16836 {
16837 ret = htm_expand_builtin (exp, target, &success);
16838
16839 if (success)
16840 return ret;
16841 }
16842
16843 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16844 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16845 gcc_assert (attr == RS6000_BTC_UNARY
16846 || attr == RS6000_BTC_BINARY
16847 || attr == RS6000_BTC_TERNARY
16848 || attr == RS6000_BTC_SPECIAL);
16849
16850 /* Handle simple unary operations. */
16851 d = bdesc_1arg;
16852 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16853 if (d->code == fcode)
16854 return rs6000_expand_unop_builtin (d->icode, exp, target);
16855
16856 /* Handle simple binary operations. */
16857 d = bdesc_2arg;
16858 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16859 if (d->code == fcode)
16860 return rs6000_expand_binop_builtin (d->icode, exp, target);
16861
16862 /* Handle simple ternary operations. */
16863 d = bdesc_3arg;
16864 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16865 if (d->code == fcode)
16866 return rs6000_expand_ternop_builtin (d->icode, exp, target);
16867
16868 /* Handle simple no-argument operations. */
16869 d = bdesc_0arg;
16870 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16871 if (d->code == fcode)
16872 return rs6000_expand_zeroop_builtin (d->icode, target);
16873
16874 gcc_unreachable ();
16875 }
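
/* One subtlety above worth an example: the VCFUX/VCFSX/VCTUXS/VCTSXS
   cases accept a one-argument form, so (a sketch)

     vector float f = vec_ctf (v);

   is rewritten into the two-argument call vec_ctf (v, 0), i.e. a new
   CALL_EXPR with integer_zero_node supplying the scale, before falling
   through to the generic expansion paths later in the function.  */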
16876
16877 /* Create a builtin vector type with a name. Taking care not to give
16878 the canonical type a name. */
16879
16880 static tree
16881 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16882 {
16883 tree result = build_vector_type (elt_type, num_elts);
16884
16885 /* Copy so we don't give the canonical type a name. */
16886 result = build_variant_type_copy (result);
16887
16888 add_builtin_type (name, result);
16889
16890 return result;
16891 }
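
/* For example, the V4SF node created below comes from
   rs6000_vector_type ("__vector float", float_type_node, 4):
   build_vector_type returns the shared canonical V4SF type, and the
   variant copy is what receives the "__vector float" name, keeping the
   canonical type itself anonymous.  */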
16892
16893 static void
16894 rs6000_init_builtins (void)
16895 {
16896 tree tdecl;
16897 tree ftype;
16898 machine_mode mode;
16899
16900 if (TARGET_DEBUG_BUILTIN)
16901 fprintf (stderr, "rs6000_init_builtins%s%s%s\n",
16902 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16903 (TARGET_ALTIVEC) ? ", altivec" : "",
16904 (TARGET_VSX) ? ", vsx" : "");
16905
16906 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16907 V2SF_type_node = build_vector_type (float_type_node, 2);
16908 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16909 : "__vector long long",
16910 intDI_type_node, 2);
16911 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16912 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16913 intSI_type_node, 4);
16914 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16915 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16916 intHI_type_node, 8);
16917 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16918 intQI_type_node, 16);
16919
16920 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16921 unsigned_intQI_type_node, 16);
16922 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16923 unsigned_intHI_type_node, 8);
16924 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16925 unsigned_intSI_type_node, 4);
16926 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16927 ? "__vector unsigned long"
16928 : "__vector unsigned long long",
16929 unsigned_intDI_type_node, 2);
16930
16931 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16932 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16933 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16934 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16935
16936 const_str_type_node
16937 = build_pointer_type (build_qualified_type (char_type_node,
16938 TYPE_QUAL_CONST));
16939
16940 /* We use V1TI mode as a special container to hold __int128_t items that
16941 must live in VSX registers. */
16942 if (intTI_type_node)
16943 {
16944 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16945 intTI_type_node, 1);
16946 unsigned_V1TI_type_node
16947 = rs6000_vector_type ("__vector unsigned __int128",
16948 unsigned_intTI_type_node, 1);
16949 }
16950
16951 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16952 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16953 'vector unsigned short'. */
16954
16955 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16956 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16957 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16958 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16959 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16960
16961 long_integer_type_internal_node = long_integer_type_node;
16962 long_unsigned_type_internal_node = long_unsigned_type_node;
16963 long_long_integer_type_internal_node = long_long_integer_type_node;
16964 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16965 intQI_type_internal_node = intQI_type_node;
16966 uintQI_type_internal_node = unsigned_intQI_type_node;
16967 intHI_type_internal_node = intHI_type_node;
16968 uintHI_type_internal_node = unsigned_intHI_type_node;
16969 intSI_type_internal_node = intSI_type_node;
16970 uintSI_type_internal_node = unsigned_intSI_type_node;
16971 intDI_type_internal_node = intDI_type_node;
16972 uintDI_type_internal_node = unsigned_intDI_type_node;
16973 intTI_type_internal_node = intTI_type_node;
16974 uintTI_type_internal_node = unsigned_intTI_type_node;
16975 float_type_internal_node = float_type_node;
16976 double_type_internal_node = double_type_node;
16977 long_double_type_internal_node = long_double_type_node;
16978 dfloat64_type_internal_node = dfloat64_type_node;
16979 dfloat128_type_internal_node = dfloat128_type_node;
16980 void_type_internal_node = void_type_node;
16981
16982 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16983 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16984 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16985 format that uses a pair of doubles, depending on the switches and
16986 defaults.
16987
16988 We do not enable the actual __float128 keyword unless the user explicitly
16989 asks for it, because the library support is not yet complete.
16990
16991 If we don't support either 128-bit IBM double double or IEEE 128-bit
16992 floating point, we need to make sure the type is non-zero or else the
16993 self-test fails during bootstrap.
16994 
16995 We don't register a built-in type for __ibm128 if the type is the same as
16996 long double.  Instead, rs6000_cpu_cpp_builtins adds a #define mapping
16997 __ibm128 to long double. */
16998 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
16999 {
17000 ibm128_float_type_node = make_node (REAL_TYPE);
17001 TYPE_PRECISION (ibm128_float_type_node) = 128;
17002 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17003 layout_type (ibm128_float_type_node);
17004
17005 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17006 "__ibm128");
17007 }
17008 else
17009 ibm128_float_type_node = long_double_type_node;
17010
17011 if (TARGET_FLOAT128_KEYWORD)
17012 {
17013 ieee128_float_type_node = float128_type_node;
17014 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17015 "__float128");
17016 }
17017
17018 else if (TARGET_FLOAT128_TYPE)
17019 {
17020 ieee128_float_type_node = make_node (REAL_TYPE);
17021 TYPE_PRECISION (ieee128_float_type_node) = 128;
17022 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
17023 layout_type (ieee128_float_type_node);
17024
17025 /* If we are not exporting the __float128/_Float128 keywords, we need a
17026 keyword to get the types created. Use __ieee128 as the dummy
17027 keyword. */
17028 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17029 "__ieee128");
17030 }
17031
17032 else
17033 ieee128_float_type_node = long_double_type_node;
17034
17035 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17036 tree type node. */
17037 builtin_mode_to_type[QImode][0] = integer_type_node;
17038 builtin_mode_to_type[HImode][0] = integer_type_node;
17039 builtin_mode_to_type[SImode][0] = intSI_type_node;
17040 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17041 builtin_mode_to_type[DImode][0] = intDI_type_node;
17042 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17043 builtin_mode_to_type[TImode][0] = intTI_type_node;
17044 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17045 builtin_mode_to_type[SFmode][0] = float_type_node;
17046 builtin_mode_to_type[DFmode][0] = double_type_node;
17047 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17048 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17049 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17050 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17051 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17052 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17053 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17054 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17055 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17056 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17057 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17058 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17059 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17060 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17061 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17062 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17063 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17064 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17065 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
17066
17067 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17068 TYPE_NAME (bool_char_type_node) = tdecl;
17069
17070 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17071 TYPE_NAME (bool_short_type_node) = tdecl;
17072
17073 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17074 TYPE_NAME (bool_int_type_node) = tdecl;
17075
17076 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17077 TYPE_NAME (pixel_type_node) = tdecl;
17078
17079 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17080 bool_char_type_node, 16);
17081 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17082 bool_short_type_node, 8);
17083 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17084 bool_int_type_node, 4);
17085 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17086 ? "__vector __bool long"
17087 : "__vector __bool long long",
17088 bool_long_type_node, 2);
17089 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17090 pixel_type_node, 8);
17091
17092 /* Paired builtins are only available if you build a compiler with the
17093 appropriate options, so only create those builtins with the appropriate
17094 compiler option. Create Altivec and VSX builtins on machines with at
17095 least the general purpose extensions (970 and newer) to allow the use of
17096 the target attribute. */
17097 if (TARGET_PAIRED_FLOAT)
17098 paired_init_builtins ();
17099 if (TARGET_EXTRA_BUILTINS)
17100 altivec_init_builtins ();
17101 if (TARGET_HTM)
17102 htm_init_builtins ();
17103
17104 if (TARGET_EXTRA_BUILTINS || TARGET_PAIRED_FLOAT)
17105 rs6000_common_init_builtins ();
17106
17107 ftype = build_function_type_list (ieee128_float_type_node,
17108 const_str_type_node, NULL_TREE);
17109 def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ);
17110 def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ);
17111
17112 ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE);
17113 def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ);
17114 def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ);
17115
17116 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17117 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17118 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17119
17120 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17121 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17122 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17123
17124 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17125 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17126 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17127
17128 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17129 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17130 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17131
17132 mode = (TARGET_64BIT) ? DImode : SImode;
17133 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17134 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17135 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17136
17137 ftype = build_function_type_list (unsigned_intDI_type_node,
17138 NULL_TREE);
17139 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17140
17141 if (TARGET_64BIT)
17142 ftype = build_function_type_list (unsigned_intDI_type_node,
17143 NULL_TREE);
17144 else
17145 ftype = build_function_type_list (unsigned_intSI_type_node,
17146 NULL_TREE);
17147 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
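   /* Editorial sketch of the time base builtins (do_work is a
      placeholder):

	 unsigned long long t0 = __builtin_ppc_get_timebase ();
	 do_work ();
	 unsigned long long dt = __builtin_ppc_get_timebase () - t0;

      __builtin_ppc_mftb reads the same register but, as typed above,
      returns only the low 32 bits on 32-bit targets.  */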
17148
17149 ftype = build_function_type_list (double_type_node, NULL_TREE);
17150 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17151
17152 ftype = build_function_type_list (void_type_node,
17153 intSI_type_node, double_type_node,
17154 NULL_TREE);
17155 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17156
17157 ftype = build_function_type_list (void_type_node, NULL_TREE);
17158 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17159
17160 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17161 NULL_TREE);
17162 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17163 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
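   /* Typical use of the CPU identification builtins (editorial sketch;
      "power8" and "vsx" are examples of the accepted strings, and
      fast_path is a placeholder):

	 __builtin_cpu_init ();
	 if (__builtin_cpu_is ("power8") || __builtin_cpu_supports ("vsx"))
	   fast_path ();

      On glibc targets the answers come from AT_PLATFORM/AT_HWCAP via
      ppc-auxv.h.  */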
17164
17165 /* AIX libm provides clog as __clog. */
17166 if (TARGET_XCOFF
17167     && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17168 set_user_assembler_name (tdecl, "__clog");
17169
17170 #ifdef SUBTARGET_INIT_BUILTINS
17171 SUBTARGET_INIT_BUILTINS;
17172 #endif
17173 }
17174
17175 /* Returns the rs6000 builtin decl for CODE. */
17176
17177 static tree
17178 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17179 {
17180 HOST_WIDE_INT fnmask;
17181
17182 if (code >= RS6000_BUILTIN_COUNT)
17183 return error_mark_node;
17184
17185 fnmask = rs6000_builtin_info[code].mask;
17186 if ((fnmask & rs6000_builtin_mask) != fnmask)
17187 {
17188 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17189 return error_mark_node;
17190 }
17191
17192 return rs6000_builtin_decls[code];
17193 }
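/* Editorial example: if a builtin's entry in rs6000_builtin_info carries a
   mask bit such as RS6000_BTM_P9_VECTOR that is absent from the current
   rs6000_builtin_mask, the function above diagnoses the builtin as
   unavailable and returns error_mark_node instead of a decl.  */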
17194
17195 static void
17196 paired_init_builtins (void)
17197 {
17198 const struct builtin_description *d;
17199 size_t i;
17200 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17201
17202 tree int_ftype_int_v2sf_v2sf
17203 = build_function_type_list (integer_type_node,
17204 integer_type_node,
17205 V2SF_type_node,
17206 V2SF_type_node,
17207 NULL_TREE);
17208 tree pcfloat_type_node =
17209 build_pointer_type (build_qualified_type
17210 (float_type_node, TYPE_QUAL_CONST));
17211
17212 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17213 long_integer_type_node,
17214 pcfloat_type_node,
17215 NULL_TREE);
17216 tree void_ftype_v2sf_long_pcfloat =
17217 build_function_type_list (void_type_node,
17218 V2SF_type_node,
17219 long_integer_type_node,
17220 pcfloat_type_node,
17221 NULL_TREE);
17222
17223
17224 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17225 PAIRED_BUILTIN_LX);
17226
17227
17228 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17229 PAIRED_BUILTIN_STX);
17230
17231 /* Predicates. */
17232 d = bdesc_paired_preds;
17233 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17234 {
17235 tree type;
17236 HOST_WIDE_INT mask = d->mask;
17237
17238 if ((mask & builtin_mask) != mask)
17239 {
17240 if (TARGET_DEBUG_BUILTIN)
17241 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17242 d->name);
17243 continue;
17244 }
17245
17246 /* Cannot define builtin if the instruction is disabled. */
17247 gcc_assert (d->icode != CODE_FOR_nothing);
17248
17249 if (TARGET_DEBUG_BUILTIN)
17250 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17251 (int)i, get_insn_name (d->icode), (int)d->icode,
17252 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17253
17254 switch (insn_data[d->icode].operand[1].mode)
17255 {
17256 case V2SFmode:
17257 type = int_ftype_int_v2sf_v2sf;
17258 break;
17259 default:
17260 gcc_unreachable ();
17261 }
17262
17263 def_builtin (d->name, type, d->code);
17264 }
17265 }
17266
17267 static void
17268 altivec_init_builtins (void)
17269 {
17270 const struct builtin_description *d;
17271 size_t i;
17272 tree ftype;
17273 tree decl;
17274 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17275
17276 tree pvoid_type_node = build_pointer_type (void_type_node);
17277
17278 tree pcvoid_type_node
17279 = build_pointer_type (build_qualified_type (void_type_node,
17280 TYPE_QUAL_CONST));
17281
17282 tree int_ftype_opaque
17283 = build_function_type_list (integer_type_node,
17284 opaque_V4SI_type_node, NULL_TREE);
17285 tree opaque_ftype_opaque
17286 = build_function_type_list (integer_type_node, NULL_TREE);
17287 tree opaque_ftype_opaque_int
17288 = build_function_type_list (opaque_V4SI_type_node,
17289 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17290 tree opaque_ftype_opaque_opaque_int
17291 = build_function_type_list (opaque_V4SI_type_node,
17292 opaque_V4SI_type_node, opaque_V4SI_type_node,
17293 integer_type_node, NULL_TREE);
17294 tree opaque_ftype_opaque_opaque_opaque
17295 = build_function_type_list (opaque_V4SI_type_node,
17296 opaque_V4SI_type_node, opaque_V4SI_type_node,
17297 opaque_V4SI_type_node, NULL_TREE);
17298 tree opaque_ftype_opaque_opaque
17299 = build_function_type_list (opaque_V4SI_type_node,
17300 opaque_V4SI_type_node, opaque_V4SI_type_node,
17301 NULL_TREE);
17302 tree int_ftype_int_opaque_opaque
17303 = build_function_type_list (integer_type_node,
17304 integer_type_node, opaque_V4SI_type_node,
17305 opaque_V4SI_type_node, NULL_TREE);
17306 tree int_ftype_int_v4si_v4si
17307 = build_function_type_list (integer_type_node,
17308 integer_type_node, V4SI_type_node,
17309 V4SI_type_node, NULL_TREE);
17310 tree int_ftype_int_v2di_v2di
17311 = build_function_type_list (integer_type_node,
17312 integer_type_node, V2DI_type_node,
17313 V2DI_type_node, NULL_TREE);
17314 tree void_ftype_v4si
17315 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17316 tree v8hi_ftype_void
17317 = build_function_type_list (V8HI_type_node, NULL_TREE);
17318 tree void_ftype_void
17319 = build_function_type_list (void_type_node, NULL_TREE);
17320 tree void_ftype_int
17321 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17322
17323 tree opaque_ftype_long_pcvoid
17324 = build_function_type_list (opaque_V4SI_type_node,
17325 long_integer_type_node, pcvoid_type_node,
17326 NULL_TREE);
17327 tree v16qi_ftype_long_pcvoid
17328 = build_function_type_list (V16QI_type_node,
17329 long_integer_type_node, pcvoid_type_node,
17330 NULL_TREE);
17331 tree v8hi_ftype_long_pcvoid
17332 = build_function_type_list (V8HI_type_node,
17333 long_integer_type_node, pcvoid_type_node,
17334 NULL_TREE);
17335 tree v4si_ftype_long_pcvoid
17336 = build_function_type_list (V4SI_type_node,
17337 long_integer_type_node, pcvoid_type_node,
17338 NULL_TREE);
17339 tree v4sf_ftype_long_pcvoid
17340 = build_function_type_list (V4SF_type_node,
17341 long_integer_type_node, pcvoid_type_node,
17342 NULL_TREE);
17343 tree v2df_ftype_long_pcvoid
17344 = build_function_type_list (V2DF_type_node,
17345 long_integer_type_node, pcvoid_type_node,
17346 NULL_TREE);
17347 tree v2di_ftype_long_pcvoid
17348 = build_function_type_list (V2DI_type_node,
17349 long_integer_type_node, pcvoid_type_node,
17350 NULL_TREE);
17351
17352 tree void_ftype_opaque_long_pvoid
17353 = build_function_type_list (void_type_node,
17354 opaque_V4SI_type_node, long_integer_type_node,
17355 pvoid_type_node, NULL_TREE);
17356 tree void_ftype_v4si_long_pvoid
17357 = build_function_type_list (void_type_node,
17358 V4SI_type_node, long_integer_type_node,
17359 pvoid_type_node, NULL_TREE);
17360 tree void_ftype_v16qi_long_pvoid
17361 = build_function_type_list (void_type_node,
17362 V16QI_type_node, long_integer_type_node,
17363 pvoid_type_node, NULL_TREE);
17364
17365 tree void_ftype_v16qi_pvoid_long
17366 = build_function_type_list (void_type_node,
17367 V16QI_type_node, pvoid_type_node,
17368 long_integer_type_node, NULL_TREE);
17369
17370 tree void_ftype_v8hi_long_pvoid
17371 = build_function_type_list (void_type_node,
17372 V8HI_type_node, long_integer_type_node,
17373 pvoid_type_node, NULL_TREE);
17374 tree void_ftype_v4sf_long_pvoid
17375 = build_function_type_list (void_type_node,
17376 V4SF_type_node, long_integer_type_node,
17377 pvoid_type_node, NULL_TREE);
17378 tree void_ftype_v2df_long_pvoid
17379 = build_function_type_list (void_type_node,
17380 V2DF_type_node, long_integer_type_node,
17381 pvoid_type_node, NULL_TREE);
17382 tree void_ftype_v2di_long_pvoid
17383 = build_function_type_list (void_type_node,
17384 V2DI_type_node, long_integer_type_node,
17385 pvoid_type_node, NULL_TREE);
17386 tree int_ftype_int_v8hi_v8hi
17387 = build_function_type_list (integer_type_node,
17388 integer_type_node, V8HI_type_node,
17389 V8HI_type_node, NULL_TREE);
17390 tree int_ftype_int_v16qi_v16qi
17391 = build_function_type_list (integer_type_node,
17392 integer_type_node, V16QI_type_node,
17393 V16QI_type_node, NULL_TREE);
17394 tree int_ftype_int_v4sf_v4sf
17395 = build_function_type_list (integer_type_node,
17396 integer_type_node, V4SF_type_node,
17397 V4SF_type_node, NULL_TREE);
17398 tree int_ftype_int_v2df_v2df
17399 = build_function_type_list (integer_type_node,
17400 integer_type_node, V2DF_type_node,
17401 V2DF_type_node, NULL_TREE);
17402 tree v2di_ftype_v2di
17403 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17404 tree v4si_ftype_v4si
17405 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17406 tree v8hi_ftype_v8hi
17407 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17408 tree v16qi_ftype_v16qi
17409 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17410 tree v4sf_ftype_v4sf
17411 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17412 tree v2df_ftype_v2df
17413 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17414 tree void_ftype_pcvoid_int_int
17415 = build_function_type_list (void_type_node,
17416 pcvoid_type_node, integer_type_node,
17417 integer_type_node, NULL_TREE);
17418
17419 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17420 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17421 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17422 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17423 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17424 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17425 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17426 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17427 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17428 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17429 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17430 ALTIVEC_BUILTIN_LVXL_V2DF);
17431 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17432 ALTIVEC_BUILTIN_LVXL_V2DI);
17433 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17434 ALTIVEC_BUILTIN_LVXL_V4SF);
17435 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17436 ALTIVEC_BUILTIN_LVXL_V4SI);
17437 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17438 ALTIVEC_BUILTIN_LVXL_V8HI);
17439 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17440 ALTIVEC_BUILTIN_LVXL_V16QI);
17441 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17442 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17443 ALTIVEC_BUILTIN_LVX_V2DF);
17444 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17445 ALTIVEC_BUILTIN_LVX_V2DI);
17446 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17447 ALTIVEC_BUILTIN_LVX_V4SF);
17448 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17449 ALTIVEC_BUILTIN_LVX_V4SI);
17450 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17451 ALTIVEC_BUILTIN_LVX_V8HI);
17452 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17453 ALTIVEC_BUILTIN_LVX_V16QI);
17454 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17455 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17456 ALTIVEC_BUILTIN_STVX_V2DF);
17457 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17458 ALTIVEC_BUILTIN_STVX_V2DI);
17459 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17460 ALTIVEC_BUILTIN_STVX_V4SF);
17461 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17462 ALTIVEC_BUILTIN_STVX_V4SI);
17463 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17464 ALTIVEC_BUILTIN_STVX_V8HI);
17465 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17466 ALTIVEC_BUILTIN_STVX_V16QI);
17467 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17468 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17469 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17470 ALTIVEC_BUILTIN_STVXL_V2DF);
17471 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17472 ALTIVEC_BUILTIN_STVXL_V2DI);
17473 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17474 ALTIVEC_BUILTIN_STVXL_V4SF);
17475 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17476 ALTIVEC_BUILTIN_STVXL_V4SI);
17477 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17478 ALTIVEC_BUILTIN_STVXL_V8HI);
17479 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17480 ALTIVEC_BUILTIN_STVXL_V16QI);
17481 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17482 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17483 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17484 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17485 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17486 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17487 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17488 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17489 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17490 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17491 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17492 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17493 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17494 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17495 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17496 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17497
17498 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17499 VSX_BUILTIN_LXVD2X_V2DF);
17500 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17501 VSX_BUILTIN_LXVD2X_V2DI);
17502 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17503 VSX_BUILTIN_LXVW4X_V4SF);
17504 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17505 VSX_BUILTIN_LXVW4X_V4SI);
17506 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17507 VSX_BUILTIN_LXVW4X_V8HI);
17508 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17509 VSX_BUILTIN_LXVW4X_V16QI);
17510 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17511 VSX_BUILTIN_STXVD2X_V2DF);
17512 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17513 VSX_BUILTIN_STXVD2X_V2DI);
17514 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17515 VSX_BUILTIN_STXVW4X_V4SF);
17516 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17517 VSX_BUILTIN_STXVW4X_V4SI);
17518 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17519 VSX_BUILTIN_STXVW4X_V8HI);
17520 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17521 VSX_BUILTIN_STXVW4X_V16QI);
17522
17523 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17524 VSX_BUILTIN_LD_ELEMREV_V2DF);
17525 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17526 VSX_BUILTIN_LD_ELEMREV_V2DI);
17527 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17528 VSX_BUILTIN_LD_ELEMREV_V4SF);
17529 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17530 VSX_BUILTIN_LD_ELEMREV_V4SI);
17531 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17532 VSX_BUILTIN_ST_ELEMREV_V2DF);
17533 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17534 VSX_BUILTIN_ST_ELEMREV_V2DI);
17535 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17536 VSX_BUILTIN_ST_ELEMREV_V4SF);
17537 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17538 VSX_BUILTIN_ST_ELEMREV_V4SI);
17539
17540 if (TARGET_P9_VECTOR)
17541 {
17542 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17543 VSX_BUILTIN_LD_ELEMREV_V8HI);
17544 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17545 VSX_BUILTIN_LD_ELEMREV_V16QI);
17546 def_builtin ("__builtin_vsx_st_elemrev_v8hi",
17547 void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI);
17548 def_builtin ("__builtin_vsx_st_elemrev_v16qi",
17549 void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI);
17550 }
17551 else
17552 {
17553 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI]
17554 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI];
17555 rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI]
17556 = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI];
17557 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI]
17558 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI];
17559 rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI]
17560 = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI];
17561 }
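   /* Editorial note: the aliasing above guarantees that a decl always
      exists for the V8HI/V16QI element-reversing names, even when ISA 3.0
      is unavailable, by reusing the corresponding lxvw4x/stxvw4x decls.  */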
17562
17563 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17564 VSX_BUILTIN_VEC_LD);
17565 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17566 VSX_BUILTIN_VEC_ST);
17567 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17568 VSX_BUILTIN_VEC_XL);
17569 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17570 VSX_BUILTIN_VEC_XST);
17571
17572 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17573 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17574 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17575
17576 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17577 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17578 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17579 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17580 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17581 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17582 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17583 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17584 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17585 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17586 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17587 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17588
17589 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17590 ALTIVEC_BUILTIN_VEC_ADDE);
17591 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17592 ALTIVEC_BUILTIN_VEC_ADDEC);
17593 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17594 ALTIVEC_BUILTIN_VEC_CMPNE);
17595 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17596 ALTIVEC_BUILTIN_VEC_MUL);
17597
17598 /* Cell builtins. */
17599 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17600 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17601 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17602 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17603
17604 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17605 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17606 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17607 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17608
17609 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17610 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17611 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17612 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17613
17614 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17615 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17616 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17617 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17618
17619 if (TARGET_P9_VECTOR)
17620 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17621 P9V_BUILTIN_STXVL);
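   /* Editorial sketch (assumes -mcpu=power9): <altivec.h> exposes this
      builtin through vec_xst_len, e.g.

	 vec_xst_len (v, dst, n);    store only the first n bytes of v

      using the ISA 3.0 stxvl instruction.  */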
17622
17623 /* Add the DST variants. */
17624 d = bdesc_dst;
17625 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17626 {
17627 HOST_WIDE_INT mask = d->mask;
17628
17629 /* It is expected that these dst built-in functions may have
17630 d->icode equal to CODE_FOR_nothing. */
17631 if ((mask & builtin_mask) != mask)
17632 {
17633 if (TARGET_DEBUG_BUILTIN)
17634 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17635 d->name);
17636 continue;
17637 }
17638 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17639 }
17640
17641 /* Initialize the predicates. */
17642 d = bdesc_altivec_preds;
17643 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17644 {
17645 machine_mode mode1;
17646 tree type;
17647 HOST_WIDE_INT mask = d->mask;
17648
17649 if ((mask & builtin_mask) != mask)
17650 {
17651 if (TARGET_DEBUG_BUILTIN)
17652 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17653 d->name);
17654 continue;
17655 }
17656
17657 if (rs6000_overloaded_builtin_p (d->code))
17658 mode1 = VOIDmode;
17659 else
17660 {
17661 /* Cannot define builtin if the instruction is disabled. */
17662 gcc_assert (d->icode != CODE_FOR_nothing);
17663 mode1 = insn_data[d->icode].operand[1].mode;
17664 }
17665
17666 switch (mode1)
17667 {
17668 case VOIDmode:
17669 type = int_ftype_int_opaque_opaque;
17670 break;
17671 case V2DImode:
17672 type = int_ftype_int_v2di_v2di;
17673 break;
17674 case V4SImode:
17675 type = int_ftype_int_v4si_v4si;
17676 break;
17677 case V8HImode:
17678 type = int_ftype_int_v8hi_v8hi;
17679 break;
17680 case V16QImode:
17681 type = int_ftype_int_v16qi_v16qi;
17682 break;
17683 case V4SFmode:
17684 type = int_ftype_int_v4sf_v4sf;
17685 break;
17686 case V2DFmode:
17687 type = int_ftype_int_v2df_v2df;
17688 break;
17689 default:
17690 gcc_unreachable ();
17691 }
17692
17693 def_builtin (d->name, type, d->code);
17694 }
17695
17696 /* Initialize the abs* operators. */
17697 d = bdesc_abs;
17698 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17699 {
17700 machine_mode mode0;
17701 tree type;
17702 HOST_WIDE_INT mask = d->mask;
17703
17704 if ((mask & builtin_mask) != mask)
17705 {
17706 if (TARGET_DEBUG_BUILTIN)
17707 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17708 d->name);
17709 continue;
17710 }
17711
17712 /* Cannot define builtin if the instruction is disabled. */
17713 gcc_assert (d->icode != CODE_FOR_nothing);
17714 mode0 = insn_data[d->icode].operand[0].mode;
17715
17716 switch (mode0)
17717 {
17718 case V2DImode:
17719 type = v2di_ftype_v2di;
17720 break;
17721 case V4SImode:
17722 type = v4si_ftype_v4si;
17723 break;
17724 case V8HImode:
17725 type = v8hi_ftype_v8hi;
17726 break;
17727 case V16QImode:
17728 type = v16qi_ftype_v16qi;
17729 break;
17730 case V4SFmode:
17731 type = v4sf_ftype_v4sf;
17732 break;
17733 case V2DFmode:
17734 type = v2df_ftype_v2df;
17735 break;
17736 default:
17737 gcc_unreachable ();
17738 }
17739
17740 def_builtin (d->name, type, d->code);
17741 }
17742
17743 /* Initialize target builtin that implements
17744 targetm.vectorize.builtin_mask_for_load. */
17745
17746 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17747 v16qi_ftype_long_pcvoid,
17748 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17749 BUILT_IN_MD, NULL, NULL_TREE);
17750 TREE_READONLY (decl) = 1;
17751 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17752 altivec_builtin_mask_for_load = decl;
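   /* Editorial note: the autovectorizer calls this builtin, roughly as

	 mask = __builtin_altivec_mask_for_load (addr);

      to build an lvsl-style permute mask for realigning misaligned
      vector loads, which is why the decl is marked TREE_READONLY.  */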
17753
17754 /* Access to the vec_init patterns. */
17755 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17756 integer_type_node, integer_type_node,
17757 integer_type_node, NULL_TREE);
17758 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17759
17760 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17761 short_integer_type_node,
17762 short_integer_type_node,
17763 short_integer_type_node,
17764 short_integer_type_node,
17765 short_integer_type_node,
17766 short_integer_type_node,
17767 short_integer_type_node, NULL_TREE);
17768 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17769
17770 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17771 char_type_node, char_type_node,
17772 char_type_node, char_type_node,
17773 char_type_node, char_type_node,
17774 char_type_node, char_type_node,
17775 char_type_node, char_type_node,
17776 char_type_node, char_type_node,
17777 char_type_node, char_type_node,
17778 char_type_node, NULL_TREE);
17779 def_builtin ("__builtin_vec_init_v16qi", ftype,
17780 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17781
17782 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17783 float_type_node, float_type_node,
17784 float_type_node, NULL_TREE);
17785 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
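   /* Editorial sketch: these entry points give the front ends a builtin
      spelling of vector construction, e.g.

	 vector int v = __builtin_vec_init_v4si (a, b, c, d);

      equivalent to writing the vector literal (vector int){a, b, c, d}.  */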
17786
17787 /* VSX builtins. */
17788 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17789 double_type_node, NULL_TREE);
17790 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17791
17792 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17793 intDI_type_node, NULL_TREE);
17794 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17795
17796 /* Access to the vec_set patterns. */
17797 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17798 intSI_type_node,
17799 integer_type_node, NULL_TREE);
17800 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17801
17802 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17803 intHI_type_node,
17804 integer_type_node, NULL_TREE);
17805 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17806
17807 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17808 intQI_type_node,
17809 integer_type_node, NULL_TREE);
17810 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17811
17812 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17813 float_type_node,
17814 integer_type_node, NULL_TREE);
17815 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17816
17817 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17818 double_type_node,
17819 integer_type_node, NULL_TREE);
17820 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17821
17822 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17823 intDI_type_node,
17824 integer_type_node, NULL_TREE);
17825 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17826
17827 /* Access to the vec_extract patterns. */
17828 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17829 integer_type_node, NULL_TREE);
17830 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17831
17832 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17833 integer_type_node, NULL_TREE);
17834 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17835
17836 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17837 integer_type_node, NULL_TREE);
17838 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17839
17840 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17841 integer_type_node, NULL_TREE);
17842 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17843
17844 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17845 integer_type_node, NULL_TREE);
17846 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17847
17848 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17849 integer_type_node, NULL_TREE);
17850 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17851
17852
17853 if (V1TI_type_node)
17854 {
17855 tree v1ti_ftype_long_pcvoid
17856 = build_function_type_list (V1TI_type_node,
17857 long_integer_type_node, pcvoid_type_node,
17858 NULL_TREE);
17859 tree void_ftype_v1ti_long_pvoid
17860 = build_function_type_list (void_type_node,
17861 V1TI_type_node, long_integer_type_node,
17862 pvoid_type_node, NULL_TREE);
17863 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17864 VSX_BUILTIN_LXVD2X_V1TI);
17865 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17866 VSX_BUILTIN_STXVD2X_V1TI);
17867 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17868 NULL_TREE);
17869 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17870 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17871 intTI_type_node,
17872 integer_type_node, NULL_TREE);
17873 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17874 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17875 integer_type_node, NULL_TREE);
17876 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17877 }
17878
17879 }
17880
17881 static void
17882 htm_init_builtins (void)
17883 {
17884 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17885 const struct builtin_description *d;
17886 size_t i;
17887
17888 d = bdesc_htm;
17889 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17890 {
17891 tree op[MAX_HTM_OPERANDS], type;
17892 HOST_WIDE_INT mask = d->mask;
17893 unsigned attr = rs6000_builtin_info[d->code].attr;
17894 bool void_func = (attr & RS6000_BTC_VOID);
17895 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17896 int nopnds = 0;
17897 tree gpr_type_node;
17898 tree rettype;
17899 tree argtype;
17900
17901 /* It is expected that these htm built-in functions may have
17902 d->icode equal to CODE_FOR_nothing. */
17903
17904 if (TARGET_32BIT && TARGET_POWERPC64)
17905 gpr_type_node = long_long_unsigned_type_node;
17906 else
17907 gpr_type_node = long_unsigned_type_node;
17908
17909 if (attr & RS6000_BTC_SPR)
17910 {
17911 rettype = gpr_type_node;
17912 argtype = gpr_type_node;
17913 }
17914 else if (d->code == HTM_BUILTIN_TABORTDC
17915 || d->code == HTM_BUILTIN_TABORTDCI)
17916 {
17917 rettype = unsigned_type_node;
17918 argtype = gpr_type_node;
17919 }
17920 else
17921 {
17922 rettype = unsigned_type_node;
17923 argtype = unsigned_type_node;
17924 }
17925
17926 if ((mask & builtin_mask) != mask)
17927 {
17928 if (TARGET_DEBUG_BUILTIN)
17929 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
17930 continue;
17931 }
17932
17933 if (d->name == 0)
17934 {
17935 if (TARGET_DEBUG_BUILTIN)
17936 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17937 (long unsigned) i);
17938 continue;
17939 }
17940
17941 op[nopnds++] = (void_func) ? void_type_node : rettype;
17942
17943 if (attr_args == RS6000_BTC_UNARY)
17944 op[nopnds++] = argtype;
17945 else if (attr_args == RS6000_BTC_BINARY)
17946 {
17947 op[nopnds++] = argtype;
17948 op[nopnds++] = argtype;
17949 }
17950 else if (attr_args == RS6000_BTC_TERNARY)
17951 {
17952 op[nopnds++] = argtype;
17953 op[nopnds++] = argtype;
17954 op[nopnds++] = argtype;
17955 }
17956
17957 switch (nopnds)
17958 {
17959 case 1:
17960 type = build_function_type_list (op[0], NULL_TREE);
17961 break;
17962 case 2:
17963 type = build_function_type_list (op[0], op[1], NULL_TREE);
17964 break;
17965 case 3:
17966 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17967 break;
17968 case 4:
17969 type = build_function_type_list (op[0], op[1], op[2], op[3],
17970 NULL_TREE);
17971 break;
17972 default:
17973 gcc_unreachable ();
17974 }
17975
17976 def_builtin (d->name, type, d->code);
17977 }
17978 }
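/* Editorial sketch of the HTM builtins created above, following the GCC
   manual's pattern (requires -mhtm):

	if (__builtin_tbegin (0))
	  {
	    ... transactional code ...
	    __builtin_tend (0);
	  }

   __builtin_tbegin returns nonzero when the transaction starts.  */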
17979
17980 /* Hash function for builtin functions with up to 3 arguments and a return
17981 type. */
17982 hashval_t
17983 builtin_hasher::hash (builtin_hash_struct *bh)
17984 {
17985 unsigned ret = 0;
17986 int i;
17987
17988 for (i = 0; i < 4; i++)
17989 {
17990 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17991 ret = (ret * 2) + bh->uns_p[i];
17992 }
17993
17994 return ret;
17995 }
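/* Worked example (editorial): a builtin taking two doubles and returning a
   double hashes the tuple { DFmode, DFmode, DFmode, VOIDmode } with uns_p
   all zero, so every builtin of that shape shares one cached function
   type.  */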
17996
17997 /* Compare builtin hash entries H1 and H2 for equivalence. */
17998 bool
17999 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
18000 {
18001 return ((p1->mode[0] == p2->mode[0])
18002 && (p1->mode[1] == p2->mode[1])
18003 && (p1->mode[2] == p2->mode[2])
18004 && (p1->mode[3] == p2->mode[3])
18005 && (p1->uns_p[0] == p2->uns_p[0])
18006 && (p1->uns_p[1] == p2->uns_p[1])
18007 && (p1->uns_p[2] == p2->uns_p[2])
18008 && (p1->uns_p[3] == p2->uns_p[3]));
18009 }
18010
18011 /* Map types for builtin functions with an explicit return type and up to 3
18012 arguments.  Functions with fewer than 3 arguments use VOIDmode as the mode
18013 of the unused arguments.  */
18014 static tree
18015 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
18016 machine_mode mode_arg1, machine_mode mode_arg2,
18017 enum rs6000_builtins builtin, const char *name)
18018 {
18019 struct builtin_hash_struct h;
18020 struct builtin_hash_struct *h2;
18021 int num_args = 3;
18022 int i;
18023 tree ret_type = NULL_TREE;
18024 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18025
18026 /* Create builtin_hash_table. */
18027 if (builtin_hash_table == NULL)
18028 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18029
18030 h.type = NULL_TREE;
18031 h.mode[0] = mode_ret;
18032 h.mode[1] = mode_arg0;
18033 h.mode[2] = mode_arg1;
18034 h.mode[3] = mode_arg2;
18035 h.uns_p[0] = 0;
18036 h.uns_p[1] = 0;
18037 h.uns_p[2] = 0;
18038 h.uns_p[3] = 0;
18039
18040 /* If the builtin produces unsigned results or takes unsigned arguments,
18041 and its decl is returned to the vectorizer (such as the widening
18042 multiplies and permutes), make sure the arguments and return value
18043 are typed correctly.  */
18044 switch (builtin)
18045 {
18046 /* unsigned 1 argument functions. */
18047 case CRYPTO_BUILTIN_VSBOX:
18048 case P8V_BUILTIN_VGBBD:
18049 case MISC_BUILTIN_CDTBCD:
18050 case MISC_BUILTIN_CBCDTD:
18051 h.uns_p[0] = 1;
18052 h.uns_p[1] = 1;
18053 break;
18054
18055 /* unsigned 2 argument functions. */
18056 case ALTIVEC_BUILTIN_VMULEUB:
18057 case ALTIVEC_BUILTIN_VMULEUH:
18058 case ALTIVEC_BUILTIN_VMULEUW:
18059 case ALTIVEC_BUILTIN_VMULOUB:
18060 case ALTIVEC_BUILTIN_VMULOUH:
18061 case ALTIVEC_BUILTIN_VMULOUW:
18062 case CRYPTO_BUILTIN_VCIPHER:
18063 case CRYPTO_BUILTIN_VCIPHERLAST:
18064 case CRYPTO_BUILTIN_VNCIPHER:
18065 case CRYPTO_BUILTIN_VNCIPHERLAST:
18066 case CRYPTO_BUILTIN_VPMSUMB:
18067 case CRYPTO_BUILTIN_VPMSUMH:
18068 case CRYPTO_BUILTIN_VPMSUMW:
18069 case CRYPTO_BUILTIN_VPMSUMD:
18070 case CRYPTO_BUILTIN_VPMSUM:
18071 case MISC_BUILTIN_ADDG6S:
18072 case MISC_BUILTIN_DIVWEU:
18073 case MISC_BUILTIN_DIVWEUO:
18074 case MISC_BUILTIN_DIVDEU:
18075 case MISC_BUILTIN_DIVDEUO:
18076 case VSX_BUILTIN_UDIV_V2DI:
18077 case ALTIVEC_BUILTIN_VMAXUB:
18078 case ALTIVEC_BUILTIN_VMINUB:
18079 case ALTIVEC_BUILTIN_VMAXUH:
18080 case ALTIVEC_BUILTIN_VMINUH:
18081 case ALTIVEC_BUILTIN_VMAXUW:
18082 case ALTIVEC_BUILTIN_VMINUW:
18083 case P8V_BUILTIN_VMAXUD:
18084 case P8V_BUILTIN_VMINUD:
18085 h.uns_p[0] = 1;
18086 h.uns_p[1] = 1;
18087 h.uns_p[2] = 1;
18088 break;
18089
18090 /* unsigned 3 argument functions. */
18091 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18092 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18093 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18094 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18095 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18096 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18097 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18098 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18099 case VSX_BUILTIN_VPERM_16QI_UNS:
18100 case VSX_BUILTIN_VPERM_8HI_UNS:
18101 case VSX_BUILTIN_VPERM_4SI_UNS:
18102 case VSX_BUILTIN_VPERM_2DI_UNS:
18103 case VSX_BUILTIN_XXSEL_16QI_UNS:
18104 case VSX_BUILTIN_XXSEL_8HI_UNS:
18105 case VSX_BUILTIN_XXSEL_4SI_UNS:
18106 case VSX_BUILTIN_XXSEL_2DI_UNS:
18107 case CRYPTO_BUILTIN_VPERMXOR:
18108 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18109 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18110 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18111 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18112 case CRYPTO_BUILTIN_VSHASIGMAW:
18113 case CRYPTO_BUILTIN_VSHASIGMAD:
18114 case CRYPTO_BUILTIN_VSHASIGMA:
18115 h.uns_p[0] = 1;
18116 h.uns_p[1] = 1;
18117 h.uns_p[2] = 1;
18118 h.uns_p[3] = 1;
18119 break;
18120
18121 /* signed permute functions with unsigned char mask. */
18122 case ALTIVEC_BUILTIN_VPERM_16QI:
18123 case ALTIVEC_BUILTIN_VPERM_8HI:
18124 case ALTIVEC_BUILTIN_VPERM_4SI:
18125 case ALTIVEC_BUILTIN_VPERM_4SF:
18126 case ALTIVEC_BUILTIN_VPERM_2DI:
18127 case ALTIVEC_BUILTIN_VPERM_2DF:
18128 case VSX_BUILTIN_VPERM_16QI:
18129 case VSX_BUILTIN_VPERM_8HI:
18130 case VSX_BUILTIN_VPERM_4SI:
18131 case VSX_BUILTIN_VPERM_4SF:
18132 case VSX_BUILTIN_VPERM_2DI:
18133 case VSX_BUILTIN_VPERM_2DF:
18134 h.uns_p[3] = 1;
18135 break;
18136
18137 /* unsigned args, signed return. */
18138 case VSX_BUILTIN_XVCVUXDSP:
18139 case VSX_BUILTIN_XVCVUXDDP_UNS:
18140 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18141 h.uns_p[1] = 1;
18142 break;
18143
18144 /* signed args, unsigned return. */
18145 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18146 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18147 case MISC_BUILTIN_UNPACK_TD:
18148 case MISC_BUILTIN_UNPACK_V1TI:
18149 h.uns_p[0] = 1;
18150 break;
18151
18152 /* unsigned arguments for 128-bit pack instructions. */
18153 case MISC_BUILTIN_PACK_TD:
18154 case MISC_BUILTIN_PACK_V1TI:
18155 h.uns_p[1] = 1;
18156 h.uns_p[2] = 1;
18157 break;
18158
18159 /* unsigned second arguments (vector shift right). */
18160 case ALTIVEC_BUILTIN_VSRB:
18161 case ALTIVEC_BUILTIN_VSRH:
18162 case ALTIVEC_BUILTIN_VSRW:
18163 case P8V_BUILTIN_VSRD:
18164 h.uns_p[2] = 1;
18165 break;
18166
18167 default:
18168 break;
18169 }
18170
18171 /* Figure out how many args are present. */
18172 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18173 num_args--;
18174
18175 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18176 if (!ret_type && h.uns_p[0])
18177 ret_type = builtin_mode_to_type[h.mode[0]][0];
18178
18179 if (!ret_type)
18180 fatal_error (input_location,
18181 "internal error: builtin function %s had an unexpected "
18182 "return type %s", name, GET_MODE_NAME (h.mode[0]));
18183
18184 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18185 arg_type[i] = NULL_TREE;
18186
18187 for (i = 0; i < num_args; i++)
18188 {
18189 int m = (int) h.mode[i+1];
18190 int uns_p = h.uns_p[i+1];
18191
18192 arg_type[i] = builtin_mode_to_type[m][uns_p];
18193 if (!arg_type[i] && uns_p)
18194 arg_type[i] = builtin_mode_to_type[m][0];
18195
18196 if (!arg_type[i])
18197 fatal_error (input_location,
18198 "internal error: builtin function %s, argument %d "
18199 "had an unexpected argument type %s", name, i,
18200 GET_MODE_NAME (m));
18201 }
18202
18203 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18204 if (*found == NULL)
18205 {
18206 h2 = ggc_alloc<builtin_hash_struct> ();
18207 *h2 = h;
18208 *found = h2;
18209
18210 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18211 arg_type[2], NULL_TREE);
18212 }
18213
18214 return (*found)->type;
18215 }
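/* Editorial example: the earlier call

	builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
			       RS6000_BUILTIN_RECIP, "__builtin_recipdiv");

   therefore yields the function type double (double, double), memoized in
   builtin_hash_table for any later builtin with the same modes.  */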
18216
18217 static void
18218 rs6000_common_init_builtins (void)
18219 {
18220 const struct builtin_description *d;
18221 size_t i;
18222
18223 tree opaque_ftype_opaque = NULL_TREE;
18224 tree opaque_ftype_opaque_opaque = NULL_TREE;
18225 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18226 tree v2si_ftype = NULL_TREE;
18227 tree v2si_ftype_qi = NULL_TREE;
18228 tree v2si_ftype_v2si_qi = NULL_TREE;
18229 tree v2si_ftype_int_qi = NULL_TREE;
18230 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18231
18232 if (!TARGET_PAIRED_FLOAT)
18233 {
18234 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18235 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18236 }
18237
18238 /* Paired builtins are only available if you build a compiler with the
18239 appropriate options, so only create those builtins with the appropriate
18240 compiler option.  Create Altivec and VSX builtins on machines with at
18241 least the general purpose extensions (970 and newer) to allow the use of
18242 the target attribute.  */
18243
18244 if (TARGET_EXTRA_BUILTINS)
18245 builtin_mask |= RS6000_BTM_COMMON;
18246
18247 /* Add the ternary operators. */
18248 d = bdesc_3arg;
18249 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18250 {
18251 tree type;
18252 HOST_WIDE_INT mask = d->mask;
18253
18254 if ((mask & builtin_mask) != mask)
18255 {
18256 if (TARGET_DEBUG_BUILTIN)
18257 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18258 continue;
18259 }
18260
18261 if (rs6000_overloaded_builtin_p (d->code))
18262 {
18263 if (! (type = opaque_ftype_opaque_opaque_opaque))
18264 type = opaque_ftype_opaque_opaque_opaque
18265 = build_function_type_list (opaque_V4SI_type_node,
18266 opaque_V4SI_type_node,
18267 opaque_V4SI_type_node,
18268 opaque_V4SI_type_node,
18269 NULL_TREE);
18270 }
18271 else
18272 {
18273 enum insn_code icode = d->icode;
18274 if (d->name == 0)
18275 {
18276 if (TARGET_DEBUG_BUILTIN)
18277 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18278 (long unsigned)i);
18279
18280 continue;
18281 }
18282
18283 if (icode == CODE_FOR_nothing)
18284 {
18285 if (TARGET_DEBUG_BUILTIN)
18286 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18287 d->name);
18288
18289 continue;
18290 }
18291
18292 type = builtin_function_type (insn_data[icode].operand[0].mode,
18293 insn_data[icode].operand[1].mode,
18294 insn_data[icode].operand[2].mode,
18295 insn_data[icode].operand[3].mode,
18296 d->code, d->name);
18297 }
18298
18299 def_builtin (d->name, type, d->code);
18300 }
18301
18302 /* Add the binary operators. */
18303 d = bdesc_2arg;
18304 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18305 {
18306 machine_mode mode0, mode1, mode2;
18307 tree type;
18308 HOST_WIDE_INT mask = d->mask;
18309
18310 if ((mask & builtin_mask) != mask)
18311 {
18312 if (TARGET_DEBUG_BUILTIN)
18313 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18314 continue;
18315 }
18316
18317 if (rs6000_overloaded_builtin_p (d->code))
18318 {
18319 if (! (type = opaque_ftype_opaque_opaque))
18320 type = opaque_ftype_opaque_opaque
18321 = build_function_type_list (opaque_V4SI_type_node,
18322 opaque_V4SI_type_node,
18323 opaque_V4SI_type_node,
18324 NULL_TREE);
18325 }
18326 else
18327 {
18328 enum insn_code icode = d->icode;
18329 if (d->name == 0)
18330 {
18331 if (TARGET_DEBUG_BUILTIN)
18332 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18333 (long unsigned)i);
18334
18335 continue;
18336 }
18337
18338 if (icode == CODE_FOR_nothing)
18339 {
18340 if (TARGET_DEBUG_BUILTIN)
18341 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18342 d->name);
18343
18344 continue;
18345 }
18346
18347 mode0 = insn_data[icode].operand[0].mode;
18348 mode1 = insn_data[icode].operand[1].mode;
18349 mode2 = insn_data[icode].operand[2].mode;
18350
18351 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18352 {
18353 if (! (type = v2si_ftype_v2si_qi))
18354 type = v2si_ftype_v2si_qi
18355 = build_function_type_list (opaque_V2SI_type_node,
18356 opaque_V2SI_type_node,
18357 char_type_node,
18358 NULL_TREE);
18359 }
18360
18361 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18362 && mode2 == QImode)
18363 {
18364 if (! (type = v2si_ftype_int_qi))
18365 type = v2si_ftype_int_qi
18366 = build_function_type_list (opaque_V2SI_type_node,
18367 integer_type_node,
18368 char_type_node,
18369 NULL_TREE);
18370 }
18371
18372 else
18373 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18374 d->code, d->name);
18375 }
18376
18377 def_builtin (d->name, type, d->code);
18378 }
18379
18380 /* Add the simple unary operators. */
18381 d = bdesc_1arg;
18382 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18383 {
18384 machine_mode mode0, mode1;
18385 tree type;
18386 HOST_WIDE_INT mask = d->mask;
18387
18388 if ((mask & builtin_mask) != mask)
18389 {
18390 if (TARGET_DEBUG_BUILTIN)
18391 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18392 continue;
18393 }
18394
18395 if (rs6000_overloaded_builtin_p (d->code))
18396 {
18397 if (! (type = opaque_ftype_opaque))
18398 type = opaque_ftype_opaque
18399 = build_function_type_list (opaque_V4SI_type_node,
18400 opaque_V4SI_type_node,
18401 NULL_TREE);
18402 }
18403 else
18404 {
18405 enum insn_code icode = d->icode;
18406 if (d->name == 0)
18407 {
18408 if (TARGET_DEBUG_BUILTIN)
18409 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18410 (long unsigned)i);
18411
18412 continue;
18413 }
18414
18415 if (icode == CODE_FOR_nothing)
18416 {
18417 if (TARGET_DEBUG_BUILTIN)
18418 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18419 d->name);
18420
18421 continue;
18422 }
18423
18424 mode0 = insn_data[icode].operand[0].mode;
18425 mode1 = insn_data[icode].operand[1].mode;
18426
18427 if (mode0 == V2SImode && mode1 == QImode)
18428 {
18429 if (! (type = v2si_ftype_qi))
18430 type = v2si_ftype_qi
18431 = build_function_type_list (opaque_V2SI_type_node,
18432 char_type_node,
18433 NULL_TREE);
18434 }
18435
18436 else
18437 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18438 d->code, d->name);
18439 }
18440
18441 def_builtin (d->name, type, d->code);
18442 }
18443
18444 /* Add the simple no-argument operators. */
18445 d = bdesc_0arg;
18446 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18447 {
18448 machine_mode mode0;
18449 tree type;
18450 HOST_WIDE_INT mask = d->mask;
18451
18452 if ((mask & builtin_mask) != mask)
18453 {
18454 if (TARGET_DEBUG_BUILTIN)
18455 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18456 continue;
18457 }
18458 if (rs6000_overloaded_builtin_p (d->code))
18459 {
18460 if (!opaque_ftype_opaque)
18461 opaque_ftype_opaque
18462 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18463 type = opaque_ftype_opaque;
18464 }
18465 else
18466 {
18467 enum insn_code icode = d->icode;
18468 if (d->name == 0)
18469 {
18470 if (TARGET_DEBUG_BUILTIN)
18471 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18472 (long unsigned) i);
18473 continue;
18474 }
18475 if (icode == CODE_FOR_nothing)
18476 {
18477 if (TARGET_DEBUG_BUILTIN)
18478 fprintf (stderr,
18479 "rs6000_builtin, skip no-argument %s (no code)\n",
18480 d->name);
18481 continue;
18482 }
18483 mode0 = insn_data[icode].operand[0].mode;
18484 if (mode0 == V2SImode)
18485 {
18486 /* Code for paired single.  */
18487 if (! (type = v2si_ftype))
18488 {
18489 v2si_ftype
18490 = build_function_type_list (opaque_V2SI_type_node,
18491 NULL_TREE);
18492 type = v2si_ftype;
18493 }
18494 }
18495 else
18496 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18497 d->code, d->name);
18498 }
18499 def_builtin (d->name, type, d->code);
18500 }
18501 }
18502
18503 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18504 static void
18505 init_float128_ibm (machine_mode mode)
18506 {
18507 if (!TARGET_XL_COMPAT)
18508 {
18509 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18510 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18511 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18512 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18513
18514 if (!TARGET_HARD_FLOAT)
18515 {
18516 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18517 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18518 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18519 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18520 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18521 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18522 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18523 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18524
18525 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18526 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18527 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18528 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18529 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18530 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18531 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18532 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18533 }
18534 }
18535 else
18536 {
18537 set_optab_libfunc (add_optab, mode, "_xlqadd");
18538 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18539 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18540 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18541 }
18542
18543 /* Add various conversions for IFmode to use the traditional TFmode
18544 names. */
18545 if (mode == IFmode)
18546 {
18547 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18548 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18549 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18550 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18551 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18552 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18553
18554 if (TARGET_POWERPC64)
18555 {
18556 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18557 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18558 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18559 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18560 }
18561 }
18562 }
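/* Editorial note: with IBM long double and the defaults above, a simple

	long double c = a + b;

   therefore emits a call to __gcc_qadd (or _xlqadd under -mxl-compat).  */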
18563
18564 /* Set up IEEE 128-bit floating point routines. Use different names if the
18565 arguments can be passed in a vector register. The historical PowerPC
18566 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18567 continue to use that if we aren't using vector registers to pass IEEE
18568 128-bit floating point. */
18569
18570 static void
18571 init_float128_ieee (machine_mode mode)
18572 {
18573 if (FLOAT128_VECTOR_P (mode))
18574 {
18575 set_optab_libfunc (add_optab, mode, "__addkf3");
18576 set_optab_libfunc (sub_optab, mode, "__subkf3");
18577 set_optab_libfunc (neg_optab, mode, "__negkf2");
18578 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18579 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18580 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18581       set_optab_libfunc (abs_optab, mode, "__abskf2");
18582
18583 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18584 set_optab_libfunc (ne_optab, mode, "__nekf2");
18585 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18586 set_optab_libfunc (ge_optab, mode, "__gekf2");
18587 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18588 set_optab_libfunc (le_optab, mode, "__lekf2");
18589 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18590
18591 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18592 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18593 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18594 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18595
18596 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18597 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18598 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18599
18600 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18601 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18602 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18603
18604 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18605 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18606 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18607 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18608 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18609 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18610
18611 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18612 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18613 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18614 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18615
18616 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18617 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18618 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18619 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18620
18621 if (TARGET_POWERPC64)
18622 {
18623 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18624 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18625 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18626 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18627 }
18628 }
18629
18630 else
18631 {
18632 set_optab_libfunc (add_optab, mode, "_q_add");
18633 set_optab_libfunc (sub_optab, mode, "_q_sub");
18634 set_optab_libfunc (neg_optab, mode, "_q_neg");
18635 set_optab_libfunc (smul_optab, mode, "_q_mul");
18636 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18637 if (TARGET_PPC_GPOPT)
18638 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18639
18640 set_optab_libfunc (eq_optab, mode, "_q_feq");
18641 set_optab_libfunc (ne_optab, mode, "_q_fne");
18642 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18643 set_optab_libfunc (ge_optab, mode, "_q_fge");
18644 set_optab_libfunc (lt_optab, mode, "_q_flt");
18645 set_optab_libfunc (le_optab, mode, "_q_fle");
18646
18647 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18648 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18649 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18650 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18651 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18652 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18653 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18654 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18655 }
18656 }
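/* Illustration: when KFmode values live in vector registers
   (FLOAT128_VECTOR_P), a __float128 multiply expands to a call to
   __mulkf3 with its operands passed in VSX registers; on configurations
   without that support the same operation uses the historical _q_mul
   name installed in the else arm above.  */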
18657
18658 static void
18659 rs6000_init_libfuncs (void)
18660 {
18661 /* __float128 support. */
18662 if (TARGET_FLOAT128_TYPE)
18663 {
18664 init_float128_ibm (IFmode);
18665 init_float128_ieee (KFmode);
18666 }
18667
18668 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18669 if (TARGET_LONG_DOUBLE_128)
18670 {
18671 if (!TARGET_IEEEQUAD)
18672 init_float128_ibm (TFmode);
18673
18674 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18675 else
18676 init_float128_ieee (TFmode);
18677 }
18678 }
18679
18680 /* Emit a potentially record-form instruction, setting DST from SRC.
18681 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18682 signed comparison of DST with zero. If DOT is 1, the generated RTL
18683 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18684 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18685 a separate COMPARE. */
18686
18687 void
18688 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18689 {
18690 if (dot == 0)
18691 {
18692 emit_move_insn (dst, src);
18693 return;
18694 }
18695
18696 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18697 {
18698 emit_move_insn (dst, src);
18699 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18700 return;
18701 }
18702
18703 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18704 if (dot == 1)
18705 {
18706 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18707 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18708 }
18709 else
18710 {
18711 rtx set = gen_rtx_SET (dst, src);
18712 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
18713 }
18714 }
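/* Example of the RTL emitted for DOT == 1, where only the CR result is
   live afterwards (register numbers are illustrative only):

       (parallel [(set (reg:CC 68)
                       (compare:CC (reg:DI 3) (const_int 0)))
                  (clobber (reg:DI 4))])

   which the machine description can match as a record-form instruction
   such as "rldicl." or "andi.".  */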
18715
18716 \f
18717 /* A validation routine: say whether CODE, a condition code, and MODE
18718 match. The other alternatives either don't make sense or should
18719 never be generated. */
18720
18721 void
18722 validate_condition_mode (enum rtx_code code, machine_mode mode)
18723 {
18724 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18725 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18726 && GET_MODE_CLASS (mode) == MODE_CC);
18727
18728 /* These don't make sense. */
18729 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18730 || mode != CCUNSmode);
18731
18732 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18733 || mode == CCUNSmode);
18734
18735 gcc_assert (mode == CCFPmode
18736 || (code != ORDERED && code != UNORDERED
18737 && code != UNEQ && code != LTGT
18738 && code != UNGT && code != UNLT
18739 && code != UNGE && code != UNLE));
18740
18741 /* These should never be generated except for
18742 flag_finite_math_only. */
18743 gcc_assert (mode != CCFPmode
18744 || flag_finite_math_only
18745 || (code != LE && code != GE
18746 && code != UNEQ && code != LTGT
18747 && code != UNGT && code != UNLT));
18748
18749 /* These are invalid; the information is not there. */
18750 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18751 }
18752
18753 \f
18754 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18755 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18756 not zero, store there the bit offset (counted from the right) where
18757 the single stretch of 1 bits begins; and similarly for B, the bit
18758 offset where it ends. */
18759
18760 bool
18761 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18762 {
18763 unsigned HOST_WIDE_INT val = INTVAL (mask);
18764 unsigned HOST_WIDE_INT bit;
18765 int nb, ne;
18766 int n = GET_MODE_PRECISION (mode);
18767
18768 if (mode != DImode && mode != SImode)
18769 return false;
18770
18771 if (INTVAL (mask) >= 0)
18772 {
18773 bit = val & -val;
18774 ne = exact_log2 (bit);
18775 nb = exact_log2 (val + bit);
18776 }
18777 else if (val + 1 == 0)
18778 {
18779 nb = n;
18780 ne = 0;
18781 }
18782 else if (val & 1)
18783 {
18784 val = ~val;
18785 bit = val & -val;
18786 nb = exact_log2 (bit);
18787 ne = exact_log2 (val + bit);
18788 }
18789 else
18790 {
18791 bit = val & -val;
18792 ne = exact_log2 (bit);
18793 if (val + bit == 0)
18794 nb = n;
18795 else
18796 nb = 0;
18797 }
18798
18799 nb--;
18800
18801 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18802 return false;
18803
18804 if (b)
18805 *b = nb;
18806 if (e)
18807 *e = ne;
18808
18809 return true;
18810 }
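/* Worked example: for the SImode mask 0x00ffff00, the lowest set bit is
   0x100, so ne == 8; val + bit == 0x01000000, whose log2 is 24, so after
   the decrement nb == 23.  The single stretch of ones begins at bit 8
   and ends at bit 23, and we return true with *e == 8 and *b == 23.  */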
18811
18812 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18813 or rldicr instruction, to implement an AND with it in mode MODE. */
18814
18815 bool
18816 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18817 {
18818 int nb, ne;
18819
18820 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18821 return false;
18822
18823   /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
18824      does not wrap.  */
18825 if (mode == DImode)
18826 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18827
18828 /* For SImode, rlwinm can do everything. */
18829 if (mode == SImode)
18830 return (nb < 32 && ne < 32);
18831
18832 return false;
18833 }
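/* Examples, all in DImode: 0x00000000ffffffff has ne == 0 (rldicl);
   0xffff000000000000 has nb == 63 (rldicr); 0x0000000000ffff00 has
   nb == 23 and ne == 8, a mask that fits in the low 32 bits without
   wrapping (rlwinm).  */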
18834
18835 /* Return the instruction template for an AND with mask in mode MODE, with
18836 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18837
18838 const char *
18839 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18840 {
18841 int nb, ne;
18842
18843 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18844 gcc_unreachable ();
18845
18846 if (mode == DImode && ne == 0)
18847 {
18848 operands[3] = GEN_INT (63 - nb);
18849 if (dot)
18850 return "rldicl. %0,%1,0,%3";
18851 return "rldicl %0,%1,0,%3";
18852 }
18853
18854 if (mode == DImode && nb == 63)
18855 {
18856 operands[3] = GEN_INT (63 - ne);
18857 if (dot)
18858 return "rldicr. %0,%1,0,%3";
18859 return "rldicr %0,%1,0,%3";
18860 }
18861
18862 if (nb < 32 && ne < 32)
18863 {
18864 operands[3] = GEN_INT (31 - nb);
18865 operands[4] = GEN_INT (31 - ne);
18866 if (dot)
18867 return "rlwinm. %0,%1,0,%3,%4";
18868 return "rlwinm %0,%1,0,%3,%4";
18869 }
18870
18871 gcc_unreachable ();
18872 }
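/* For instance, an SImode AND with 0x00ffff00 has nb == 23 and ne == 8,
   giving operands[3] = 31 - 23 = 8 and operands[4] = 31 - 8 = 23, i.e.

       rlwinm %0,%1,0,8,23

   a rotate by zero under the mask whose first and last one bits (in
   big-endian bit numbering) are 8 and 23.  */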
18873
18874 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18875 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18876 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18877
18878 bool
18879 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18880 {
18881 int nb, ne;
18882
18883 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18884 return false;
18885
18886 int n = GET_MODE_PRECISION (mode);
18887 int sh = -1;
18888
18889 if (CONST_INT_P (XEXP (shift, 1)))
18890 {
18891 sh = INTVAL (XEXP (shift, 1));
18892 if (sh < 0 || sh >= n)
18893 return false;
18894 }
18895
18896 rtx_code code = GET_CODE (shift);
18897
18898   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
18899 if (sh == 0)
18900 code = ROTATE;
18901
18902 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18903 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18904 code = ASHIFT;
18905 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18906 {
18907 code = LSHIFTRT;
18908 sh = n - sh;
18909 }
18910
18911 /* DImode rotates need rld*. */
18912 if (mode == DImode && code == ROTATE)
18913 return (nb == 63 || ne == 0 || ne == sh);
18914
18915 /* SImode rotates need rlw*. */
18916 if (mode == SImode && code == ROTATE)
18917 return (nb < 32 && ne < 32 && sh < 32);
18918
18919 /* Wrap-around masks are only okay for rotates. */
18920 if (ne > nb)
18921 return false;
18922
18923 /* Variable shifts are only okay for rotates. */
18924 if (sh < 0)
18925 return false;
18926
18927 /* Don't allow ASHIFT if the mask is wrong for that. */
18928 if (code == ASHIFT && ne < sh)
18929 return false;
18930
18931 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18932 if the mask is wrong for that. */
18933 if (nb < 32 && ne < 32 && sh < 32
18934 && !(code == LSHIFTRT && nb >= 32 - sh))
18935 return true;
18936
18937 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18938 if the mask is wrong for that. */
18939 if (code == LSHIFTRT)
18940 sh = 64 - sh;
18941 if (nb == 63 || ne == 0 || ne == sh)
18942 return !(code == LSHIFTRT && nb >= sh);
18943
18944 return false;
18945 }
18946
18947 /* Return the instruction template for a shift with mask in mode MODE, with
18948 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18949
18950 const char *
18951 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18952 {
18953 int nb, ne;
18954
18955 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18956 gcc_unreachable ();
18957
18958 if (mode == DImode && ne == 0)
18959 {
18960 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18961 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18962 operands[3] = GEN_INT (63 - nb);
18963 if (dot)
18964 return "rld%I2cl. %0,%1,%2,%3";
18965 return "rld%I2cl %0,%1,%2,%3";
18966 }
18967
18968 if (mode == DImode && nb == 63)
18969 {
18970 operands[3] = GEN_INT (63 - ne);
18971 if (dot)
18972 return "rld%I2cr. %0,%1,%2,%3";
18973 return "rld%I2cr %0,%1,%2,%3";
18974 }
18975
18976 if (mode == DImode
18977 && GET_CODE (operands[4]) != LSHIFTRT
18978 && CONST_INT_P (operands[2])
18979 && ne == INTVAL (operands[2]))
18980 {
18981 operands[3] = GEN_INT (63 - nb);
18982 if (dot)
18983 return "rld%I2c. %0,%1,%2,%3";
18984 return "rld%I2c %0,%1,%2,%3";
18985 }
18986
18987 if (nb < 32 && ne < 32)
18988 {
18989 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18990 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18991 operands[3] = GEN_INT (31 - nb);
18992 operands[4] = GEN_INT (31 - ne);
18993 /* This insn can also be a 64-bit rotate with mask that really makes
18994 it just a shift right (with mask); the %h below are to adjust for
18995 that situation (shift count is >= 32 in that case). */
18996 if (dot)
18997 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18998 return "rlw%I2nm %0,%1,%h2,%3,%4";
18999 }
19000
19001 gcc_unreachable ();
19002 }
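/* For instance, the SImode combination (x << 8) & 0xffffff00 has
   nb == 31 and ne == 8, so the 32-bit branch produces

       rlwinm %0,%1,8,0,23

   a rotate left by 8 that keeps exactly the bits the original
   shift-and-mask would have kept.  */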
19003
19004 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
19005 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
19006 ASHIFT, or LSHIFTRT) in mode MODE. */
19007
19008 bool
19009 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19010 {
19011 int nb, ne;
19012
19013 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19014 return false;
19015
19016 int n = GET_MODE_PRECISION (mode);
19017
19018 int sh = INTVAL (XEXP (shift, 1));
19019 if (sh < 0 || sh >= n)
19020 return false;
19021
19022 rtx_code code = GET_CODE (shift);
19023
19024   /* Convert any shift by 0 to a rotate, to simplify the code below.  */
19025 if (sh == 0)
19026 code = ROTATE;
19027
19028 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19029 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19030 code = ASHIFT;
19031 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19032 {
19033 code = LSHIFTRT;
19034 sh = n - sh;
19035 }
19036
19037 /* DImode rotates need rldimi. */
19038 if (mode == DImode && code == ROTATE)
19039 return (ne == sh);
19040
19041 /* SImode rotates need rlwimi. */
19042 if (mode == SImode && code == ROTATE)
19043 return (nb < 32 && ne < 32 && sh < 32);
19044
19045 /* Wrap-around masks are only okay for rotates. */
19046 if (ne > nb)
19047 return false;
19048
19049 /* Don't allow ASHIFT if the mask is wrong for that. */
19050 if (code == ASHIFT && ne < sh)
19051 return false;
19052
19053 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19054 if the mask is wrong for that. */
19055 if (nb < 32 && ne < 32 && sh < 32
19056 && !(code == LSHIFTRT && nb >= 32 - sh))
19057 return true;
19058
19059 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19060 if the mask is wrong for that. */
19061 if (code == LSHIFTRT)
19062 sh = 64 - sh;
19063 if (ne == sh)
19064 return !(code == LSHIFTRT && nb >= sh);
19065
19066 return false;
19067 }
19068
19069 /* Return the instruction template for an insert with mask in mode MODE, with
19070 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19071
19072 const char *
19073 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19074 {
19075 int nb, ne;
19076
19077 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19078 gcc_unreachable ();
19079
19080 /* Prefer rldimi because rlwimi is cracked. */
19081 if (TARGET_POWERPC64
19082 && (!dot || mode == DImode)
19083 && GET_CODE (operands[4]) != LSHIFTRT
19084 && ne == INTVAL (operands[2]))
19085 {
19086 operands[3] = GEN_INT (63 - nb);
19087 if (dot)
19088 return "rldimi. %0,%1,%2,%3";
19089 return "rldimi %0,%1,%2,%3";
19090 }
19091
19092 if (nb < 32 && ne < 32)
19093 {
19094 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19095 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19096 operands[3] = GEN_INT (31 - nb);
19097 operands[4] = GEN_INT (31 - ne);
19098 if (dot)
19099 return "rlwimi. %0,%1,%2,%3,%4";
19100 return "rlwimi %0,%1,%2,%3,%4";
19101 }
19102
19103 gcc_unreachable ();
19104 }
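/* For instance, inserting a value shifted left by 8 under the SImode
   mask 0x0000ff00 has nb == 15 and ne == 8 == INTVAL (operands[2]),
   so the 32-bit branch emits

       rlwimi %0,%1,8,16,23

   leaving every destination bit outside 0x0000ff00 unchanged.  */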
19105
19106 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19107 using two machine instructions. */
19108
19109 bool
19110 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19111 {
19112 /* There are two kinds of AND we can handle with two insns:
19113      1) those we can do with two rl* insns;
19114 2) ori[s];xori[s].
19115
19116 We do not handle that last case yet. */
19117
19118 /* If there is just one stretch of ones, we can do it. */
19119 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19120 return true;
19121
19122 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19123 one insn, we can do the whole thing with two. */
19124 unsigned HOST_WIDE_INT val = INTVAL (c);
19125 unsigned HOST_WIDE_INT bit1 = val & -val;
19126 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19127 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19128 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19129 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
19130 }
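/* Worked example: for the DImode constant 0xff00ff00 we get
   bit1 == 0x100, bit2 == 0x10000, and bit3 == 0x1000000, so
   val + bit3 - bit2 == 0xffffff00 (the low hole filled in).  That is a
   valid rlwinm mask, so this AND can be done with two insns.  */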
19131
19132 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19133 If EXPAND is true, split rotate-and-mask instructions we generate to
19134 their constituent parts as well (this is used during expand); if DOT
19135 is 1, make the last insn a record-form instruction clobbering the
19136 destination GPR and setting the CC reg (from operands[3]); if 2, set
19137 that GPR as well as the CC reg. */
19138
19139 void
19140 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19141 {
19142 gcc_assert (!(expand && dot));
19143
19144 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19145
19146 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19147 shift right. This generates better code than doing the masks without
19148 shifts, or shifting first right and then left. */
19149 int nb, ne;
19150 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19151 {
19152 gcc_assert (mode == DImode);
19153
19154 int shift = 63 - nb;
19155 if (expand)
19156 {
19157 rtx tmp1 = gen_reg_rtx (DImode);
19158 rtx tmp2 = gen_reg_rtx (DImode);
19159 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19160 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19161 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19162 }
19163 else
19164 {
19165 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19166 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19167 emit_move_insn (operands[0], tmp);
19168 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19169 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19170 }
19171 return;
19172 }
19173
19174 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19175 that does the rest. */
19176 unsigned HOST_WIDE_INT bit1 = val & -val;
19177 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19178 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19179 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19180
19181 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19182 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19183
19184 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19185
19186 /* Two "no-rotate"-and-mask instructions, for SImode. */
19187 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19188 {
19189 gcc_assert (mode == SImode);
19190
19191 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19192 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19193 emit_move_insn (reg, tmp);
19194 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19195 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19196 return;
19197 }
19198
19199 gcc_assert (mode == DImode);
19200
19201 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19202 insns; we have to do the first in SImode, because it wraps. */
19203 if (mask2 <= 0xffffffff
19204 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19205 {
19206 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19207 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19208 GEN_INT (mask1));
19209 rtx reg_low = gen_lowpart (SImode, reg);
19210 emit_move_insn (reg_low, tmp);
19211 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19212 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19213 return;
19214 }
19215
19216 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19217 at the top end), rotate back and clear the other hole. */
19218 int right = exact_log2 (bit3);
19219 int left = 64 - right;
19220
19221 /* Rotate the mask too. */
19222 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19223
19224 if (expand)
19225 {
19226 rtx tmp1 = gen_reg_rtx (DImode);
19227 rtx tmp2 = gen_reg_rtx (DImode);
19228 rtx tmp3 = gen_reg_rtx (DImode);
19229 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19230 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19231 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19232 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19233 }
19234 else
19235 {
19236 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19237 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19238 emit_move_insn (operands[0], tmp);
19239 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19240 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19241 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19242 }
19243 }
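/* Schematic example of the SImode path: an AND with 0x00ff00ff yields
   mask1 == 0xffff00ff (a wrap-around rlwinm mask) and
   mask2 == 0x00ffffff, so the emitted sequence is

       rlwinm %0,%1,0,24,15
       rlwinm %0,%0,0,8,31

   with the second insn becoming record-form when DOT is nonzero.  */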
19244 \f
19245 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
19246    for lfq and stfq insns, iff the registers are hard registers.  */
19247
19248 int
19249 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19250 {
19251 /* We might have been passed a SUBREG. */
19252 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19253 return 0;
19254
19255   /* We might have been passed non-floating-point registers.  */
19256 if (!FP_REGNO_P (REGNO (reg1))
19257 || !FP_REGNO_P (REGNO (reg2)))
19258 return 0;
19259
19260 return (REGNO (reg1) == REGNO (reg2) - 1);
19261 }
19262
19263 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19264 addr1 and addr2 must be in consecutive memory locations
19265 (addr2 == addr1 + 8). */
19266
19267 int
19268 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19269 {
19270 rtx addr1, addr2;
19271 unsigned int reg1, reg2;
19272 int offset1, offset2;
19273
19274 /* The mems cannot be volatile. */
19275 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19276 return 0;
19277
19278 addr1 = XEXP (mem1, 0);
19279 addr2 = XEXP (mem2, 0);
19280
19281 /* Extract an offset (if used) from the first addr. */
19282 if (GET_CODE (addr1) == PLUS)
19283 {
19284 /* If not a REG, return zero. */
19285 if (GET_CODE (XEXP (addr1, 0)) != REG)
19286 return 0;
19287 else
19288 {
19289 reg1 = REGNO (XEXP (addr1, 0));
19290 /* The offset must be constant! */
19291 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19292 return 0;
19293 offset1 = INTVAL (XEXP (addr1, 1));
19294 }
19295 }
19296 else if (GET_CODE (addr1) != REG)
19297 return 0;
19298 else
19299 {
19300 reg1 = REGNO (addr1);
19301 /* This was a simple (mem (reg)) expression. Offset is 0. */
19302 offset1 = 0;
19303 }
19304
19305 /* And now for the second addr. */
19306 if (GET_CODE (addr2) == PLUS)
19307 {
19308 /* If not a REG, return zero. */
19309 if (GET_CODE (XEXP (addr2, 0)) != REG)
19310 return 0;
19311 else
19312 {
19313 reg2 = REGNO (XEXP (addr2, 0));
19314 /* The offset must be constant. */
19315 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19316 return 0;
19317 offset2 = INTVAL (XEXP (addr2, 1));
19318 }
19319 }
19320 else if (GET_CODE (addr2) != REG)
19321 return 0;
19322 else
19323 {
19324 reg2 = REGNO (addr2);
19325 /* This was a simple (mem (reg)) expression. Offset is 0. */
19326 offset2 = 0;
19327 }
19328
19329 /* Both of these must have the same base register. */
19330 if (reg1 != reg2)
19331 return 0;
19332
19333 /* The offset for the second addr must be 8 more than the first addr. */
19334 if (offset2 != offset1 + 8)
19335 return 0;
19336
19337 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19338 instructions. */
19339 return 1;
19340 }
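/* For example, (mem:DF (plus (reg 3) (const_int 16))) followed by
   (mem:DF (plus (reg 3) (const_int 24))) shares base register 3 with
   offsets 16 and 24, so the pair qualifies (given suitably numbered FP
   registers) for a single lfq or stfq.  */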
19341 \f
19342
19343 rtx
19344 rs6000_secondary_memory_needed_rtx (machine_mode mode)
19345 {
19346 static bool eliminated = false;
19347 rtx ret;
19348
19349 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
19350 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19351 else
19352 {
19353 rtx mem = cfun->machine->sdmode_stack_slot;
19354 gcc_assert (mem != NULL_RTX);
19355
19356 if (!eliminated)
19357 {
19358 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
19359 cfun->machine->sdmode_stack_slot = mem;
19360 eliminated = true;
19361 }
19362 ret = mem;
19363 }
19364
19365 if (TARGET_DEBUG_ADDR)
19366 {
19367 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
19368 GET_MODE_NAME (mode));
19369 if (!ret)
19370 fprintf (stderr, "\tNULL_RTX\n");
19371 else
19372 debug_rtx (ret);
19373 }
19374
19375 return ret;
19376 }
19377
19378 /* Return the mode to be used for memory when a secondary memory
19379 location is needed. For SDmode values we need to use DDmode, in
19380 all other cases we can use the same mode. */
19381 machine_mode
19382 rs6000_secondary_memory_needed_mode (machine_mode mode)
19383 {
19384 if (lra_in_progress && mode == SDmode)
19385 return DDmode;
19386 return mode;
19387 }
19388
19389 static tree
19390 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
19391 {
19392 /* Don't walk into types. */
19393 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
19394 {
19395 *walk_subtrees = 0;
19396 return NULL_TREE;
19397 }
19398
19399 switch (TREE_CODE (*tp))
19400 {
19401 case VAR_DECL:
19402 case PARM_DECL:
19403 case FIELD_DECL:
19404 case RESULT_DECL:
19405 case SSA_NAME:
19406 case REAL_CST:
19407 case MEM_REF:
19408 case VIEW_CONVERT_EXPR:
19409 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
19410 return *tp;
19411 break;
19412 default:
19413 break;
19414 }
19415
19416 return NULL_TREE;
19417 }
19418
19419 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19420 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19421 only work on the traditional altivec registers, note if an altivec register
19422 was chosen. */
19423
19424 static enum rs6000_reg_type
19425 register_to_reg_type (rtx reg, bool *is_altivec)
19426 {
19427 HOST_WIDE_INT regno;
19428 enum reg_class rclass;
19429
19430 if (GET_CODE (reg) == SUBREG)
19431 reg = SUBREG_REG (reg);
19432
19433 if (!REG_P (reg))
19434 return NO_REG_TYPE;
19435
19436 regno = REGNO (reg);
19437 if (regno >= FIRST_PSEUDO_REGISTER)
19438 {
19439 if (!lra_in_progress && !reload_in_progress && !reload_completed)
19440 return PSEUDO_REG_TYPE;
19441
19442 regno = true_regnum (reg);
19443 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
19444 return PSEUDO_REG_TYPE;
19445 }
19446
19447 gcc_assert (regno >= 0);
19448
19449 if (is_altivec && ALTIVEC_REGNO_P (regno))
19450 *is_altivec = true;
19451
19452 rclass = rs6000_regno_regclass[regno];
19453 return reg_class_to_reg_type[(int)rclass];
19454 }
19455
19456 /* Helper function to return the cost of adding a TOC entry address. */
19457
19458 static inline int
19459 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19460 {
19461 int ret;
19462
19463 if (TARGET_CMODEL != CMODEL_SMALL)
19464 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19465
19466 else
19467 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19468
19469 return ret;
19470 }
19471
19472 /* Helper function for rs6000_secondary_reload to determine whether the memory
19473 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19474 needs reloading. Return negative if the memory is not handled by the memory
19475 helper functions and to try a different reload method, 0 if no additional
19476    instructions are needed, and positive to give the extra cost for the
19477 memory. */
19478
19479 static int
19480 rs6000_secondary_reload_memory (rtx addr,
19481 enum reg_class rclass,
19482 machine_mode mode)
19483 {
19484 int extra_cost = 0;
19485 rtx reg, and_arg, plus_arg0, plus_arg1;
19486 addr_mask_type addr_mask;
19487 const char *type = NULL;
19488 const char *fail_msg = NULL;
19489
19490 if (GPR_REG_CLASS_P (rclass))
19491 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19492
19493 else if (rclass == FLOAT_REGS)
19494 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19495
19496 else if (rclass == ALTIVEC_REGS)
19497 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19498
19499 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19500 else if (rclass == VSX_REGS)
19501 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19502 & ~RELOAD_REG_AND_M16);
19503
19504 /* If the register allocator hasn't made up its mind yet on the register
19505 class to use, settle on defaults to use. */
19506 else if (rclass == NO_REGS)
19507 {
19508 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19509 & ~RELOAD_REG_AND_M16);
19510
19511 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19512 addr_mask &= ~(RELOAD_REG_INDEXED
19513 | RELOAD_REG_PRE_INCDEC
19514 | RELOAD_REG_PRE_MODIFY);
19515 }
19516
19517 else
19518 addr_mask = 0;
19519
19520 /* If the register isn't valid in this register class, just return now. */
19521 if ((addr_mask & RELOAD_REG_VALID) == 0)
19522 {
19523 if (TARGET_DEBUG_ADDR)
19524 {
19525 fprintf (stderr,
19526 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19527 "not valid in class\n",
19528 GET_MODE_NAME (mode), reg_class_names[rclass]);
19529 debug_rtx (addr);
19530 }
19531
19532 return -1;
19533 }
19534
19535 switch (GET_CODE (addr))
19536 {
19537     /* Does the register class support auto update forms for this mode?  We
19538 don't need a scratch register, since the powerpc only supports
19539 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19540 case PRE_INC:
19541 case PRE_DEC:
19542 reg = XEXP (addr, 0);
19543 if (!base_reg_operand (addr, GET_MODE (reg)))
19544 {
19545 fail_msg = "no base register #1";
19546 extra_cost = -1;
19547 }
19548
19549 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19550 {
19551 extra_cost = 1;
19552 type = "update";
19553 }
19554 break;
19555
19556 case PRE_MODIFY:
19557 reg = XEXP (addr, 0);
19558 plus_arg1 = XEXP (addr, 1);
19559 if (!base_reg_operand (reg, GET_MODE (reg))
19560 || GET_CODE (plus_arg1) != PLUS
19561 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19562 {
19563 fail_msg = "bad PRE_MODIFY";
19564 extra_cost = -1;
19565 }
19566
19567 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19568 {
19569 extra_cost = 1;
19570 type = "update";
19571 }
19572 break;
19573
19574 /* Do we need to simulate AND -16 to clear the bottom address bits used
19575 in VMX load/stores? Only allow the AND for vector sizes. */
19576 case AND:
19577 and_arg = XEXP (addr, 0);
19578 if (GET_MODE_SIZE (mode) != 16
19579 || GET_CODE (XEXP (addr, 1)) != CONST_INT
19580 || INTVAL (XEXP (addr, 1)) != -16)
19581 {
19582 fail_msg = "bad Altivec AND #1";
19583 extra_cost = -1;
19584 }
19585
19586 if (rclass != ALTIVEC_REGS)
19587 {
19588 if (legitimate_indirect_address_p (and_arg, false))
19589 extra_cost = 1;
19590
19591 else if (legitimate_indexed_address_p (and_arg, false))
19592 extra_cost = 2;
19593
19594 else
19595 {
19596 fail_msg = "bad Altivec AND #2";
19597 extra_cost = -1;
19598 }
19599
19600 type = "and";
19601 }
19602 break;
19603
19604 /* If this is an indirect address, make sure it is a base register. */
19605 case REG:
19606 case SUBREG:
19607 if (!legitimate_indirect_address_p (addr, false))
19608 {
19609 extra_cost = 1;
19610 type = "move";
19611 }
19612 break;
19613
19614 /* If this is an indexed address, make sure the register class can handle
19615 indexed addresses for this mode. */
19616 case PLUS:
19617 plus_arg0 = XEXP (addr, 0);
19618 plus_arg1 = XEXP (addr, 1);
19619
19620 /* (plus (plus (reg) (constant)) (constant)) is generated during
19621 push_reload processing, so handle it now. */
19622 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19623 {
19624 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19625 {
19626 extra_cost = 1;
19627 type = "offset";
19628 }
19629 }
19630
19631 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19632 push_reload processing, so handle it now. */
19633 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19634 {
19635 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19636 {
19637 extra_cost = 1;
19638 type = "indexed #2";
19639 }
19640 }
19641
19642 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19643 {
19644 fail_msg = "no base register #2";
19645 extra_cost = -1;
19646 }
19647
19648 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19649 {
19650 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19651 || !legitimate_indexed_address_p (addr, false))
19652 {
19653 extra_cost = 1;
19654 type = "indexed";
19655 }
19656 }
19657
19658 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19659 && CONST_INT_P (plus_arg1))
19660 {
19661 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19662 {
19663 extra_cost = 1;
19664 type = "vector d-form offset";
19665 }
19666 }
19667
19668 /* Make sure the register class can handle offset addresses. */
19669 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19670 {
19671 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19672 {
19673 extra_cost = 1;
19674 type = "offset #2";
19675 }
19676 }
19677
19678 else
19679 {
19680 fail_msg = "bad PLUS";
19681 extra_cost = -1;
19682 }
19683
19684 break;
19685
19686 case LO_SUM:
19687 /* Quad offsets are restricted and can't handle normal addresses. */
19688 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19689 {
19690 extra_cost = -1;
19691 type = "vector d-form lo_sum";
19692 }
19693
19694 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19695 {
19696 fail_msg = "bad LO_SUM";
19697 extra_cost = -1;
19698 }
19699
19700 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19701 {
19702 extra_cost = 1;
19703 type = "lo_sum";
19704 }
19705 break;
19706
19707 /* Static addresses need to create a TOC entry. */
19708 case CONST:
19709 case SYMBOL_REF:
19710 case LABEL_REF:
19711 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19712 {
19713 extra_cost = -1;
19714 type = "vector d-form lo_sum #2";
19715 }
19716
19717 else
19718 {
19719 type = "address";
19720 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19721 }
19722 break;
19723
19724 /* TOC references look like offsetable memory. */
19725 case UNSPEC:
19726 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19727 {
19728 fail_msg = "bad UNSPEC";
19729 extra_cost = -1;
19730 }
19731
19732 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19733 {
19734 extra_cost = -1;
19735 type = "vector d-form lo_sum #3";
19736 }
19737
19738 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19739 {
19740 extra_cost = 1;
19741 type = "toc reference";
19742 }
19743 break;
19744
19745 default:
19746 {
19747 fail_msg = "bad address";
19748 extra_cost = -1;
19749 }
19750 }
19751
19752 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19753 {
19754 if (extra_cost < 0)
19755 fprintf (stderr,
19756 "rs6000_secondary_reload_memory error: mode = %s, "
19757 "class = %s, addr_mask = '%s', %s\n",
19758 GET_MODE_NAME (mode),
19759 reg_class_names[rclass],
19760 rs6000_debug_addr_mask (addr_mask, false),
19761 (fail_msg != NULL) ? fail_msg : "<bad address>");
19762
19763 else
19764 fprintf (stderr,
19765 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19766 "addr_mask = '%s', extra cost = %d, %s\n",
19767 GET_MODE_NAME (mode),
19768 reg_class_names[rclass],
19769 rs6000_debug_addr_mask (addr_mask, false),
19770 extra_cost,
19771 (type) ? type : "<none>");
19772
19773 debug_rtx (addr);
19774 }
19775
19776 return extra_cost;
19777 }
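/* A sketch of a positive result: loading a V4SImode value into an
   ALTIVEC_REGS register from (plus (reg sp) (const_int 32)) on a target
   whose VMX addr_mask lacks RELOAD_REG_OFFSET, but where the offset
   address is otherwise legitimate, hits the "offset #2" branch above
   and returns extra_cost == 1, asking for one scratch instruction to
   form the address.  */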
19778
19779 /* Helper function for rs6000_secondary_reload to return true if a move to a
19780    different register class is really a simple move.  */
19781
19782 static bool
19783 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19784 enum rs6000_reg_type from_type,
19785 machine_mode mode)
19786 {
19787 int size = GET_MODE_SIZE (mode);
19788
19789 /* Add support for various direct moves available. In this function, we only
19790 look at cases where we don't need any extra registers, and one or more
19791      simple move insns are issued.  Historically, small integers were not
19792      allowed in FPR/VSX registers.  Single precision binary floating point
19793      is not a simple move because we need to convert to the single precision
19793      memory layout.
19794 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19795 need special direct move handling, which we do not support yet. */
19796 if (TARGET_DIRECT_MOVE
19797 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19798 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19799 {
19800 if (TARGET_POWERPC64)
19801 {
19802 /* ISA 2.07: MTVSRD or MVFVSRD. */
19803 if (size == 8)
19804 return true;
19805
19806 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19807 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19808 return true;
19809 }
19810
19811 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19812 if (TARGET_VSX_SMALL_INTEGER)
19813 {
19814 if (mode == SImode)
19815 return true;
19816
19817 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19818 return true;
19819 }
19820
19821 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19822 if (mode == SDmode)
19823 return true;
19824 }
19825
19826 /* Power6+: MFTGPR or MFFGPR. */
19827 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19828 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19829 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19830 return true;
19831
19832 /* Move to/from SPR. */
19833 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19834 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19835 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19836 return true;
19837
19838 return false;
19839 }
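/* Example: with ISA 2.07 (power8) in 64-bit mode, moving a DImode value
   between a GPR and a VSX register is a single mtvsrd or mfvsrd (the
   size == 8 case above), so we return true and no scratch register or
   helper pattern is needed.  */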
19840
19841 /* Direct move helper function for rs6000_secondary_reload, handle all of the
19842 special direct moves that involve allocating an extra register, return the
19843 insn code of the helper function if there is such a function or
19844 CODE_FOR_nothing if not. */
19845
19846 static bool
19847 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19848 enum rs6000_reg_type from_type,
19849 machine_mode mode,
19850 secondary_reload_info *sri,
19851 bool altivec_p)
19852 {
19853 bool ret = false;
19854 enum insn_code icode = CODE_FOR_nothing;
19855 int cost = 0;
19856 int size = GET_MODE_SIZE (mode);
19857
19858 if (TARGET_POWERPC64 && size == 16)
19859 {
19860       /* Handle moving 128-bit values from GPRs to VSX registers on
19861 ISA 2.07 (power8, power9) when running in 64-bit mode using
19862 XXPERMDI to glue the two 64-bit values back together. */
19863 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19864 {
19865 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19866 icode = reg_addr[mode].reload_vsx_gpr;
19867 }
19868
19869       /* Handle moving 128-bit values from VSX registers to GPRs on
19870 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19871 bottom 64-bit value. */
19872 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19873 {
19874 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19875 icode = reg_addr[mode].reload_gpr_vsx;
19876 }
19877 }
19878
19879 else if (TARGET_POWERPC64 && mode == SFmode)
19880 {
19881 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19882 {
19883 cost = 3; /* xscvdpspn, mfvsrd, and. */
19884 icode = reg_addr[mode].reload_gpr_vsx;
19885 }
19886
19887 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19888 {
19889 cost = 2; /* mtvsrz, xscvspdpn. */
19890 icode = reg_addr[mode].reload_vsx_gpr;
19891 }
19892 }
19893
19894 else if (!TARGET_POWERPC64 && size == 8)
19895 {
19896 /* Handle moving 64-bit values from GPRs to floating point registers on
19897 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19898 32-bit values back together. Altivec register classes must be handled
19899 specially since a different instruction is used, and the secondary
19900 reload support requires a single instruction class in the scratch
19901 register constraint. However, right now TFmode is not allowed in
19902 Altivec registers, so the pattern will never match. */
19903 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19904 {
19905 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19906 icode = reg_addr[mode].reload_fpr_gpr;
19907 }
19908 }
19909
19910 if (icode != CODE_FOR_nothing)
19911 {
19912 ret = true;
19913 if (sri)
19914 {
19915 sri->icode = icode;
19916 sri->extra_cost = cost;
19917 }
19918 }
19919
19920 return ret;
19921 }
19922
19923 /* Return whether a move between two register classes can be done either
19924 directly (simple move) or via a pattern that uses a single extra temporary
19925    (using ISA 2.07's direct move in this case).  */
19926
19927 static bool
19928 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19929 enum rs6000_reg_type from_type,
19930 machine_mode mode,
19931 secondary_reload_info *sri,
19932 bool altivec_p)
19933 {
19934 /* Fall back to load/store reloads if either type is not a register. */
19935 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19936 return false;
19937
19938 /* If we haven't allocated registers yet, assume the move can be done for the
19939 standard register types. */
19940 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19941 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19942 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19943 return true;
19944
19945   /* A move within the same set of registers is a simple move for
19946      non-specialized registers.  */
19947 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19948 return true;
19949
19950 /* Check whether a simple move can be done directly. */
19951 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19952 {
19953 if (sri)
19954 {
19955 sri->icode = CODE_FOR_nothing;
19956 sri->extra_cost = 0;
19957 }
19958 return true;
19959 }
19960
19961 /* Now check if we can do it in a few steps. */
19962 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19963 altivec_p);
19964 }
19965
19966 /* Inform reload about cases where moving X with a mode MODE to a register in
19967 RCLASS requires an extra scratch or immediate register. Return the class
19968 needed for the immediate register.
19969
19970 For VSX and Altivec, we may need a register to convert sp+offset into
19971 reg+sp.
19972
19973 For misaligned 64-bit gpr loads and stores we need a register to
19974 convert an offset address to indirect. */
19975
19976 static reg_class_t
19977 rs6000_secondary_reload (bool in_p,
19978 rtx x,
19979 reg_class_t rclass_i,
19980 machine_mode mode,
19981 secondary_reload_info *sri)
19982 {
19983 enum reg_class rclass = (enum reg_class) rclass_i;
19984 reg_class_t ret = ALL_REGS;
19985 enum insn_code icode;
19986 bool default_p = false;
19987 bool done_p = false;
19988
19989 /* Allow subreg of memory before/during reload. */
19990 bool memory_p = (MEM_P (x)
19991 || (!reload_completed && GET_CODE (x) == SUBREG
19992 && MEM_P (SUBREG_REG (x))));
19993
19994 sri->icode = CODE_FOR_nothing;
19995 sri->t_icode = CODE_FOR_nothing;
19996 sri->extra_cost = 0;
19997 icode = ((in_p)
19998 ? reg_addr[mode].reload_load
19999 : reg_addr[mode].reload_store);
20000
20001 if (REG_P (x) || register_operand (x, mode))
20002 {
20003 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
20004 bool altivec_p = (rclass == ALTIVEC_REGS);
20005 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
20006
20007 if (!in_p)
20008 std::swap (to_type, from_type);
20009
20010 /* Can we do a direct move of some sort? */
20011 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
20012 altivec_p))
20013 {
20014 icode = (enum insn_code)sri->icode;
20015 default_p = false;
20016 done_p = true;
20017 ret = NO_REGS;
20018 }
20019 }
20020
20021 /* Make sure 0.0 is not reloaded or forced into memory. */
20022 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
20023 {
20024 ret = NO_REGS;
20025 default_p = false;
20026 done_p = true;
20027 }
20028
20029 /* If this is a scalar floating point value and we want to load it into the
20030    traditional Altivec registers, do it via a move through a traditional floating
20031 point register, unless we have D-form addressing. Also make sure that
20032 non-zero constants use a FPR. */
20033 if (!done_p && reg_addr[mode].scalar_in_vmx_p
20034 && !mode_supports_vmx_dform (mode)
20035 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20036 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
20037 {
20038 ret = FLOAT_REGS;
20039 default_p = false;
20040 done_p = true;
20041 }
20042
20043 /* Handle reload of load/stores if we have reload helper functions. */
20044 if (!done_p && icode != CODE_FOR_nothing && memory_p)
20045 {
20046 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
20047 mode);
20048
20049 if (extra_cost >= 0)
20050 {
20051 done_p = true;
20052 ret = NO_REGS;
20053 if (extra_cost > 0)
20054 {
20055 sri->extra_cost = extra_cost;
20056 sri->icode = icode;
20057 }
20058 }
20059 }
20060
20061 /* Handle unaligned loads and stores of integer registers. */
20062 if (!done_p && TARGET_POWERPC64
20063 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20064 && memory_p
20065 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
20066 {
20067 rtx addr = XEXP (x, 0);
20068 rtx off = address_offset (addr);
20069
20070 if (off != NULL_RTX)
20071 {
20072 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20073 unsigned HOST_WIDE_INT offset = INTVAL (off);
20074
20075 /* We need a secondary reload when our legitimate_address_p
20076 says the address is good (as otherwise the entire address
20077 will be reloaded), and the offset is not a multiple of
20078 four or we have an address wrap. Address wrap will only
20079 occur for LO_SUMs since legitimate_offset_address_p
20080 rejects addresses for 16-byte mems that will wrap. */
20081 if (GET_CODE (addr) == LO_SUM
20082 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20083 && ((offset & 3) != 0
20084 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20085 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20086 && (offset & 3) != 0))
20087 {
20088 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20089 if (in_p)
20090 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20091 : CODE_FOR_reload_di_load);
20092 else
20093 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20094 : CODE_FOR_reload_di_store);
20095 sri->extra_cost = 2;
20096 ret = NO_REGS;
20097 done_p = true;
20098 }
20099 else
20100 default_p = true;
20101 }
20102 else
20103 default_p = true;
20104 }
20105
20106 if (!done_p && !TARGET_POWERPC64
20107 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20108 && memory_p
20109 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20110 {
20111 rtx addr = XEXP (x, 0);
20112 rtx off = address_offset (addr);
20113
20114 if (off != NULL_RTX)
20115 {
20116 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20117 unsigned HOST_WIDE_INT offset = INTVAL (off);
20118
20119 /* We need a secondary reload when our legitimate_address_p
20120 says the address is good (as otherwise the entire address
20121 will be reloaded), and we have a wrap.
20122
20123 legitimate_lo_sum_address_p allows LO_SUM addresses to
20124 have any offset so test for wrap in the low 16 bits.
20125
20126 legitimate_offset_address_p checks for the range
20127 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20128 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20129 [0x7ff4,0x7fff] respectively, so test for the
20130 intersection of these ranges, [0x7ffc,0x7fff] and
20131 [0x7ff4,0x7ff7] respectively.
20132
20133 Note that the address we see here may have been
20134 manipulated by legitimize_reload_address. */
20135 if (GET_CODE (addr) == LO_SUM
20136 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20137 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20138 {
20139 if (in_p)
20140 sri->icode = CODE_FOR_reload_si_load;
20141 else
20142 sri->icode = CODE_FOR_reload_si_store;
20143 sri->extra_cost = 2;
20144 ret = NO_REGS;
20145 done_p = true;
20146 }
20147 else
20148 default_p = true;
20149 }
20150 else
20151 default_p = true;
20152 }
20153
20154 if (!done_p)
20155 default_p = true;
20156
20157 if (default_p)
20158 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20159
20160 gcc_assert (ret != ALL_REGS);
20161
20162 if (TARGET_DEBUG_ADDR)
20163 {
20164 fprintf (stderr,
20165 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20166 "mode = %s",
20167 reg_class_names[ret],
20168 in_p ? "true" : "false",
20169 reg_class_names[rclass],
20170 GET_MODE_NAME (mode));
20171
20172 if (reload_completed)
20173 fputs (", after reload", stderr);
20174
20175 if (!done_p)
20176 fputs (", done_p not set", stderr);
20177
20178 if (default_p)
20179 fputs (", default secondary reload", stderr);
20180
20181 if (sri->icode != CODE_FOR_nothing)
20182 fprintf (stderr, ", reload func = %s, extra cost = %d",
20183 insn_data[sri->icode].name, sri->extra_cost);
20184
20185 else if (sri->extra_cost > 0)
20186 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20187
20188 fputs ("\n", stderr);
20189 debug_rtx (x);
20190 }
20191
20192 return ret;
20193 }
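/* Example of the misaligned-GPR case above, on a 64-bit target: a DImode
   load from (plus (reg 9) (const_int 6)) is a legitimate address, but
   "ld" is a DS-form instruction whose offset must be a multiple of 4, so
   for in_p we return sri->icode == CODE_FOR_reload_di_load with
   extra_cost == 2, and the scratch register is used to form the address
   indirectly.  */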
20194
20195 /* Better tracing for rs6000_secondary_reload_inner. */
20196
20197 static void
20198 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20199 bool store_p)
20200 {
20201 rtx set, clobber;
20202
20203 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20204
20205 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20206 store_p ? "store" : "load");
20207
20208 if (store_p)
20209 set = gen_rtx_SET (mem, reg);
20210 else
20211 set = gen_rtx_SET (reg, mem);
20212
20213 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20214 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20215 }
20216
20217 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20218 ATTRIBUTE_NORETURN;
20219
20220 static void
20221 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20222 bool store_p)
20223 {
20224 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20225 gcc_unreachable ();
20226 }
20227
20228 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20229 reload helper functions. These were identified in
20230 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20231 reload, it calls the insns:
20232 reload_<RELOAD:mode>_<P:mptrsize>_store
20233 reload_<RELOAD:mode>_<P:mptrsize>_load
20234
20235 which in turn calls this function, to do whatever is necessary to create
20236 valid addresses. */
20237
20238 void
20239 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20240 {
20241 int regno = true_regnum (reg);
20242 machine_mode mode = GET_MODE (reg);
20243 addr_mask_type addr_mask;
20244 rtx addr;
20245 rtx new_addr;
20246 rtx op_reg, op0, op1;
20247 rtx and_op;
20248 rtx cc_clobber;
20249 rtvec rv;
20250
20251 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20252 || !base_reg_operand (scratch, GET_MODE (scratch)))
20253 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20254
20255 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20256 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20257
20258 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20259 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20260
20261 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20262 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20263
20264 else
20265 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20266
20267 /* Make sure the mode is valid in this register class. */
20268 if ((addr_mask & RELOAD_REG_VALID) == 0)
20269 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20270
20271 if (TARGET_DEBUG_ADDR)
20272 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20273
20274 new_addr = addr = XEXP (mem, 0);
20275 switch (GET_CODE (addr))
20276 {
20277 /* Does the register class support auto update forms for this mode? If
20278 not, do the update now. We don't need a scratch register, since the
20279 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20280 case PRE_INC:
20281 case PRE_DEC:
20282 op_reg = XEXP (addr, 0);
20283 if (!base_reg_operand (op_reg, Pmode))
20284 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20285
20286 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20287 {
20288 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
20289 new_addr = op_reg;
20290 }
20291 break;
20292
20293 case PRE_MODIFY:
20294 op0 = XEXP (addr, 0);
20295 op1 = XEXP (addr, 1);
20296 if (!base_reg_operand (op0, Pmode)
20297 || GET_CODE (op1) != PLUS
20298 || !rtx_equal_p (op0, XEXP (op1, 0)))
20299 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20300
20301 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20302 {
20303 emit_insn (gen_rtx_SET (op0, op1));
20304 new_addr = reg;
20305 }
20306 break;
20307
20308 /* Do we need to simulate AND -16 to clear the bottom address bits used
20309 in VMX load/stores? */
20310 case AND:
20311 op0 = XEXP (addr, 0);
20312 op1 = XEXP (addr, 1);
20313 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20314 {
20315 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20316 op_reg = op0;
20317
20318 else if (GET_CODE (op1) == PLUS)
20319 {
20320 emit_insn (gen_rtx_SET (scratch, op1));
20321 op_reg = scratch;
20322 }
20323
20324 else
20325 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20326
20327 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20328 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20329 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20330 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20331 new_addr = scratch;
20332 }
20333 break;
20334
20335 /* If this is an indirect address, make sure it is a base register. */
20336 case REG:
20337 case SUBREG:
20338 if (!base_reg_operand (addr, GET_MODE (addr)))
20339 {
20340 emit_insn (gen_rtx_SET (scratch, addr));
20341 new_addr = scratch;
20342 }
20343 break;
20344
20345 /* If this is an indexed address, make sure the register class can handle
20346 indexed addresses for this mode. */
20347 case PLUS:
20348 op0 = XEXP (addr, 0);
20349 op1 = XEXP (addr, 1);
20350 if (!base_reg_operand (op0, Pmode))
20351 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20352
20353 else if (int_reg_operand (op1, Pmode))
20354 {
20355 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20356 {
20357 emit_insn (gen_rtx_SET (scratch, addr));
20358 new_addr = scratch;
20359 }
20360 }
20361
20362 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20363 {
20364 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20365 || !quad_address_p (addr, mode, false))
20366 {
20367 emit_insn (gen_rtx_SET (scratch, addr));
20368 new_addr = scratch;
20369 }
20370 }
20371
20372 /* Make sure the register class can handle offset addresses. */
20373 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20374 {
20375 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20376 {
20377 emit_insn (gen_rtx_SET (scratch, addr));
20378 new_addr = scratch;
20379 }
20380 }
20381
20382 else
20383 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20384
20385 break;
20386
20387 case LO_SUM:
20388 op0 = XEXP (addr, 0);
20389 op1 = XEXP (addr, 1);
20390 if (!base_reg_operand (op0, Pmode))
20391 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20392
20393 else if (int_reg_operand (op1, Pmode))
20394 {
20395 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20396 {
20397 emit_insn (gen_rtx_SET (scratch, addr));
20398 new_addr = scratch;
20399 }
20400 }
20401
20402 /* Quad offsets are restricted and can't handle normal addresses. */
20403 else if (mode_supports_vsx_dform_quad (mode))
20404 {
20405 emit_insn (gen_rtx_SET (scratch, addr));
20406 new_addr = scratch;
20407 }
20408
20409 /* Make sure the register class can handle offset addresses. */
20410 else if (legitimate_lo_sum_address_p (mode, addr, false))
20411 {
20412 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20413 {
20414 emit_insn (gen_rtx_SET (scratch, addr));
20415 new_addr = scratch;
20416 }
20417 }
20418
20419 else
20420 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20421
20422 break;
20423
20424 case SYMBOL_REF:
20425 case CONST:
20426 case LABEL_REF:
20427 rs6000_emit_move (scratch, addr, Pmode);
20428 new_addr = scratch;
20429 break;
20430
20431 default:
20432 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20433 }
20434
20435 /* Adjust the address if it changed. */
20436 if (addr != new_addr)
20437 {
20438 mem = replace_equiv_address_nv (mem, new_addr);
20439 if (TARGET_DEBUG_ADDR)
20440 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20441 }
20442
20443 /* Now create the move. */
20444 if (store_p)
20445 emit_insn (gen_rtx_SET (mem, reg));
20446 else
20447 emit_insn (gen_rtx_SET (reg, mem));
20448
20449 return;
20450 }
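/* Worked example (added for exposition; register numbers are illustrative):
   reloading an Altivec access whose address is
       (and (plus (reg 3) (reg 4)) (const_int -16))
   for a class without RELOAD_REG_AND_M16 first copies the inner PLUS into
   SCRATCH, then emits the AND -16 as a PARALLEL with a CCmode scratch
   clobber to match the target's AND pattern, and finally rewrites MEM to
   address memory through SCRATCH before emitting the real move.  */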
20451
20452 /* Convert reloads involving 64-bit gprs and misaligned offset
20453 addressing, or multiple 32-bit gprs and offsets that are too large,
20454 to use indirect addressing. */
20455
20456 void
20457 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
20458 {
20459 int regno = true_regnum (reg);
20460 enum reg_class rclass;
20461 rtx addr;
20462 rtx scratch_or_premodify = scratch;
20463
20464 if (TARGET_DEBUG_ADDR)
20465 {
20466 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20467 store_p ? "store" : "load");
20468 fprintf (stderr, "reg:\n");
20469 debug_rtx (reg);
20470 fprintf (stderr, "mem:\n");
20471 debug_rtx (mem);
20472 fprintf (stderr, "scratch:\n");
20473 debug_rtx (scratch);
20474 }
20475
20476 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
20477 gcc_assert (GET_CODE (mem) == MEM);
20478 rclass = REGNO_REG_CLASS (regno);
20479 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20480 addr = XEXP (mem, 0);
20481
20482 if (GET_CODE (addr) == PRE_MODIFY)
20483 {
20484 gcc_assert (REG_P (XEXP (addr, 0))
20485 && GET_CODE (XEXP (addr, 1)) == PLUS
20486 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20487 scratch_or_premodify = XEXP (addr, 0);
20488 if (!HARD_REGISTER_P (scratch_or_premodify))
20489 /* If we have a pseudo here then reload will have arranged
20490 to have it replaced, but only in the original insn.
20491 Use the replacement here too. */
20492 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
20493
20494 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20495 expressions from the original insn, without unsharing them.
20496 Any RTL that points into the original insn will of course
20497 have register replacements applied. That is why we don't
20498 need to look for replacements under the PLUS. */
20499 addr = XEXP (addr, 1);
20500 }
20501 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20502
20503 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20504
20505 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20506
20507 /* Now create the move. */
20508 if (store_p)
20509 emit_insn (gen_rtx_SET (mem, reg));
20510 else
20511 emit_insn (gen_rtx_SET (reg, mem));
20512
20513 return;
20514 }
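/* Worked example (exposition only; the offset is made up): a DImode load
   from (mem:DI (plus (reg 9) (const_int 6))) cannot use the DS-form "ld"
   instruction, whose displacement must be a multiple of 4, so the PLUS is
   moved into the scratch register and the access is rewritten as
   (mem:DI (reg scratch)), i.e. plain register-indirect addressing.  */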
20515
20516 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
20517 this function has any SDmode references. If we are on a power7 or later, we
20518 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
20519 can load/store the value. */
20520
20521 static void
20522 rs6000_alloc_sdmode_stack_slot (void)
20523 {
20524 tree t;
20525 basic_block bb;
20526 gimple_stmt_iterator gsi;
20527
20528 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
20529 /* We use a different approach for dealing with the secondary
20530 memory in LRA. */
20531 if (ira_use_lra_p)
20532 return;
20533
20534 if (TARGET_NO_SDMODE_STACK)
20535 return;
20536
20537 FOR_EACH_BB_FN (bb, cfun)
20538 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
20539 {
20540 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
20541 if (ret)
20542 {
20543 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20544 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20545 SDmode, 0);
20546 return;
20547 }
20548 }
20549
20550 /* Check for any SDmode parameters of the function. */
20551 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
20552 {
20553 if (TREE_TYPE (t) == error_mark_node)
20554 continue;
20555
20556 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
20557 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
20558 {
20559 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
20560 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
20561 SDmode, 0);
20562 return;
20563 }
20564 }
20565 }
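/* Illustrative note (exposition): a pre-power7 function declaring a
   _Decimal32 local gets one shared 8-byte DDmode slot here; SDmode values
   are bounced through it because the older FP load/store instructions
   cannot move 4-byte decimal values between GPRs and FPRs directly.  */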
20566
20567 static void
20568 rs6000_instantiate_decls (void)
20569 {
20570 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
20571 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
20572 }
20573
20574 /* Given an rtx X being reloaded into a reg required to be
20575 in class CLASS, return the class of reg to actually use.
20576 In general this is just CLASS; but on some machines
20577 in some cases it is preferable to use a more restrictive class.
20578
20579 On the RS/6000, we have to return NO_REGS when we want to reload a
20580 floating-point CONST_DOUBLE to force it to be copied to memory.
20581
20582 We also don't want to reload integer values into floating-point
20583 registers if we can at all help it. In fact, this can
20584 cause reload to die, if it tries to generate a reload of CTR
20585 into a FP register and discovers it doesn't have the memory location
20586 required.
20587
20588 ??? Would it be a good idea to have reload do the converse, that is
20589 try to reload floating modes into FP registers if possible?
20590 */
20591
20592 static enum reg_class
20593 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20594 {
20595 machine_mode mode = GET_MODE (x);
20596 bool is_constant = CONSTANT_P (x);
20597
20598 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20599 reload class for it. */
20600 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20601 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20602 return NO_REGS;
20603
20604 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20605 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20606 return NO_REGS;
20607
20608 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20609 the reloading of address expressions using PLUS into floating point
20610 registers. */
20611 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20612 {
20613 if (is_constant)
20614 {
20615 /* Zero is always allowed in all VSX registers. */
20616 if (x == CONST0_RTX (mode))
20617 return rclass;
20618
20619 /* If this is a vector constant that can be formed with a few Altivec
20620 instructions, we want altivec registers. */
20621 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20622 return ALTIVEC_REGS;
20623
20624 /* If this is an integer constant that can easily be loaded into
20625 vector registers, allow it. */
20626 if (CONST_INT_P (x))
20627 {
20628 HOST_WIDE_INT value = INTVAL (x);
20629
20630 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
20631 2.06 can generate it in the Altivec registers with
20632 VSPLTI<x>. */
20633 if (value == -1)
20634 {
20635 if (TARGET_P8_VECTOR)
20636 return rclass;
20637 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20638 return ALTIVEC_REGS;
20639 else
20640 return NO_REGS;
20641 }
20642
20643 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
20644 a sign extend in the Altivec registers. */
20645 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
20646 && TARGET_VSX_SMALL_INTEGER
20647 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
20648 return ALTIVEC_REGS;
20649 }
20650
20651 /* Force constant to memory. */
20652 return NO_REGS;
20653 }
20654
20655 /* D-form addressing can easily reload the value. */
20656 if (mode_supports_vmx_dform (mode)
20657 || mode_supports_vsx_dform_quad (mode))
20658 return rclass;
20659
20660 /* If this is a scalar floating point value and we don't have D-form
20661 addressing, prefer the traditional floating point registers so that we
20662 can use D-form (register+offset) addressing. */
20663 if (rclass == VSX_REGS
20664 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
20665 return FLOAT_REGS;
20666
20667 /* Prefer the Altivec registers if Altivec is handling the vector
20668 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20669 loads. */
20670 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20671 || mode == V1TImode)
20672 return ALTIVEC_REGS;
20673
20674 return rclass;
20675 }
20676
20677 if (is_constant || GET_CODE (x) == PLUS)
20678 {
20679 if (reg_class_subset_p (GENERAL_REGS, rclass))
20680 return GENERAL_REGS;
20681 if (reg_class_subset_p (BASE_REGS, rclass))
20682 return BASE_REGS;
20683 return NO_REGS;
20684 }
20685
20686 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20687 return GENERAL_REGS;
20688
20689 return rclass;
20690 }
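/* Worked examples (exposition only):
   - x = (const_int -1), rclass = VSX_REGS: with TARGET_P8_VECTOR the
     XXLORC instruction can form -1 in any VSX register, so VSX_REGS is
     returned; on ISA 2.06 only the Altivec VSPLTIS* splats can, so
     ALTIVEC_REGS is preferred instead.
   - x = a CONST_DOUBLE that is not an easy constant: NO_REGS is returned,
     which forces the constant into memory.  */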
20691
20692 /* Debug version of rs6000_preferred_reload_class. */
20693 static enum reg_class
20694 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20695 {
20696 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20697
20698 fprintf (stderr,
20699 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20700 "mode = %s, x:\n",
20701 reg_class_names[ret], reg_class_names[rclass],
20702 GET_MODE_NAME (GET_MODE (x)));
20703 debug_rtx (x);
20704
20705 return ret;
20706 }
20707
20708 /* If we are copying between FP or AltiVec registers and anything else, we need
20709 a memory location. The exception is when we are targeting ppc64 and the
20710 move to/from fpr to gpr instructions are available. Also, under VSX, you
20711 can copy vector registers from the FP register set to the Altivec register
20712 set and vice versa. */
20713
20714 static bool
20715 rs6000_secondary_memory_needed (enum reg_class from_class,
20716 enum reg_class to_class,
20717 machine_mode mode)
20718 {
20719 enum rs6000_reg_type from_type, to_type;
20720 bool altivec_p = ((from_class == ALTIVEC_REGS)
20721 || (to_class == ALTIVEC_REGS));
20722
20723 /* If a simple/direct move is available, we don't need secondary memory.  */
20724 from_type = reg_class_to_reg_type[(int)from_class];
20725 to_type = reg_class_to_reg_type[(int)to_class];
20726
20727 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20728 (secondary_reload_info *)0, altivec_p))
20729 return false;
20730
20731 /* If we have a floating point or vector register class, we need to use
20732 memory to transfer the data. */
20733 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20734 return true;
20735
20736 return false;
20737 }
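/* Example (exposition): copying DImode between GENERAL_REGS and
   FLOAT_REGS needs no memory on a power8, where the mtvsrd/mfvsrd direct
   moves apply, but does on older 64-bit CPUs, where the value has to be
   bounced through a stack slot.  */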
20738
20739 /* Debug version of rs6000_secondary_memory_needed. */
20740 static bool
20741 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
20742 enum reg_class to_class,
20743 machine_mode mode)
20744 {
20745 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
20746
20747 fprintf (stderr,
20748 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20749 "to_class = %s, mode = %s\n",
20750 ret ? "true" : "false",
20751 reg_class_names[from_class],
20752 reg_class_names[to_class],
20753 GET_MODE_NAME (mode));
20754
20755 return ret;
20756 }
20757
20758 /* Return the register class of a scratch register needed to copy IN into
20759 or out of a register in RCLASS in MODE. If it can be done directly,
20760 NO_REGS is returned. */
20761
20762 static enum reg_class
20763 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20764 rtx in)
20765 {
20766 int regno;
20767
20768 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20769 #if TARGET_MACHO
20770 && MACHOPIC_INDIRECT
20771 #endif
20772 ))
20773 {
20774 /* We cannot copy a symbolic operand directly into anything
20775 other than BASE_REGS for TARGET_ELF. So indicate that a
20776 register from BASE_REGS is needed as an intermediate
20777 register.
20778
20779 On Darwin, pic addresses require a load from memory, which
20780 needs a base register. */
20781 if (rclass != BASE_REGS
20782 && (GET_CODE (in) == SYMBOL_REF
20783 || GET_CODE (in) == HIGH
20784 || GET_CODE (in) == LABEL_REF
20785 || GET_CODE (in) == CONST))
20786 return BASE_REGS;
20787 }
20788
20789 if (GET_CODE (in) == REG)
20790 {
20791 regno = REGNO (in);
20792 if (regno >= FIRST_PSEUDO_REGISTER)
20793 {
20794 regno = true_regnum (in);
20795 if (regno >= FIRST_PSEUDO_REGISTER)
20796 regno = -1;
20797 }
20798 }
20799 else if (GET_CODE (in) == SUBREG)
20800 {
20801 regno = true_regnum (in);
20802 if (regno >= FIRST_PSEUDO_REGISTER)
20803 regno = -1;
20804 }
20805 else
20806 regno = -1;
20807
20808 /* If we have VSX register moves, prefer moving scalar values between
20809 Altivec registers and GPR by going via an FPR (and then via memory)
20810 instead of reloading the secondary memory address for Altivec moves. */
20811 if (TARGET_VSX
20812 && GET_MODE_SIZE (mode) < 16
20813 && !mode_supports_vmx_dform (mode)
20814 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20815 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20816 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20817 && (regno >= 0 && INT_REGNO_P (regno)))))
20818 return FLOAT_REGS;
20819
20820 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20821 into anything. */
20822 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20823 || (regno >= 0 && INT_REGNO_P (regno)))
20824 return NO_REGS;
20825
20826 /* Constants, memory, and VSX registers can go into VSX registers (both the
20827 traditional floating point and the altivec registers). */
20828 if (rclass == VSX_REGS
20829 && (regno == -1 || VSX_REGNO_P (regno)))
20830 return NO_REGS;
20831
20832 /* Constants, memory, and FP registers can go into FP registers. */
20833 if ((regno == -1 || FP_REGNO_P (regno))
20834 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20835 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20836
20837 /* Memory and AltiVec registers can go into AltiVec registers.  */
20838 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20839 && rclass == ALTIVEC_REGS)
20840 return NO_REGS;
20841
20842 /* We can copy among the CR registers. */
20843 if ((rclass == CR_REGS || rclass == CR0_REGS)
20844 && regno >= 0 && CR_REGNO_P (regno))
20845 return NO_REGS;
20846
20847 /* Otherwise, we need GENERAL_REGS. */
20848 return GENERAL_REGS;
20849 }
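/* Examples (exposition): on TARGET_ELF, copying (symbol_ref "x") into
   FLOAT_REGS returns BASE_REGS, since the address must first be formed in
   a base register before the FP load can happen; a copy between two CR
   fields returns NO_REGS because mcrf needs no intermediate.  */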
20850
20851 /* Debug version of rs6000_secondary_reload_class. */
20852 static enum reg_class
20853 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20854 machine_mode mode, rtx in)
20855 {
20856 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20857 fprintf (stderr,
20858 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20859 "mode = %s, input rtx:\n",
20860 reg_class_names[ret], reg_class_names[rclass],
20861 GET_MODE_NAME (mode));
20862 debug_rtx (in);
20863
20864 return ret;
20865 }
20866
20867 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
20868
20869 static bool
20870 rs6000_cannot_change_mode_class (machine_mode from,
20871 machine_mode to,
20872 enum reg_class rclass)
20873 {
20874 unsigned from_size = GET_MODE_SIZE (from);
20875 unsigned to_size = GET_MODE_SIZE (to);
20876
20877 if (from_size != to_size)
20878 {
20879 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20880
20881 if (reg_classes_intersect_p (xclass, rclass))
20882 {
20883 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
20884 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
20885 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20886 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20887
20888 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20889 single register under VSX because the scalar part of the register
20890 is in the upper 64-bits, and not the lower 64-bits. Types like
20891 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20892 IEEE floating point can't overlap, and neither can small
20893 values. */
20894
20895 if (to_float128_vector_p && from_float128_vector_p)
20896 return false;
20897
20898 else if (to_float128_vector_p || from_float128_vector_p)
20899 return true;
20900
20901 /* TDmode in floating-mode registers must always go into a register
20902 pair with the most significant word in the even-numbered register
20903 to match ISA requirements. In little-endian mode, this does not
20904 match subreg numbering, so we cannot allow subregs. */
20905 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20906 return true;
20907
20908 if (from_size < 8 || to_size < 8)
20909 return true;
20910
20911 if (from_size == 8 && (8 * to_nregs) != to_size)
20912 return true;
20913
20914 if (to_size == 8 && (8 * from_nregs) != from_size)
20915 return true;
20916
20917 return false;
20918 }
20919 else
20920 return false;
20921 }
20922
20923 /* Since the VSX register set includes traditional floating point registers
20924 and altivec registers, just check for the size being different instead of
20925 trying to check whether the modes are vector modes. Otherwise it won't
20926 allow say DF and DI to change classes. For types like TFmode and TDmode
20927 that take 2 64-bit registers, rather than a single 128-bit register, don't
20928 allow subregs of those types to other 128 bit types. */
20929 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20930 {
20931 unsigned num_regs = (from_size + 15) / 16;
20932 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
20933 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
20934 return true;
20935
20936 return (from_size != 8 && from_size != 16);
20937 }
20938
20939 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20940 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20941 return true;
20942
20943 return false;
20944 }
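/* Examples (exposition): under VSX, (subreg:DI (reg:KF ...)) is rejected
   because the 64-bit scalar lives in the upper half of the 128-bit
   register, so subreg byte numbering would lie about where the bits are;
   a full 16-byte KFmode <-> V1TImode change is allowed.  */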
20945
20946 /* Debug version of rs6000_cannot_change_mode_class. */
20947 static bool
20948 rs6000_debug_cannot_change_mode_class (machine_mode from,
20949 machine_mode to,
20950 enum reg_class rclass)
20951 {
20952 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
20953
20954 fprintf (stderr,
20955 "rs6000_cannot_change_mode_class, return %s, from = %s, "
20956 "to = %s, rclass = %s\n",
20957 ret ? "true" : "false",
20958 GET_MODE_NAME (from), GET_MODE_NAME (to),
20959 reg_class_names[rclass]);
20960
20961 return ret;
20962 }
20963 \f
20964 /* Return a string to do a move operation of 128 bits of data. */
20965
20966 const char *
20967 rs6000_output_move_128bit (rtx operands[])
20968 {
20969 rtx dest = operands[0];
20970 rtx src = operands[1];
20971 machine_mode mode = GET_MODE (dest);
20972 int dest_regno;
20973 int src_regno;
20974 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20975 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20976
20977 if (REG_P (dest))
20978 {
20979 dest_regno = REGNO (dest);
20980 dest_gpr_p = INT_REGNO_P (dest_regno);
20981 dest_fp_p = FP_REGNO_P (dest_regno);
20982 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20983 dest_vsx_p = dest_fp_p | dest_vmx_p;
20984 }
20985 else
20986 {
20987 dest_regno = -1;
20988 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20989 }
20990
20991 if (REG_P (src))
20992 {
20993 src_regno = REGNO (src);
20994 src_gpr_p = INT_REGNO_P (src_regno);
20995 src_fp_p = FP_REGNO_P (src_regno);
20996 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20997 src_vsx_p = src_fp_p | src_vmx_p;
20998 }
20999 else
21000 {
21001 src_regno = -1;
21002 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
21003 }
21004
21005 /* Register moves. */
21006 if (dest_regno >= 0 && src_regno >= 0)
21007 {
21008 if (dest_gpr_p)
21009 {
21010 if (src_gpr_p)
21011 return "#";
21012
21013 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
21014 return (WORDS_BIG_ENDIAN
21015 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
21016 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
21017
21018 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
21019 return "#";
21020 }
21021
21022 else if (TARGET_VSX && dest_vsx_p)
21023 {
21024 if (src_vsx_p)
21025 return "xxlor %x0,%x1,%x1";
21026
21027 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
21028 return (WORDS_BIG_ENDIAN
21029 ? "mtvsrdd %x0,%1,%L1"
21030 : "mtvsrdd %x0,%L1,%1");
21031
21032 else if (TARGET_DIRECT_MOVE && src_gpr_p)
21033 return "#";
21034 }
21035
21036 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
21037 return "vor %0,%1,%1";
21038
21039 else if (dest_fp_p && src_fp_p)
21040 return "#";
21041 }
21042
21043 /* Loads. */
21044 else if (dest_regno >= 0 && MEM_P (src))
21045 {
21046 if (dest_gpr_p)
21047 {
21048 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21049 return "lq %0,%1";
21050 else
21051 return "#";
21052 }
21053
21054 else if (TARGET_ALTIVEC && dest_vmx_p
21055 && altivec_indexed_or_indirect_operand (src, mode))
21056 return "lvx %0,%y1";
21057
21058 else if (TARGET_VSX && dest_vsx_p)
21059 {
21060 if (mode_supports_vsx_dform_quad (mode)
21061 && quad_address_p (XEXP (src, 0), mode, true))
21062 return "lxv %x0,%1";
21063
21064 else if (TARGET_P9_VECTOR)
21065 return "lxvx %x0,%y1";
21066
21067 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21068 return "lxvw4x %x0,%y1";
21069
21070 else
21071 return "lxvd2x %x0,%y1";
21072 }
21073
21074 else if (TARGET_ALTIVEC && dest_vmx_p)
21075 return "lvx %0,%y1";
21076
21077 else if (dest_fp_p)
21078 return "#";
21079 }
21080
21081 /* Stores. */
21082 else if (src_regno >= 0 && MEM_P (dest))
21083 {
21084 if (src_gpr_p)
21085 {
21086 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
21087 return "stq %1,%0";
21088 else
21089 return "#";
21090 }
21091
21092 else if (TARGET_ALTIVEC && src_vmx_p
21093 && altivec_indexed_or_indirect_operand (dest, mode))
21094 return "stvx %1,%y0";
21095
21096 else if (TARGET_VSX && src_vsx_p)
21097 {
21098 if (mode_supports_vsx_dform_quad (mode)
21099 && quad_address_p (XEXP (dest, 0), mode, true))
21100 return "stxv %x1,%0";
21101
21102 else if (TARGET_P9_VECTOR)
21103 return "stxvx %x1,%y0";
21104
21105 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
21106 return "stxvw4x %x1,%y0";
21107
21108 else
21109 return "stxvd2x %x1,%y0";
21110 }
21111
21112 else if (TARGET_ALTIVEC && src_vmx_p)
21113 return "stvx %1,%y0";
21114
21115 else if (src_fp_p)
21116 return "#";
21117 }
21118
21119 /* Constants. */
21120 else if (dest_regno >= 0
21121 && (GET_CODE (src) == CONST_INT
21122 || GET_CODE (src) == CONST_WIDE_INT
21123 || GET_CODE (src) == CONST_DOUBLE
21124 || GET_CODE (src) == CONST_VECTOR))
21125 {
21126 if (dest_gpr_p)
21127 return "#";
21128
21129 else if ((dest_vmx_p && TARGET_ALTIVEC)
21130 || (dest_vsx_p && TARGET_VSX))
21131 return output_vec_const_move (operands);
21132 }
21133
21134 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
21135 }
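/* Example mappings (exposition only):
     VSX reg -> VSX reg		"xxlor %x0,%x1,%x1"
     GPR pair -> GPR pair	"#" (split into word moves later)
     power9 d-form VSX load	"lxv %x0,%1"
     pre-power9 V4SImode load	"lxvw4x %x0,%y1"
   Returning "#" asks for the move to be broken up by a splitter once
   register allocation is complete.  */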
21136
21137 /* Validate a 128-bit move. */
21138 bool
21139 rs6000_move_128bit_ok_p (rtx operands[])
21140 {
21141 machine_mode mode = GET_MODE (operands[0]);
21142 return (gpc_reg_operand (operands[0], mode)
21143 || gpc_reg_operand (operands[1], mode));
21144 }
21145
21146 /* Return true if a 128-bit move needs to be split. */
21147 bool
21148 rs6000_split_128bit_ok_p (rtx operands[])
21149 {
21150 if (!reload_completed)
21151 return false;
21152
21153 if (!gpr_or_gpr_p (operands[0], operands[1]))
21154 return false;
21155
21156 if (quad_load_store_p (operands[0], operands[1]))
21157 return false;
21158
21159 return true;
21160 }
21161
21162 \f
21163 /* Given a comparison operation, return the bit number in CCR to test. We
21164 know this is a valid comparison.
21165
21166 SCC_P is 1 if this is for an scc. That means that %D will have been
21167 used instead of %C, so the bits will be in different places.
21168
21169 Return -1 if OP isn't a valid comparison for some reason. */
21170
21171 int
21172 ccr_bit (rtx op, int scc_p)
21173 {
21174 enum rtx_code code = GET_CODE (op);
21175 machine_mode cc_mode;
21176 int cc_regnum;
21177 int base_bit;
21178 rtx reg;
21179
21180 if (!COMPARISON_P (op))
21181 return -1;
21182
21183 reg = XEXP (op, 0);
21184
21185 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21186
21187 cc_mode = GET_MODE (reg);
21188 cc_regnum = REGNO (reg);
21189 base_bit = 4 * (cc_regnum - CR0_REGNO);
21190
21191 validate_condition_mode (code, cc_mode);
21192
21193 /* When generating a sCOND operation, only positive conditions are
21194 allowed. */
21195 gcc_assert (!scc_p
21196 || code == EQ || code == GT || code == LT || code == UNORDERED
21197 || code == GTU || code == LTU);
21198
21199 switch (code)
21200 {
21201 case NE:
21202 return scc_p ? base_bit + 3 : base_bit + 2;
21203 case EQ:
21204 return base_bit + 2;
21205 case GT: case GTU: case UNLE:
21206 return base_bit + 1;
21207 case LT: case LTU: case UNGE:
21208 return base_bit;
21209 case ORDERED: case UNORDERED:
21210 return base_bit + 3;
21211
21212 case GE: case GEU:
21213 /* If scc, we will have done a cror to put the bit in the
21214 unordered position. So test that bit. For integer, this is ! LT
21215 unless this is an scc insn. */
21216 return scc_p ? base_bit + 3 : base_bit;
21217
21218 case LE: case LEU:
21219 return scc_p ? base_bit + 3 : base_bit + 1;
21220
21221 default:
21222 gcc_unreachable ();
21223 }
21224 }
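/* Worked example (exposition): for (gt (reg:CC <cr1>) (const_int 0)) with
   scc_p == 0, base_bit is 4 * (cr1 - cr0) = 4 and GT maps to
   base_bit + 1, so the function returns 5; each CR field contributes four
   consecutive bits (LT, GT, EQ, SO).  */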
21225 \f
21226 /* Return the GOT register. */
21227
21228 rtx
21229 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21230 {
21231 /* The second flow pass currently (June 1999) can't update
21232 regs_ever_live without disturbing other parts of the compiler, so
21233 update it here to make the prolog/epilogue code happy. */
21234 if (!can_create_pseudo_p ()
21235 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21236 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21237
21238 crtl->uses_pic_offset_table = 1;
21239
21240 return pic_offset_table_rtx;
21241 }
21242 \f
21243 static rs6000_stack_t stack_info;
21244
21245 /* Function to init struct machine_function.
21246 This will be called, via a pointer variable,
21247 from push_function_context. */
21248
21249 static struct machine_function *
21250 rs6000_init_machine_status (void)
21251 {
21252 stack_info.reload_completed = 0;
21253 return ggc_cleared_alloc<machine_function> ();
21254 }
21255 \f
21256 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21257
21258 /* Write out a function code label. */
21259
21260 void
21261 rs6000_output_function_entry (FILE *file, const char *fname)
21262 {
21263 if (fname[0] != '.')
21264 {
21265 switch (DEFAULT_ABI)
21266 {
21267 default:
21268 gcc_unreachable ();
21269
21270 case ABI_AIX:
21271 if (DOT_SYMBOLS)
21272 putc ('.', file);
21273 else
21274 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21275 break;
21276
21277 case ABI_ELFv2:
21278 case ABI_V4:
21279 case ABI_DARWIN:
21280 break;
21281 }
21282 }
21283
21284 RS6000_OUTPUT_BASENAME (file, fname);
21285 }
21286
21287 /* Print an operand. Recognize special options, documented below. */
21288
21289 #if TARGET_ELF
21290 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21291 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21292 #else
21293 #define SMALL_DATA_RELOC "sda21"
21294 #define SMALL_DATA_REG 0
21295 #endif
21296
21297 void
21298 print_operand (FILE *file, rtx x, int code)
21299 {
21300 int i;
21301 unsigned HOST_WIDE_INT uval;
21302
21303 switch (code)
21304 {
21305 /* %a is output_address. */
21306
21307 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21308 output_operand. */
21309
21310 case 'D':
21311 /* Like 'J' but get to the GT bit only. */
21312 gcc_assert (REG_P (x));
21313
21314 /* Bit 1 is GT bit. */
21315 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21316
21317 /* Add one for shift count in rlinm for scc. */
21318 fprintf (file, "%d", i + 1);
21319 return;
21320
21321 case 'e':
21322 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21323 if (! INT_P (x))
21324 {
21325 output_operand_lossage ("invalid %%e value");
21326 return;
21327 }
21328
21329 uval = INTVAL (x);
21330 if ((uval & 0xffff) == 0 && uval != 0)
21331 putc ('s', file);
21332 return;
21333
21334 case 'E':
21335 /* X is a CR register.  Print the number of the EQ bit of the CR.  */
21336 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21337 output_operand_lossage ("invalid %%E value");
21338 else
21339 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21340 return;
21341
21342 case 'f':
21343 /* X is a CR register. Print the shift count needed to move it
21344 to the high-order four bits. */
21345 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21346 output_operand_lossage ("invalid %%f value");
21347 else
21348 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21349 return;
21350
21351 case 'F':
21352 /* Similar, but print the count for the rotate in the opposite
21353 direction. */
21354 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21355 output_operand_lossage ("invalid %%F value");
21356 else
21357 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21358 return;
21359
21360 case 'G':
21361 /* X is a constant integer. If it is negative, print "m",
21362 otherwise print "z". This is to make an aze or ame insn. */
21363 if (GET_CODE (x) != CONST_INT)
21364 output_operand_lossage ("invalid %%G value");
21365 else if (INTVAL (x) >= 0)
21366 putc ('z', file);
21367 else
21368 putc ('m', file);
21369 return;
21370
21371 case 'h':
21372 /* If constant, output low-order five bits. Otherwise, write
21373 normally. */
21374 if (INT_P (x))
21375 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21376 else
21377 print_operand (file, x, 0);
21378 return;
21379
21380 case 'H':
21381 /* If constant, output low-order six bits. Otherwise, write
21382 normally. */
21383 if (INT_P (x))
21384 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21385 else
21386 print_operand (file, x, 0);
21387 return;
21388
21389 case 'I':
21390 /* Print `i' if this is a constant, else nothing. */
21391 if (INT_P (x))
21392 putc ('i', file);
21393 return;
21394
21395 case 'j':
21396 /* Write the bit number in CCR for jump. */
21397 i = ccr_bit (x, 0);
21398 if (i == -1)
21399 output_operand_lossage ("invalid %%j code");
21400 else
21401 fprintf (file, "%d", i);
21402 return;
21403
21404 case 'J':
21405 /* Similar, but add one for shift count in rlinm for scc and pass
21406 scc flag to `ccr_bit'. */
21407 i = ccr_bit (x, 1);
21408 if (i == -1)
21409 output_operand_lossage ("invalid %%J code");
21410 else
21411 /* If we want bit 31, write a shift count of zero, not 32. */
21412 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21413 return;
21414
21415 case 'k':
21416 /* X must be a constant. Write the 1's complement of the
21417 constant. */
21418 if (! INT_P (x))
21419 output_operand_lossage ("invalid %%k value");
21420 else
21421 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21422 return;
21423
21424 case 'K':
21425 /* X must be a symbolic constant on ELF. Write an
21426 expression suitable for an 'addi' that adds in the low 16
21427 bits of the MEM. */
21428 if (GET_CODE (x) == CONST)
21429 {
21430 if (GET_CODE (XEXP (x, 0)) != PLUS
21431 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21432 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21433 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21434 output_operand_lossage ("invalid %%K value");
21435 }
21436 print_operand_address (file, x);
21437 fputs ("@l", file);
21438 return;
21439
21440 /* %l is output_asm_label. */
21441
21442 case 'L':
21443 /* Write second word of DImode or DFmode reference. Works on register
21444 or non-indexed memory only. */
21445 if (REG_P (x))
21446 fputs (reg_names[REGNO (x) + 1], file);
21447 else if (MEM_P (x))
21448 {
21449 machine_mode mode = GET_MODE (x);
21450 /* Handle possible auto-increment. Since it is pre-increment and
21451 we have already done it, we can just use an offset of one word.  */
21452 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21453 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21454 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21455 UNITS_PER_WORD));
21456 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21457 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21458 UNITS_PER_WORD));
21459 else
21460 output_address (mode, XEXP (adjust_address_nv (x, SImode,
21461 UNITS_PER_WORD),
21462 0));
21463
21464 if (small_data_operand (x, GET_MODE (x)))
21465 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21466 reg_names[SMALL_DATA_REG]);
21467 }
21468 return;
21469
21470 case 'N':
21471 /* Write the number of elements in the vector times 4. */
21472 if (GET_CODE (x) != PARALLEL)
21473 output_operand_lossage ("invalid %%N value");
21474 else
21475 fprintf (file, "%d", XVECLEN (x, 0) * 4);
21476 return;
21477
21478 case 'O':
21479 /* Similar, but subtract 1 first. */
21480 if (GET_CODE (x) != PARALLEL)
21481 output_operand_lossage ("invalid %%O value");
21482 else
21483 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
21484 return;
21485
21486 case 'p':
21487 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21488 if (! INT_P (x)
21489 || INTVAL (x) < 0
21490 || (i = exact_log2 (INTVAL (x))) < 0)
21491 output_operand_lossage ("invalid %%p value");
21492 else
21493 fprintf (file, "%d", i);
21494 return;
21495
21496 case 'P':
21497 /* The operand must be an indirect memory reference. The result
21498 is the register name. */
21499 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
21500 || REGNO (XEXP (x, 0)) >= 32)
21501 output_operand_lossage ("invalid %%P value");
21502 else
21503 fputs (reg_names[REGNO (XEXP (x, 0))], file);
21504 return;
21505
21506 case 'q':
21507 /* This outputs the logical code corresponding to a boolean
21508 expression. The expression may have one or both operands
21509 negated (if one, only the first one). For condition register
21510 logical operations, it will also treat the negated
21511 CR codes as NOTs, but not handle NOTs of them. */
21512 {
21513 const char *const *t = 0;
21514 const char *s;
21515 enum rtx_code code = GET_CODE (x);
21516 static const char * const tbl[3][3] = {
21517 { "and", "andc", "nor" },
21518 { "or", "orc", "nand" },
21519 { "xor", "eqv", "xor" } };
21520
21521 if (code == AND)
21522 t = tbl[0];
21523 else if (code == IOR)
21524 t = tbl[1];
21525 else if (code == XOR)
21526 t = tbl[2];
21527 else
21528 output_operand_lossage ("invalid %%q value");
21529
21530 if (GET_CODE (XEXP (x, 0)) != NOT)
21531 s = t[0];
21532 else
21533 {
21534 if (GET_CODE (XEXP (x, 1)) == NOT)
21535 s = t[2];
21536 else
21537 s = t[1];
21538 }
21539
21540 fputs (s, file);
21541 }
21542 return;
21543
21544 case 'Q':
21545 if (! TARGET_MFCRF)
21546 return;
21547 fputc (',', file);
21548 /* FALLTHRU */
21549
21550 case 'R':
21551 /* X is a CR register. Print the mask for `mtcrf'. */
21552 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21553 output_operand_lossage ("invalid %%R value");
21554 else
21555 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21556 return;
21557
21558 case 's':
21559 /* Low 5 bits of 32 - value.  */
21560 if (! INT_P (x))
21561 output_operand_lossage ("invalid %%s value");
21562 else
21563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21564 return;
21565
21566 case 't':
21567 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21568 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
21569
21570 /* Bit 3 is OV bit. */
21571 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21572
21573 /* If we want bit 31, write a shift count of zero, not 32. */
21574 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21575 return;
21576
21577 case 'T':
21578 /* Print the symbolic name of a branch target register. */
21579 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
21580 && REGNO (x) != CTR_REGNO))
21581 output_operand_lossage ("invalid %%T value");
21582 else if (REGNO (x) == LR_REGNO)
21583 fputs ("lr", file);
21584 else
21585 fputs ("ctr", file);
21586 return;
21587
21588 case 'u':
21589 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21590 for use in unsigned operand. */
21591 if (! INT_P (x))
21592 {
21593 output_operand_lossage ("invalid %%u value");
21594 return;
21595 }
21596
21597 uval = INTVAL (x);
21598 if ((uval & 0xffff) == 0)
21599 uval >>= 16;
21600
21601 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21602 return;
21603
21604 case 'v':
21605 /* High-order 16 bits of constant for use in signed operand. */
21606 if (! INT_P (x))
21607 output_operand_lossage ("invalid %%v value");
21608 else
21609 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21610 (INTVAL (x) >> 16) & 0xffff);
21611 return;
21612
21613 case 'U':
21614 /* Print `u' if this has an auto-increment or auto-decrement. */
21615 if (MEM_P (x)
21616 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21617 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21618 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21619 putc ('u', file);
21620 return;
21621
21622 case 'V':
21623 /* Print the trap code for this operand. */
21624 switch (GET_CODE (x))
21625 {
21626 case EQ:
21627 fputs ("eq", file); /* 4 */
21628 break;
21629 case NE:
21630 fputs ("ne", file); /* 24 */
21631 break;
21632 case LT:
21633 fputs ("lt", file); /* 16 */
21634 break;
21635 case LE:
21636 fputs ("le", file); /* 20 */
21637 break;
21638 case GT:
21639 fputs ("gt", file); /* 8 */
21640 break;
21641 case GE:
21642 fputs ("ge", file); /* 12 */
21643 break;
21644 case LTU:
21645 fputs ("llt", file); /* 2 */
21646 break;
21647 case LEU:
21648 fputs ("lle", file); /* 6 */
21649 break;
21650 case GTU:
21651 fputs ("lgt", file); /* 1 */
21652 break;
21653 case GEU:
21654 fputs ("lge", file); /* 5 */
21655 break;
21656 default:
21657 gcc_unreachable ();
21658 }
21659 break;
21660
21661 case 'w':
21662 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21663 normally. */
21664 if (INT_P (x))
21665 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21666 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21667 else
21668 print_operand (file, x, 0);
21669 return;
21670
21671 case 'x':
21672 /* X is a FPR or Altivec register used in a VSX context. */
21673 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
21674 output_operand_lossage ("invalid %%x value");
21675 else
21676 {
21677 int reg = REGNO (x);
21678 int vsx_reg = (FP_REGNO_P (reg)
21679 ? reg - 32
21680 : reg - FIRST_ALTIVEC_REGNO + 32);
21681
21682 #ifdef TARGET_REGNAMES
21683 if (TARGET_REGNAMES)
21684 fprintf (file, "%%vs%d", vsx_reg);
21685 else
21686 #endif
21687 fprintf (file, "%d", vsx_reg);
21688 }
21689 return;
21690
21691 case 'X':
21692 if (MEM_P (x)
21693 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21694 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21695 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21696 putc ('x', file);
21697 return;
21698
21699 case 'Y':
21700 /* Like 'L', for third word of TImode/PTImode */
21701 if (REG_P (x))
21702 fputs (reg_names[REGNO (x) + 2], file);
21703 else if (MEM_P (x))
21704 {
21705 machine_mode mode = GET_MODE (x);
21706 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21707 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21708 output_address (mode, plus_constant (Pmode,
21709 XEXP (XEXP (x, 0), 0), 8));
21710 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21711 output_address (mode, plus_constant (Pmode,
21712 XEXP (XEXP (x, 0), 0), 8));
21713 else
21714 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21715 if (small_data_operand (x, GET_MODE (x)))
21716 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21717 reg_names[SMALL_DATA_REG]);
21718 }
21719 return;
21720
21721 case 'z':
21722 /* X is a SYMBOL_REF. Write out the name preceded by a
21723 period and without any trailing data in brackets. Used for function
21724 names. If we are configured for System V (or the embedded ABI) on
21725 the PowerPC, do not emit the period, since those systems do not use
21726 TOCs and the like. */
21727 gcc_assert (GET_CODE (x) == SYMBOL_REF);
21728
21729 /* For macho, check to see if we need a stub. */
21730 if (TARGET_MACHO)
21731 {
21732 const char *name = XSTR (x, 0);
21733 #if TARGET_MACHO
21734 if (darwin_emit_branch_islands
21735 && MACHOPIC_INDIRECT
21736 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21737 name = machopic_indirection_name (x, /*stub_p=*/true);
21738 #endif
21739 assemble_name (file, name);
21740 }
21741 else if (!DOT_SYMBOLS)
21742 assemble_name (file, XSTR (x, 0));
21743 else
21744 rs6000_output_function_entry (file, XSTR (x, 0));
21745 return;
21746
21747 case 'Z':
21748 /* Like 'L', for last word of TImode/PTImode. */
21749 if (REG_P (x))
21750 fputs (reg_names[REGNO (x) + 3], file);
21751 else if (MEM_P (x))
21752 {
21753 machine_mode mode = GET_MODE (x);
21754 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21755 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21756 output_address (mode, plus_constant (Pmode,
21757 XEXP (XEXP (x, 0), 0), 12));
21758 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21759 output_address (mode, plus_constant (Pmode,
21760 XEXP (XEXP (x, 0), 0), 12));
21761 else
21762 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21763 if (small_data_operand (x, GET_MODE (x)))
21764 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21765 reg_names[SMALL_DATA_REG]);
21766 }
21767 return;
21768
21769 /* Print AltiVec memory operand. */
21770 case 'y':
21771 {
21772 rtx tmp;
21773
21774 gcc_assert (MEM_P (x));
21775
21776 tmp = XEXP (x, 0);
21777
21778 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
21779 && GET_CODE (tmp) == AND
21780 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
21781 && INTVAL (XEXP (tmp, 1)) == -16)
21782 tmp = XEXP (tmp, 0);
21783 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21784 && GET_CODE (tmp) == PRE_MODIFY)
21785 tmp = XEXP (tmp, 1);
21786 if (REG_P (tmp))
21787 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21788 else
21789 {
21790 if (GET_CODE (tmp) != PLUS
21791 || !REG_P (XEXP (tmp, 0))
21792 || !REG_P (XEXP (tmp, 1)))
21793 {
21794 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21795 break;
21796 }
21797
21798 if (REGNO (XEXP (tmp, 0)) == 0)
21799 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21800 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21801 else
21802 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21803 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21804 }
21805 break;
21806 }
21807
21808 case 0:
21809 if (REG_P (x))
21810 fprintf (file, "%s", reg_names[REGNO (x)]);
21811 else if (MEM_P (x))
21812 {
21813 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21814 know the width from the mode. */
21815 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21816 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21817 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21818 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21819 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21820 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21821 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21822 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21823 else
21824 output_address (GET_MODE (x), XEXP (x, 0));
21825 }
21826 else
21827 {
21828 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21829 /* This hack along with a corresponding hack in
21830 rs6000_output_addr_const_extra arranges to output addends
21831 where the assembler expects to find them. eg.
21832 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21833 without this hack would be output as "x@toc+4". We
21834 want "x+4@toc". */
21835 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21836 else
21837 output_addr_const (file, x);
21838 }
21839 return;
21840
21841 case '&':
21842 if (const char *name = get_some_local_dynamic_name ())
21843 assemble_name (file, name);
21844 else
21845 output_operand_lossage ("'%%&' used without any "
21846 "local dynamic TLS references");
21847 return;
21848
21849 default:
21850 output_operand_lossage ("invalid %%xn code");
21851 }
21852 }
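/* Example (exposition): the classic load template "lwz%U1%X1 %0,%1" uses
   two of the codes above -- %U1 appends "u" when operand 1 is an
   auto-update address and %X1 appends "x" when it is indexed, so one
   template covers lwz, lwzu, lwzx and lwzux.  */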
21853 \f
21854 /* Print the address of an operand. */
21855
21856 void
21857 print_operand_address (FILE *file, rtx x)
21858 {
21859 if (REG_P (x))
21860 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21861 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21862 || GET_CODE (x) == LABEL_REF)
21863 {
21864 output_addr_const (file, x);
21865 if (small_data_operand (x, GET_MODE (x)))
21866 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21867 reg_names[SMALL_DATA_REG]);
21868 else
21869 gcc_assert (!TARGET_TOC);
21870 }
21871 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21872 && REG_P (XEXP (x, 1)))
21873 {
21874 if (REGNO (XEXP (x, 0)) == 0)
21875 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21876 reg_names[ REGNO (XEXP (x, 0)) ]);
21877 else
21878 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21879 reg_names[ REGNO (XEXP (x, 1)) ]);
21880 }
21881 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21882 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21883 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21884 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21885 #if TARGET_MACHO
21886 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21887 && CONSTANT_P (XEXP (x, 1)))
21888 {
21889 fprintf (file, "lo16(");
21890 output_addr_const (file, XEXP (x, 1));
21891 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21892 }
21893 #endif
21894 #if TARGET_ELF
21895 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21896 && CONSTANT_P (XEXP (x, 1)))
21897 {
21898 output_addr_const (file, XEXP (x, 1));
21899 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21900 }
21901 #endif
21902 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21903 {
21904 /* This hack along with a corresponding hack in
21905 rs6000_output_addr_const_extra arranges to output addends
21906 where the assembler expects to find them. eg.
21907 (lo_sum (reg 9)
21908 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21909 without this hack would be output as "x@toc+8@l(9)". We
21910 want "x+8@toc@l(9)". */
21911 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21912 if (GET_CODE (x) == LO_SUM)
21913 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21914 else
21915 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21916 }
21917 else
21918 gcc_unreachable ();
21919 }
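/* Examples (exposition): (reg 9) prints as "0(9)" and
   (plus (reg 9) (const_int 16)) as "16(9)".  For an indexed address whose
   first register is r0, the two registers are swapped on output, because
   the RA field of an X-form instruction reads as the constant 0 when it
   names r0.  */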
21920 \f
21921 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21922
21923 static bool
21924 rs6000_output_addr_const_extra (FILE *file, rtx x)
21925 {
21926 if (GET_CODE (x) == UNSPEC)
21927 switch (XINT (x, 1))
21928 {
21929 case UNSPEC_TOCREL:
21930 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21931 && REG_P (XVECEXP (x, 0, 1))
21932 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21933 output_addr_const (file, XVECEXP (x, 0, 0));
21934 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21935 {
21936 if (INTVAL (tocrel_offset_oac) >= 0)
21937 fprintf (file, "+");
21938 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21939 }
21940 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21941 {
21942 putc ('-', file);
21943 assemble_name (file, toc_label_name);
21944 need_toc_init = 1;
21945 }
21946 else if (TARGET_ELF)
21947 fputs ("@toc", file);
21948 return true;
21949
21950 #if TARGET_MACHO
21951 case UNSPEC_MACHOPIC_OFFSET:
21952 output_addr_const (file, XVECEXP (x, 0, 0));
21953 putc ('-', file);
21954 machopic_output_function_base_name (file);
21955 return true;
21956 #endif
21957 }
21958 return false;
21959 }
21960 \f
21961 /* Target hook for assembling integer objects. The PowerPC version has
21962 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21963 is defined. It also needs to handle DI-mode objects on 64-bit
21964 targets. */
21965
21966 static bool
21967 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21968 {
21969 #ifdef RELOCATABLE_NEEDS_FIXUP
21970 /* Special handling for SI values. */
21971 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21972 {
21973 static int recurse = 0;
21974
21975 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21976 the .fixup section. Since the TOC section is already relocated, we
21977 don't need to mark it here. We used to skip the text section, but it
21978 should never be valid for relocated addresses to be placed in the text
21979 section. */
21980 if (DEFAULT_ABI == ABI_V4
21981 && (TARGET_RELOCATABLE || flag_pic > 1)
21982 && in_section != toc_section
21983 && !recurse
21984 && !CONST_SCALAR_INT_P (x)
21985 && CONSTANT_P (x))
21986 {
21987 char buf[256];
21988
21989 recurse = 1;
21990 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21991 fixuplabelno++;
21992 ASM_OUTPUT_LABEL (asm_out_file, buf);
21993 fprintf (asm_out_file, "\t.long\t(");
21994 output_addr_const (asm_out_file, x);
21995 fprintf (asm_out_file, ")@fixup\n");
21996 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21997 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21998 fprintf (asm_out_file, "\t.long\t");
21999 assemble_name (asm_out_file, buf);
22000 fprintf (asm_out_file, "\n\t.previous\n");
22001 recurse = 0;
22002 return true;
22003 }
22004 /* Remove initial .'s to turn a -mcall-aixdesc function
22005 address into the address of the descriptor, not the function
22006 itself. */
22007 else if (GET_CODE (x) == SYMBOL_REF
22008 && XSTR (x, 0)[0] == '.'
22009 && DEFAULT_ABI == ABI_AIX)
22010 {
22011 const char *name = XSTR (x, 0);
22012 while (*name == '.')
22013 name++;
22014
22015 fprintf (asm_out_file, "\t.long\t%s\n", name);
22016 return true;
22017 }
22018 }
22019 #endif /* RELOCATABLE_NEEDS_FIXUP */
22020 return default_assemble_integer (x, size, aligned_p);
22021 }
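/* Roughly, the fixup path above emits (exposition; label name made up):
	.LCP1:
		.long (sym)@fixup
		.section ".fixup","aw"
		.align 2
		.long .LCP1
		.previous
   so the .fixup section lists every word the loader must relocate.  */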
22022
22023 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
22024 /* Emit an assembler directive to set symbol visibility for DECL to
22025 VISIBILITY_TYPE. */
22026
22027 static void
22028 rs6000_assemble_visibility (tree decl, int vis)
22029 {
22030 if (TARGET_XCOFF)
22031 return;
22032
22033 /* Functions need to have their entry point symbol visibility set as
22034 well as their descriptor symbol visibility. */
22035 if (DEFAULT_ABI == ABI_AIX
22036 && DOT_SYMBOLS
22037 && TREE_CODE (decl) == FUNCTION_DECL)
22038 {
22039 static const char * const visibility_types[] = {
22040 NULL, "protected", "hidden", "internal"
22041 };
22042
22043 const char *name, *type;
22044
22045 name = ((* targetm.strip_name_encoding)
22046 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
22047 type = visibility_types[vis];
22048
22049 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
22050 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
22051 }
22052 else
22053 default_assemble_visibility (decl, vis);
22054 }
22055 #endif
22056 \f
22057 enum rtx_code
22058 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
22059 {
22060 /* Reversal of FP compares needs care -- an ordered compare
22061 becomes an unordered compare and vice versa. */
22062 if (mode == CCFPmode
22063 && (!flag_finite_math_only
22064 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
22065 || code == UNEQ || code == LTGT))
22066 return reverse_condition_maybe_unordered (code);
22067 else
22068 return reverse_condition (code);
22069 }
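/* Example (exposition): reversing GE in CCFPmode yields UNLT, not LT,
   because !(a >= b) must also hold when a and b compare unordered, so the
   NaN case has to move to the reversed condition.  */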
22070
22071 /* Generate a compare for CODE. Return a brand-new rtx that
22072 represents the result of the compare. */
22073
22074 static rtx
22075 rs6000_generate_compare (rtx cmp, machine_mode mode)
22076 {
22077 machine_mode comp_mode;
22078 rtx compare_result;
22079 enum rtx_code code = GET_CODE (cmp);
22080 rtx op0 = XEXP (cmp, 0);
22081 rtx op1 = XEXP (cmp, 1);
22082
22083 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22084 comp_mode = CCmode;
22085 else if (FLOAT_MODE_P (mode))
22086 comp_mode = CCFPmode;
22087 else if (code == GTU || code == LTU
22088 || code == GEU || code == LEU)
22089 comp_mode = CCUNSmode;
22090 else if ((code == EQ || code == NE)
22091 && unsigned_reg_p (op0)
22092 && (unsigned_reg_p (op1)
22093 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
22094 /* These are unsigned values; perhaps there will be a later
22095 ordering compare that can be shared with this one. */
22096 comp_mode = CCUNSmode;
22097 else
22098 comp_mode = CCmode;
22099
22100 /* If we have an unsigned compare, make sure we don't have a signed value as
22101 an immediate. */
22102 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
22103 && INTVAL (op1) < 0)
22104 {
22105 op0 = copy_rtx_if_shared (op0);
22106 op1 = force_reg (GET_MODE (op0), op1);
22107 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
22108 }
22109
22110 /* First, the compare. */
22111 compare_result = gen_reg_rtx (comp_mode);
22112
22113 /* IEEE 128-bit support in VSX registers when we do not have hardware
22114 support. */
22115 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
22116 {
22117 rtx libfunc = NULL_RTX;
22118 bool check_nan = false;
22119 rtx dest;
22120
22121 switch (code)
22122 {
22123 case EQ:
22124 case NE:
22125 libfunc = optab_libfunc (eq_optab, mode);
22126 break;
22127
22128 case GT:
22129 case GE:
22130 libfunc = optab_libfunc (ge_optab, mode);
22131 break;
22132
22133 case LT:
22134 case LE:
22135 libfunc = optab_libfunc (le_optab, mode);
22136 break;
22137
22138 case UNORDERED:
22139 case ORDERED:
22140 libfunc = optab_libfunc (unord_optab, mode);
22141 code = (code == UNORDERED) ? NE : EQ;
22142 break;
22143
22144 case UNGE:
22145 case UNGT:
22146 check_nan = true;
22147 libfunc = optab_libfunc (ge_optab, mode);
22148 code = (code == UNGE) ? GE : GT;
22149 break;
22150
22151 case UNLE:
22152 case UNLT:
22153 check_nan = true;
22154 libfunc = optab_libfunc (le_optab, mode);
22155 code = (code == UNLE) ? LE : LT;
22156 break;
22157
22158 case UNEQ:
22159 case LTGT:
22160 check_nan = true;
22161 libfunc = optab_libfunc (eq_optab, mode);
22162 code = (code == UNEQ) ? EQ : NE;
22163 break;
22164
22165 default:
22166 gcc_unreachable ();
22167 }
22168
22169 gcc_assert (libfunc);
22170
22171 if (!check_nan)
22172 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22173 SImode, 2, op0, mode, op1, mode);
22174
22175 /* The library signals an exception for signalling NaNs, so we need to
22176 handle isgreater, etc. by first checking isordered. */
22177 else
22178 {
22179 rtx ne_rtx, normal_dest, unord_dest;
22180 rtx unord_func = optab_libfunc (unord_optab, mode);
22181 rtx join_label = gen_label_rtx ();
22182 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22183 rtx unord_cmp = gen_reg_rtx (comp_mode);
22184
22185
22186 /* Test for either value being a NaN. */
22187 gcc_assert (unord_func);
22188 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22189 SImode, 2, op0, mode, op1,
22190 mode);
22191
22192 /* Set the result to 1 if either value is a NaN, and jump to the
22193 join label, where it is compared against zero.  */
22194 dest = gen_reg_rtx (SImode);
22195 emit_move_insn (dest, const1_rtx);
22196 emit_insn (gen_rtx_SET (unord_cmp,
22197 gen_rtx_COMPARE (comp_mode, unord_dest,
22198 const0_rtx)));
22199
22200 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22201 emit_jump_insn (gen_rtx_SET (pc_rtx,
22202 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22203 join_ref,
22204 pc_rtx)));
22205
22206 /* Do the normal comparison, knowing that the values are not
22207 NaNs. */
22208 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22209 SImode, 2, op0, mode, op1,
22210 mode);
22211
22212 emit_insn (gen_cstoresi4 (dest,
22213 gen_rtx_fmt_ee (code, SImode, normal_dest,
22214 const0_rtx),
22215 normal_dest, const0_rtx));
22216
22217 /* Join the NaN and non-NaN paths. Compare dest against 0. */
22218 emit_label (join_label);
22219 code = NE;
22220 }
22221
22222 emit_insn (gen_rtx_SET (compare_result,
22223 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22224 }
22225
22226 else
22227 {
22228 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22229 CLOBBERs to match cmptf_internal2 pattern. */
22230 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22231 && FLOAT128_IBM_P (GET_MODE (op0))
22232 && TARGET_HARD_FLOAT)
22233 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22234 gen_rtvec (10,
22235 gen_rtx_SET (compare_result,
22236 gen_rtx_COMPARE (comp_mode, op0, op1)),
22237 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22238 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22239 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22240 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22241 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22242 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22243 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22244 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22245 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
22246 else if (GET_CODE (op1) == UNSPEC
22247 && XINT (op1, 1) == UNSPEC_SP_TEST)
22248 {
22249 rtx op1b = XVECEXP (op1, 0, 0);
22250 comp_mode = CCEQmode;
22251 compare_result = gen_reg_rtx (CCEQmode);
22252 if (TARGET_64BIT)
22253 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22254 else
22255 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22256 }
22257 else
22258 emit_insn (gen_rtx_SET (compare_result,
22259 gen_rtx_COMPARE (comp_mode, op0, op1)));
22260 }
22261
22262 /* Some kinds of FP comparisons need an OR operation;
22263 under flag_finite_math_only we don't bother. */
22264 if (FLOAT_MODE_P (mode)
22265 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22266 && !flag_finite_math_only
22267 && (code == LE || code == GE
22268 || code == UNEQ || code == LTGT
22269 || code == UNGT || code == UNLT))
22270 {
22271 enum rtx_code or1, or2;
22272 rtx or1_rtx, or2_rtx, compare2_rtx;
22273 rtx or_result = gen_reg_rtx (CCEQmode);
22274
22275 switch (code)
22276 {
22277 case LE: or1 = LT; or2 = EQ; break;
22278 case GE: or1 = GT; or2 = EQ; break;
22279 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22280 case LTGT: or1 = LT; or2 = GT; break;
22281 case UNGT: or1 = UNORDERED; or2 = GT; break;
22282 case UNLT: or1 = UNORDERED; or2 = LT; break;
22283 default: gcc_unreachable ();
22284 }
22285 validate_condition_mode (or1, comp_mode);
22286 validate_condition_mode (or2, comp_mode);
22287 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22288 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22289 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22290 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22291 const_true_rtx);
22292 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22293
22294 compare_result = or_result;
22295 code = EQ;
22296 }
22297
22298 validate_condition_mode (code, GET_MODE (compare_result));
22299
22300 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
22301 }
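/* Illustrative sketch of the soft-float path above: without
   TARGET_FLOAT128_HW, a KFmode comparison such as "a <= b" becomes a
   call to the le_optab libfunc (named __lekf2 in libgcc, assuming the
   usual soft-float naming) followed by an ordinary SImode compare of
   the call's result, roughly:

	bl __lekf2		# result <= 0 iff a <= b
	cmpwi 0,3,0		# compare the returned value with zero
	ble 0,<target>		# branch on the recovered condition  */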
22302
22303 \f
22304 /* Return the diagnostic message string if the binary operation OP is
22305 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22306
22307 static const char*
22308 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22309 const_tree type1,
22310 const_tree type2)
22311 {
22312 machine_mode mode1 = TYPE_MODE (type1);
22313 machine_mode mode2 = TYPE_MODE (type2);
22314
22315 /* For complex modes, use the inner type. */
22316 if (COMPLEX_MODE_P (mode1))
22317 mode1 = GET_MODE_INNER (mode1);
22318
22319 if (COMPLEX_MODE_P (mode2))
22320 mode2 = GET_MODE_INNER (mode2);
22321
22322 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22323 double to intermix unless -mfloat128-convert. */
22324 if (mode1 == mode2)
22325 return NULL;
22326
22327 if (!TARGET_FLOAT128_CVT)
22328 {
22329 if ((mode1 == KFmode && mode2 == IFmode)
22330 || (mode1 == IFmode && mode2 == KFmode))
22331 return N_("__float128 and __ibm128 cannot be used in the same "
22332 "expression");
22333
22334 if (TARGET_IEEEQUAD
22335 && ((mode1 == IFmode && mode2 == TFmode)
22336 || (mode1 == TFmode && mode2 == IFmode)))
22337 return N_("__ibm128 and long double cannot be used in the same "
22338 "expression");
22339
22340 if (!TARGET_IEEEQUAD
22341 && ((mode1 == KFmode && mode2 == TFmode)
22342 || (mode1 == TFmode && mode2 == KFmode)))
22343 return N_("__float128 and long double cannot be used in the same "
22344 "expression");
22345 }
22346
22347 return NULL;
22348 }
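/* For illustration, the kind of (hypothetical) user code the checks
   above reject when -mfloat128-convert is not in effect:

	__float128 q;
	__ibm128 d;
	... q + d ...	// error: __float128 and __ibm128 cannot be
			// used in the same expression

   With -mfloat128-convert the modes still differ, but the implicit
   conversion is allowed and NULL is returned.  */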
22349
22350 \f
22351 /* Expand floating point conversion to/from __float128 and __ibm128. */
22352
22353 void
22354 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22355 {
22356 machine_mode dest_mode = GET_MODE (dest);
22357 machine_mode src_mode = GET_MODE (src);
22358 convert_optab cvt = unknown_optab;
22359 bool do_move = false;
22360 rtx libfunc = NULL_RTX;
22361 rtx dest2;
22362 typedef rtx (*rtx_2func_t) (rtx, rtx);
22363 rtx_2func_t hw_convert = (rtx_2func_t)0;
22364 size_t kf_or_tf;
22365
22366 struct hw_conv_t {
22367 rtx_2func_t from_df;
22368 rtx_2func_t from_sf;
22369 rtx_2func_t from_si_sign;
22370 rtx_2func_t from_si_uns;
22371 rtx_2func_t from_di_sign;
22372 rtx_2func_t from_di_uns;
22373 rtx_2func_t to_df;
22374 rtx_2func_t to_sf;
22375 rtx_2func_t to_si_sign;
22376 rtx_2func_t to_si_uns;
22377 rtx_2func_t to_di_sign;
22378 rtx_2func_t to_di_uns;
22379 } hw_conversions[2] = {
22380 /* conversions to/from KFmode */
22381 {
22382 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22383 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22384 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22385 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22386 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22387 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22388 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22389 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22390 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22391 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22392 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
22393 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
22394 },
22395
22396 /* conversions to/from TFmode */
22397 {
22398 gen_extenddftf2_hw, /* TFmode <- DFmode. */
22399 gen_extendsftf2_hw, /* TFmode <- SFmode. */
22400 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
22401 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
22402 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
22403 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
22404 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
22405 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
22406 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
22407 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
22408 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
22409 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
22410 },
22411 };
22412
22413 if (dest_mode == src_mode)
22414 gcc_unreachable ();
22415
22416 /* Eliminate memory operations. */
22417 if (MEM_P (src))
22418 src = force_reg (src_mode, src);
22419
22420 if (MEM_P (dest))
22421 {
22422 rtx tmp = gen_reg_rtx (dest_mode);
22423 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22424 rs6000_emit_move (dest, tmp, dest_mode);
22425 return;
22426 }
22427
22428 /* Convert to IEEE 128-bit floating point. */
22429 if (FLOAT128_IEEE_P (dest_mode))
22430 {
22431 if (dest_mode == KFmode)
22432 kf_or_tf = 0;
22433 else if (dest_mode == TFmode)
22434 kf_or_tf = 1;
22435 else
22436 gcc_unreachable ();
22437
22438 switch (src_mode)
22439 {
22440 case DFmode:
22441 cvt = sext_optab;
22442 hw_convert = hw_conversions[kf_or_tf].from_df;
22443 break;
22444
22445 case SFmode:
22446 cvt = sext_optab;
22447 hw_convert = hw_conversions[kf_or_tf].from_sf;
22448 break;
22449
22450 case KFmode:
22451 case IFmode:
22452 case TFmode:
22453 if (FLOAT128_IBM_P (src_mode))
22454 cvt = sext_optab;
22455 else
22456 do_move = true;
22457 break;
22458
22459 case SImode:
22460 if (unsigned_p)
22461 {
22462 cvt = ufloat_optab;
22463 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22464 }
22465 else
22466 {
22467 cvt = sfloat_optab;
22468 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22469 }
22470 break;
22471
22472 case DImode:
22473 if (unsigned_p)
22474 {
22475 cvt = ufloat_optab;
22476 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22477 }
22478 else
22479 {
22480 cvt = sfloat_optab;
22481 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22482 }
22483 break;
22484
22485 default:
22486 gcc_unreachable ();
22487 }
22488 }
22489
22490 /* Convert from IEEE 128-bit floating point. */
22491 else if (FLOAT128_IEEE_P (src_mode))
22492 {
22493 if (src_mode == KFmode)
22494 kf_or_tf = 0;
22495 else if (src_mode == TFmode)
22496 kf_or_tf = 1;
22497 else
22498 gcc_unreachable ();
22499
22500 switch (dest_mode)
22501 {
22502 case DFmode:
22503 cvt = trunc_optab;
22504 hw_convert = hw_conversions[kf_or_tf].to_df;
22505 break;
22506
22507 case SFmode:
22508 cvt = trunc_optab;
22509 hw_convert = hw_conversions[kf_or_tf].to_sf;
22510 break;
22511
22512 case KFmode:
22513 case IFmode:
22514 case TFmode:
22515 if (FLOAT128_IBM_P (dest_mode))
22516 cvt = trunc_optab;
22517 else
22518 do_move = true;
22519 break;
22520
22521 case SImode:
22522 if (unsigned_p)
22523 {
22524 cvt = ufix_optab;
22525 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22526 }
22527 else
22528 {
22529 cvt = sfix_optab;
22530 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22531 }
22532 break;
22533
22534 case DImode:
22535 if (unsigned_p)
22536 {
22537 cvt = ufix_optab;
22538 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22539 }
22540 else
22541 {
22542 cvt = sfix_optab;
22543 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22544 }
22545 break;
22546
22547 default:
22548 gcc_unreachable ();
22549 }
22550 }
22551
22552 /* Both IBM format. */
22553 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22554 do_move = true;
22555
22556 else
22557 gcc_unreachable ();
22558
22559 /* Handle conversion between TFmode/KFmode. */
22560 if (do_move)
22561 emit_move_insn (dest, gen_lowpart (dest_mode, src));
22562
22563 /* Handle conversion if we have hardware support. */
22564 else if (TARGET_FLOAT128_HW && hw_convert)
22565 emit_insn ((hw_convert) (dest, src));
22566
22567 /* Call an external function to do the conversion. */
22568 else if (cvt != unknown_optab)
22569 {
22570 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22571 gcc_assert (libfunc != NULL_RTX);
22572
22573 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
22574 src_mode);
22575
22576 gcc_assert (dest2 != NULL_RTX);
22577 if (!rtx_equal_p (dest, dest2))
22578 emit_move_insn (dest, dest2);
22579 }
22580
22581 else
22582 gcc_unreachable ();
22583
22584 return;
22585 }
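/* Worked example of the dispatch above: converting DFmode to KFmode
   selects hw_conversions[0].from_df. With hardware support that emits
   gen_extenddfkf2_hw (an xscvdpqp on power9); otherwise the sext_optab
   libfunc is called instead (typically __extenddfkf2, assuming the
   usual libgcc naming). A KFmode <-> TFmode conversion between
   identical formats is handled as a plain register move via
   do_move.  */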
22586
22587 \f
22588 /* Emit the RTL for an sISEL pattern. */
22589
22590 void
22591 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
22592 {
22593 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
22594 }
22595
22596 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22597 can be used as that dest register. Return the dest register. */
22598
22599 rtx
22600 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22601 {
22602 if (op2 == const0_rtx)
22603 return op1;
22604
22605 if (GET_CODE (scratch) == SCRATCH)
22606 scratch = gen_reg_rtx (mode);
22607
22608 if (logical_operand (op2, mode))
22609 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22610 else
22611 emit_insn (gen_rtx_SET (scratch,
22612 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22613
22614 return scratch;
22615 }
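/* Example of the scratch computation above (register numbers are
   arbitrary): for "r3 == 0x4000" the constant is a logical_operand, so
   a single XOR leaves the scratch register zero exactly when the
   operands are equal:

	xori 9,3,0x4000

   Constants outside the xori/xoris range take the PLUS/negate path
   instead, i.e. a subtraction.  */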
22616
22617 void
22618 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22619 {
22620 rtx condition_rtx;
22621 machine_mode op_mode;
22622 enum rtx_code cond_code;
22623 rtx result = operands[0];
22624
22625 condition_rtx = rs6000_generate_compare (operands[1], mode);
22626 cond_code = GET_CODE (condition_rtx);
22627
22628 if (cond_code == NE
22629 || cond_code == GE || cond_code == LE
22630 || cond_code == GEU || cond_code == LEU
22631 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22632 {
22633 rtx not_result = gen_reg_rtx (CCEQmode);
22634 rtx not_op, rev_cond_rtx;
22635 machine_mode cc_mode;
22636
22637 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22638
22639 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22640 SImode, XEXP (condition_rtx, 0), const0_rtx);
22641 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22642 emit_insn (gen_rtx_SET (not_result, not_op));
22643 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22644 }
22645
22646 op_mode = GET_MODE (XEXP (operands[1], 0));
22647 if (op_mode == VOIDmode)
22648 op_mode = GET_MODE (XEXP (operands[1], 1));
22649
22650 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22651 {
22652 PUT_MODE (condition_rtx, DImode);
22653 convert_move (result, condition_rtx, 0);
22654 }
22655 else
22656 {
22657 PUT_MODE (condition_rtx, SImode);
22658 emit_insn (gen_rtx_SET (result, condition_rtx));
22659 }
22660 }
22661
22662 /* Emit a conditional branch to the label in OPERANDS[3], testing the
   comparison described by OPERANDS[0]. */
22663
22664 void
22665 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22666 {
22667 rtx condition_rtx, loc_ref;
22668
22669 condition_rtx = rs6000_generate_compare (operands[0], mode);
22670 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22671 emit_jump_insn (gen_rtx_SET (pc_rtx,
22672 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22673 loc_ref, pc_rtx)));
22674 }
22675
22676 /* Return the string to output a conditional branch to LABEL, which is
22677 the operand template of the label, or NULL if the branch is really a
22678 conditional return.
22679
22680 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22681 condition code register and its mode specifies what kind of
22682 comparison we made.
22683
22684 REVERSED is nonzero if we should reverse the sense of the comparison.
22685
22686 INSN is the insn. */
22687
22688 char *
22689 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22690 {
22691 static char string[64];
22692 enum rtx_code code = GET_CODE (op);
22693 rtx cc_reg = XEXP (op, 0);
22694 machine_mode mode = GET_MODE (cc_reg);
22695 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22696 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22697 int really_reversed = reversed ^ need_longbranch;
22698 char *s = string;
22699 const char *ccode;
22700 const char *pred;
22701 rtx note;
22702
22703 validate_condition_mode (code, mode);
22704
22705 /* Work out which way this really branches. We could always use
22706 reverse_condition_maybe_unordered here, but distinguishing the FP
22707 case makes the resulting assembler clearer. */
22708 if (really_reversed)
22709 {
22710 /* Reversal of FP compares takes care -- an ordered compare
22711 becomes an unordered compare and vice versa. */
22712 if (mode == CCFPmode)
22713 code = reverse_condition_maybe_unordered (code);
22714 else
22715 code = reverse_condition (code);
22716 }
22717
22718 switch (code)
22719 {
22720 /* Not all of these are actually distinct opcodes, but
22721 we distinguish them for clarity of the resulting assembler. */
22722 case NE: case LTGT:
22723 ccode = "ne"; break;
22724 case EQ: case UNEQ:
22725 ccode = "eq"; break;
22726 case GE: case GEU:
22727 ccode = "ge"; break;
22728 case GT: case GTU: case UNGT:
22729 ccode = "gt"; break;
22730 case LE: case LEU:
22731 ccode = "le"; break;
22732 case LT: case LTU: case UNLT:
22733 ccode = "lt"; break;
22734 case UNORDERED: ccode = "un"; break;
22735 case ORDERED: ccode = "nu"; break;
22736 case UNGE: ccode = "nl"; break;
22737 case UNLE: ccode = "ng"; break;
22738 default:
22739 gcc_unreachable ();
22740 }
22741
22742 /* Maybe we have a guess as to how likely the branch is. */
22743 pred = "";
22744 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22745 if (note != NULL_RTX)
22746 {
22747 /* PROB is the difference from 50%. */
22748 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
22749
22750 /* Only hint for highly probable/improbable branches on newer cpus when
22751 we have real profile data, as static prediction overrides processor
22752 dynamic prediction. For older cpus we may as well always hint, but
22753 assume not taken for branches that are very close to 50% as a
22754 mispredicted taken branch is more expensive than a
22755 mispredicted not-taken branch. */
22756 if (rs6000_always_hint
22757 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22758 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22759 && br_prob_note_reliable_p (note)))
22760 {
22761 if (abs (prob) > REG_BR_PROB_BASE / 20
22762 && ((prob > 0) ^ need_longbranch))
22763 pred = "+";
22764 else
22765 pred = "-";
22766 }
22767 }
22768
22769 if (label == NULL)
22770 s += sprintf (s, "b%slr%s ", ccode, pred);
22771 else
22772 s += sprintf (s, "b%s%s ", ccode, pred);
22773
22774 /* We need to escape any '%' characters in the reg_names string.
22775 Assume they'd only be the first character.... */
22776 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22777 *s++ = '%';
22778 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22779
22780 if (label != NULL)
22781 {
22782 /* If the branch distance was too far, we may have to use an
22783 unconditional branch to go the distance. */
22784 if (need_longbranch)
22785 s += sprintf (s, ",$+8\n\tb %s", label);
22786 else
22787 s += sprintf (s, ",%s", label);
22788 }
22789
22790 return string;
22791 }
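/* Sample outputs of the routine above (with the default register
   names): a predicted-taken signed greater-than on cr2 prints as
   "bgt+ 2,L42"; a conditional return prints as "bgtlr 2"; and when the
   target is out of conditional-branch range the condition is reversed
   around an unconditional branch:

	ble 2,$+8
	b L42  */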
22792
22793 /* Return insn for VSX or Altivec comparisons. */
22794
22795 static rtx
22796 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22797 {
22798 rtx mask;
22799 machine_mode mode = GET_MODE (op0);
22800
22801 switch (code)
22802 {
22803 default:
22804 break;
22805
22806 case GE:
22807 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22808 return NULL_RTX;
22809 /* FALLTHRU */
22810
22811 case EQ:
22812 case GT:
22813 case GTU:
22814 case ORDERED:
22815 case UNORDERED:
22816 case UNEQ:
22817 case LTGT:
22818 mask = gen_reg_rtx (mode);
22819 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22820 return mask;
22821 }
22822
22823 return NULL_RTX;
22824 }
22825
22826 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22827 DMODE is expected destination mode. This is a recursive function. */
22828
22829 static rtx
22830 rs6000_emit_vector_compare (enum rtx_code rcode,
22831 rtx op0, rtx op1,
22832 machine_mode dmode)
22833 {
22834 rtx mask;
22835 bool swap_operands = false;
22836 bool try_again = false;
22837
22838 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22839 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22840
22841 /* See if the comparison works as is. */
22842 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22843 if (mask)
22844 return mask;
22845
22846 switch (rcode)
22847 {
22848 case LT:
22849 rcode = GT;
22850 swap_operands = true;
22851 try_again = true;
22852 break;
22853 case LTU:
22854 rcode = GTU;
22855 swap_operands = true;
22856 try_again = true;
22857 break;
22858 case NE:
22859 case UNLE:
22860 case UNLT:
22861 case UNGE:
22862 case UNGT:
22863 /* Invert condition and try again.
22864 e.g., A != B becomes ~(A==B). */
22865 {
22866 enum rtx_code rev_code;
22867 enum insn_code nor_code;
22868 rtx mask2;
22869
22870 rev_code = reverse_condition_maybe_unordered (rcode);
22871 if (rev_code == UNKNOWN)
22872 return NULL_RTX;
22873
22874 nor_code = optab_handler (one_cmpl_optab, dmode);
22875 if (nor_code == CODE_FOR_nothing)
22876 return NULL_RTX;
22877
22878 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22879 if (!mask2)
22880 return NULL_RTX;
22881
22882 mask = gen_reg_rtx (dmode);
22883 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22884 return mask;
22885 }
22886 break;
22887 case GE:
22888 case GEU:
22889 case LE:
22890 case LEU:
22891 /* Try GT/GTU/LT/LTU OR EQ */
22892 {
22893 rtx c_rtx, eq_rtx;
22894 enum insn_code ior_code;
22895 enum rtx_code new_code;
22896
22897 switch (rcode)
22898 {
22899 case GE:
22900 new_code = GT;
22901 break;
22902
22903 case GEU:
22904 new_code = GTU;
22905 break;
22906
22907 case LE:
22908 new_code = LT;
22909 break;
22910
22911 case LEU:
22912 new_code = LTU;
22913 break;
22914
22915 default:
22916 gcc_unreachable ();
22917 }
22918
22919 ior_code = optab_handler (ior_optab, dmode);
22920 if (ior_code == CODE_FOR_nothing)
22921 return NULL_RTX;
22922
22923 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22924 if (!c_rtx)
22925 return NULL_RTX;
22926
22927 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22928 if (!eq_rtx)
22929 return NULL_RTX;
22930
22931 mask = gen_reg_rtx (dmode);
22932 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22933 return mask;
22934 }
22935 break;
22936 default:
22937 return NULL_RTX;
22938 }
22939
22940 if (try_again)
22941 {
22942 if (swap_operands)
22943 std::swap (op0, op1);
22944
22945 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22946 if (mask)
22947 return mask;
22948 }
22949
22950 /* You only get two chances. */
22951 return NULL_RTX;
22952 }
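/* Example of the decomposition above (symbolic register names):
   V4SImode "a <= b" has no direct AltiVec compare, so LE is split into
   LT IOR EQ, and the recursive LT succeeds by swapping into GT:

	vcmpgtsw tmp1,vb,va	# b > a, i.e. a < b
	vcmpequw tmp2,va,vb	# a == b
	vor mask,tmp1,tmp2	# a <= b  */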
22953
22954 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22955 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22956 operands for the relation operation COND. */
22957
22958 int
22959 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22960 rtx cond, rtx cc_op0, rtx cc_op1)
22961 {
22962 machine_mode dest_mode = GET_MODE (dest);
22963 machine_mode mask_mode = GET_MODE (cc_op0);
22964 enum rtx_code rcode = GET_CODE (cond);
22965 machine_mode cc_mode = CCmode;
22966 rtx mask;
22967 rtx cond2;
22968 bool invert_move = false;
22969
22970 if (VECTOR_UNIT_NONE_P (dest_mode))
22971 return 0;
22972
22973 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22974 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22975
22976 switch (rcode)
22977 {
22978 /* Swap operands if we can; otherwise fall back to doing the
22979 operation as specified and using a NOR to invert the test. */
22980 case NE:
22981 case UNLE:
22982 case UNLT:
22983 case UNGE:
22984 case UNGT:
22985 /* Invert condition and try again.
22986 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22987 invert_move = true;
22988 rcode = reverse_condition_maybe_unordered (rcode);
22989 if (rcode == UNKNOWN)
22990 return 0;
22991 break;
22992
22993 case GE:
22994 case LE:
22995 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22996 {
22997 /* Invert condition to avoid compound test. */
22998 invert_move = true;
22999 rcode = reverse_condition (rcode);
23000 }
23001 break;
23002
23003 case GTU:
23004 case GEU:
23005 case LTU:
23006 case LEU:
23007 /* Mark unsigned tests with CCUNSmode. */
23008 cc_mode = CCUNSmode;
23009
23010 /* Invert condition to avoid compound test if necessary. */
23011 if (rcode == GEU || rcode == LEU)
23012 {
23013 invert_move = true;
23014 rcode = reverse_condition (rcode);
23015 }
23016 break;
23017
23018 default:
23019 break;
23020 }
23021
23022 /* Get the vector mask for the given relational operations. */
23023 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
23024
23025 if (!mask)
23026 return 0;
23027
23028 if (invert_move)
23029 std::swap (op_true, op_false);
23030
23031 /* The comparison mask is known to be -1/0, so optimize selects whose
   arms are the constant -1/0 vectors. */
23032 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
23033 && (GET_CODE (op_true) == CONST_VECTOR
23034 || GET_CODE (op_false) == CONST_VECTOR))
23035 {
23036 rtx constant_0 = CONST0_RTX (dest_mode);
23037 rtx constant_m1 = CONSTM1_RTX (dest_mode);
23038
23039 if (op_true == constant_m1 && op_false == constant_0)
23040 {
23041 emit_move_insn (dest, mask);
23042 return 1;
23043 }
23044
23045 else if (op_true == constant_0 && op_false == constant_m1)
23046 {
23047 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
23048 return 1;
23049 }
23050
23051 /* If we can't use the vector comparison directly, perhaps we can use
23052 the mask for the true or false fields, instead of loading up a
23053 constant. */
23054 if (op_true == constant_m1)
23055 op_true = mask;
23056
23057 if (op_false == constant_0)
23058 op_false = mask;
23059 }
23060
23061 if (!REG_P (op_true) && !SUBREG_P (op_true))
23062 op_true = force_reg (dest_mode, op_true);
23063
23064 if (!REG_P (op_false) && !SUBREG_P (op_false))
23065 op_false = force_reg (dest_mode, op_false);
23066
23067 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
23068 CONST0_RTX (dest_mode));
23069 emit_insn (gen_rtx_SET (dest,
23070 gen_rtx_IF_THEN_ELSE (dest_mode,
23071 cond2,
23072 op_true,
23073 op_false)));
23074 return 1;
23075 }
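/* Expository example: the NE IF_THEN_ELSE emitted above is matched by
   the vector select patterns, so something like the (hypothetical)
   source

	vector int r = vec_sel (f, t, vec_cmpgt (a, b));

   becomes a vcmpgtsw that builds the -1/0 mask, followed by a
   vsel/xxsel that takes bits from t where the mask is set and from f
   elsewhere.  */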
23076
23077 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
23078 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
23079 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
23080 hardware has no such operation. */
23081
23082 static int
23083 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23084 {
23085 enum rtx_code code = GET_CODE (op);
23086 rtx op0 = XEXP (op, 0);
23087 rtx op1 = XEXP (op, 1);
23088 machine_mode compare_mode = GET_MODE (op0);
23089 machine_mode result_mode = GET_MODE (dest);
23090 bool max_p = false;
23091
23092 if (result_mode != compare_mode)
23093 return 0;
23094
23095 if (code == GE || code == GT)
23096 max_p = true;
23097 else if (code == LE || code == LT)
23098 max_p = false;
23099 else
23100 return 0;
23101
23102 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
23103 ;
23104
23105 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
23106 max_p = !max_p;
23107
23108 else
23109 return 0;
23110
23111 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
23112 return 1;
23113 }
23114
23115 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
23116 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
23117 operands of the last comparison is nonzero/true, FALSE_COND if it is
23118 zero/false. Return 0 if the hardware has no such operation. */
23119
23120 static int
23121 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23122 {
23123 enum rtx_code code = GET_CODE (op);
23124 rtx op0 = XEXP (op, 0);
23125 rtx op1 = XEXP (op, 1);
23126 machine_mode result_mode = GET_MODE (dest);
23127 rtx compare_rtx;
23128 rtx cmove_rtx;
23129 rtx clobber_rtx;
23130
23131 if (!can_create_pseudo_p ())
23132 return 0;
23133
23134 switch (code)
23135 {
23136 case EQ:
23137 case GE:
23138 case GT:
23139 break;
23140
23141 case NE:
23142 case LT:
23143 case LE:
23144 code = swap_condition (code);
23145 std::swap (op0, op1);
23146 break;
23147
23148 default:
23149 return 0;
23150 }
23151
23152 /* Generate: [(parallel [(set (dest)
23153 (if_then_else (op (cmp1) (cmp2))
23154 (true)
23155 (false)))
23156 (clobber (scratch))])]. */
23157
23158 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
23159 cmove_rtx = gen_rtx_SET (dest,
23160 gen_rtx_IF_THEN_ELSE (result_mode,
23161 compare_rtx,
23162 true_cond,
23163 false_cond));
23164
23165 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
23166 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23167 gen_rtvec (2, cmove_rtx, clobber_rtx)));
23168
23169 return 1;
23170 }
23171
23172 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
23173 operands of the last comparison is nonzero/true, FALSE_COND if it
23174 is zero/false. Return 0 if the hardware has no such operation. */
23175
23176 int
23177 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23178 {
23179 enum rtx_code code = GET_CODE (op);
23180 rtx op0 = XEXP (op, 0);
23181 rtx op1 = XEXP (op, 1);
23182 machine_mode compare_mode = GET_MODE (op0);
23183 machine_mode result_mode = GET_MODE (dest);
23184 rtx temp;
23185 bool is_against_zero;
23186
23187 /* These modes should always match. */
23188 if (GET_MODE (op1) != compare_mode
23189 /* In the isel case however, we can use a compare immediate, so
23190 op1 may be a small constant. */
23191 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
23192 return 0;
23193 if (GET_MODE (true_cond) != result_mode)
23194 return 0;
23195 if (GET_MODE (false_cond) != result_mode)
23196 return 0;
23197
23198 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23199 if (TARGET_P9_MINMAX
23200 && (compare_mode == SFmode || compare_mode == DFmode)
23201 && (result_mode == SFmode || result_mode == DFmode))
23202 {
23203 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
23204 return 1;
23205
23206 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
23207 return 1;
23208 }
23209
23210 /* Don't allow using floating point comparisons for integer results for
23211 now. */
23212 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23213 return 0;
23214
23215 /* First, work out if the hardware can do this at all, or
23216 if it's too slow.... */
23217 if (!FLOAT_MODE_P (compare_mode))
23218 {
23219 if (TARGET_ISEL)
23220 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23221 return 0;
23222 }
23223
23224 is_against_zero = op1 == CONST0_RTX (compare_mode);
23225
23226 /* A floating-point subtract might overflow, underflow, or produce
23227 an inexact result, thus changing the floating-point flags, so it
23228 can't be generated if we care about that. It's safe if one side
23229 of the construct is zero, since then no subtract will be
23230 generated. */
23231 if (SCALAR_FLOAT_MODE_P (compare_mode)
23232 && flag_trapping_math && ! is_against_zero)
23233 return 0;
23234
23235 /* Eliminate half of the comparisons by switching operands; this
23236 makes the remaining code simpler. */
23237 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23238 || code == LTGT || code == LT || code == UNLE)
23239 {
23240 code = reverse_condition_maybe_unordered (code);
23241 temp = true_cond;
23242 true_cond = false_cond;
23243 false_cond = temp;
23244 }
23245
23246 /* UNEQ and LTGT take four instructions for a comparison with zero;
23247 it'll probably be faster to use a branch here too. */
23248 if (code == UNEQ && HONOR_NANS (compare_mode))
23249 return 0;
23250
23251 /* We're going to try to implement comparisons by performing
23252 a subtract, then comparing against zero. Unfortunately,
23253 Inf - Inf is NaN which is not zero, and so if we don't
23254 know that the operand is finite and the comparison
23255 would treat EQ differently from UNORDERED, we can't do it. */
23256 if (HONOR_INFINITIES (compare_mode)
23257 && code != GT && code != UNGE
23258 && (GET_CODE (op1) != CONST_DOUBLE
23259 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23260 /* Constructs of the form (a OP b ? a : b) are safe. */
23261 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23262 || (! rtx_equal_p (op0, true_cond)
23263 && ! rtx_equal_p (op1, true_cond))))
23264 return 0;
23265
23266 /* At this point we know we can use fsel. */
23267
23268 /* Reduce the comparison to a comparison against zero. */
23269 if (! is_against_zero)
23270 {
23271 temp = gen_reg_rtx (compare_mode);
23272 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23273 op0 = temp;
23274 op1 = CONST0_RTX (compare_mode);
23275 }
23276
23277 /* If we don't care about NaNs we can reduce some of the comparisons
23278 down to faster ones. */
23279 if (! HONOR_NANS (compare_mode))
23280 switch (code)
23281 {
23282 case GT:
23283 code = LE;
23284 temp = true_cond;
23285 true_cond = false_cond;
23286 false_cond = temp;
23287 break;
23288 case UNGE:
23289 code = GE;
23290 break;
23291 case UNEQ:
23292 code = EQ;
23293 break;
23294 default:
23295 break;
23296 }
23297
23298 /* Now, reduce everything down to a GE. */
23299 switch (code)
23300 {
23301 case GE:
23302 break;
23303
23304 case LE:
23305 temp = gen_reg_rtx (compare_mode);
23306 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23307 op0 = temp;
23308 break;
23309
23310 case ORDERED:
23311 temp = gen_reg_rtx (compare_mode);
23312 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
23313 op0 = temp;
23314 break;
23315
23316 case EQ:
23317 temp = gen_reg_rtx (compare_mode);
23318 emit_insn (gen_rtx_SET (temp,
23319 gen_rtx_NEG (compare_mode,
23320 gen_rtx_ABS (compare_mode, op0))));
23321 op0 = temp;
23322 break;
23323
23324 case UNGE:
23325 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23326 temp = gen_reg_rtx (result_mode);
23327 emit_insn (gen_rtx_SET (temp,
23328 gen_rtx_IF_THEN_ELSE (result_mode,
23329 gen_rtx_GE (VOIDmode,
23330 op0, op1),
23331 true_cond, false_cond)));
23332 false_cond = true_cond;
23333 true_cond = temp;
23334
23335 temp = gen_reg_rtx (compare_mode);
23336 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23337 op0 = temp;
23338 break;
23339
23340 case GT:
23341 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23342 temp = gen_reg_rtx (result_mode);
23343 emit_insn (gen_rtx_SET (temp,
23344 gen_rtx_IF_THEN_ELSE (result_mode,
23345 gen_rtx_GE (VOIDmode,
23346 op0, op1),
23347 true_cond, false_cond)));
23348 true_cond = false_cond;
23349 false_cond = temp;
23350
23351 temp = gen_reg_rtx (compare_mode);
23352 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23353 op0 = temp;
23354 break;
23355
23356 default:
23357 gcc_unreachable ();
23358 }
23359
23360 emit_insn (gen_rtx_SET (dest,
23361 gen_rtx_IF_THEN_ELSE (result_mode,
23362 gen_rtx_GE (VOIDmode,
23363 op0, op1),
23364 true_cond, false_cond)));
23365 return 1;
23366 }
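/* Worked example of the fsel path (symbolic operands): with NaNs not
   honored and operands not already compared against zero,
   "d = (a >= b) ? x : y" in DFmode reduces to GE against zero and
   comes out roughly as:

	fsub t,a,b		# allowed: !flag_trapping_math here
	fsel d,t,x,y		# d = (t >= 0.0) ? x : y  */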
23367
23368 /* Same as above, but for ints (isel). */
23369
23370 static int
23371 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23372 {
23373 rtx condition_rtx, cr;
23374 machine_mode mode = GET_MODE (dest);
23375 enum rtx_code cond_code;
23376 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
23377 bool signedp;
23378
23379 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23380 return 0;
23381
23382 /* We still have to do the compare, because isel doesn't do a
23383 compare; it just looks at the CRx bits set by a previous compare
23384 instruction. */
23385 condition_rtx = rs6000_generate_compare (op, mode);
23386 cond_code = GET_CODE (condition_rtx);
23387 cr = XEXP (condition_rtx, 0);
23388 signedp = GET_MODE (cr) == CCmode;
23389
23390 isel_func = (mode == SImode
23391 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23392 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23393
23394 switch (cond_code)
23395 {
23396 case LT: case GT: case LTU: case GTU: case EQ:
23397 /* isel handles these directly. */
23398 break;
23399
23400 default:
23401 /* We need to swap the sense of the comparison. */
23402 {
23403 std::swap (false_cond, true_cond);
23404 PUT_CODE (condition_rtx, reverse_condition (cond_code));
23405 }
23406 break;
23407 }
23408
23409 false_cond = force_reg (mode, false_cond);
23410 if (true_cond != const0_rtx)
23411 true_cond = force_reg (mode, true_cond);
23412
23413 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
23414
23415 return 1;
23416 }
23417
23418 const char *
23419 output_isel (rtx *operands)
23420 {
23421 enum rtx_code code;
23422
23423 code = GET_CODE (operands[1]);
23424
23425 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
23426 {
23427 gcc_assert (GET_CODE (operands[2]) == REG
23428 && GET_CODE (operands[3]) == REG);
23429 PUT_CODE (operands[1], reverse_condition (code));
23430 return "isel %0,%3,%2,%j1";
23431 }
23432
23433 return "isel %0,%2,%3,%j1";
23434 }
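/* Example (symbolic operands): "r3 = (r4 < r5) ? r6 : r7" compares
   into cr0 and uses the LT bit directly:

	cmpw 0,4,5
	isel 3,6,7,0		# r3 = cr0.lt ? r6 : r7

   A GE/GEU/LE/LEU/NE test instead reverses the condition and swaps the
   last two register operands, as output_isel does above.  */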
23435
23436 void
23437 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23438 {
23439 machine_mode mode = GET_MODE (op0);
23440 enum rtx_code c;
23441 rtx target;
23442
23443 /* VSX/altivec have direct min/max insns. */
23444 if ((code == SMAX || code == SMIN)
23445 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23446 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23447 {
23448 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23449 return;
23450 }
23451
23452 if (code == SMAX || code == SMIN)
23453 c = GE;
23454 else
23455 c = GEU;
23456
23457 if (code == SMAX || code == UMAX)
23458 target = emit_conditional_move (dest, c, op0, op1, mode,
23459 op0, op1, mode, 0);
23460 else
23461 target = emit_conditional_move (dest, c, op0, op1, mode,
23462 op1, op0, mode, 0);
23463 gcc_assert (target);
23464 if (target != dest)
23465 emit_move_insn (dest, target);
23466 }
23467
23468 /* Split a signbit operation on 64-bit machines with direct move. Also
23469 handle the case where the value comes from memory or is already in a GPR. */
23470
23471 void
23472 rs6000_split_signbit (rtx dest, rtx src)
23473 {
23474 machine_mode d_mode = GET_MODE (dest);
23475 machine_mode s_mode = GET_MODE (src);
23476 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
23477 rtx shift_reg = dest_di;
23478
23479 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
23480
23481 if (MEM_P (src))
23482 {
23483 rtx mem = (WORDS_BIG_ENDIAN
23484 ? adjust_address (src, DImode, 0)
23485 : adjust_address (src, DImode, 8));
23486 emit_insn (gen_rtx_SET (dest_di, mem));
23487 }
23488
23489 else
23490 {
23491 unsigned int r = reg_or_subregno (src);
23492
23493 if (INT_REGNO_P (r))
23494 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
23495
23496 else
23497 {
23498 /* Generate the special mfvsrd instruction to get it in a GPR. */
23499 gcc_assert (VSX_REGNO_P (r));
23500 if (s_mode == KFmode)
23501 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
23502 else
23503 emit_insn (gen_signbittf2_dm2 (dest_di, src));
23504 }
23505 }
23506
23507 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
23508 return;
23509 }
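/* Illustrative sketch (assuming the usual direct-move expansion): for
   a KFmode value living in a VSX register, the signbit split becomes a
   move of the sign-carrying doubleword to a GPR followed by a shift:

	mfvsrd 9,34		# doubleword holding the sign bit
	srdi 9,9,63		# dest = sign bit  */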
23510
23511 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23512 COND is true. Mark the jump as unlikely to be taken. */
23513
23514 static void
23515 emit_unlikely_jump (rtx cond, rtx label)
23516 {
23517 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
23518 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23519 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
23520 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
23521 }
23522
23523 /* A subroutine of the atomic operation splitters. Emit a load-locked
23524 instruction in MODE. For QI/HImode, possibly use a pattern that includes
23525 the zero_extend operation. */
23526
23527 static void
23528 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23529 {
23530 rtx (*fn) (rtx, rtx) = NULL;
23531
23532 switch (mode)
23533 {
23534 case QImode:
23535 fn = gen_load_lockedqi;
23536 break;
23537 case HImode:
23538 fn = gen_load_lockedhi;
23539 break;
23540 case SImode:
23541 if (GET_MODE (mem) == QImode)
23542 fn = gen_load_lockedqi_si;
23543 else if (GET_MODE (mem) == HImode)
23544 fn = gen_load_lockedhi_si;
23545 else
23546 fn = gen_load_lockedsi;
23547 break;
23548 case DImode:
23549 fn = gen_load_lockeddi;
23550 break;
23551 case TImode:
23552 fn = gen_load_lockedti;
23553 break;
23554 default:
23555 gcc_unreachable ();
23556 }
23557 emit_insn (fn (reg, mem));
23558 }
23559
23560 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23561 instruction in MODE. */
23562
23563 static void
23564 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23565 {
23566 rtx (*fn) (rtx, rtx, rtx) = NULL;
23567
23568 switch (mode)
23569 {
23570 case QImode:
23571 fn = gen_store_conditionalqi;
23572 break;
23573 case HImode:
23574 fn = gen_store_conditionalhi;
23575 break;
23576 case SImode:
23577 fn = gen_store_conditionalsi;
23578 break;
23579 case DImode:
23580 fn = gen_store_conditionaldi;
23581 break;
23582 case TImode:
23583 fn = gen_store_conditionalti;
23584 break;
23585 default:
23586 gcc_unreachable ();
23587 }
23588
23589 /* Emit sync before stwcx. to address PPC405 Erratum. */
23590 if (PPC405_ERRATUM77)
23591 emit_insn (gen_hwsync ());
23592
23593 emit_insn (fn (res, mem, val));
23594 }
23595
23596 /* Expand barriers before and after a load_locked/store_cond sequence. */
23597
23598 static rtx
23599 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23600 {
23601 rtx addr = XEXP (mem, 0);
23602 int strict_p = (reload_in_progress || reload_completed);
23603
23604 if (!legitimate_indirect_address_p (addr, strict_p)
23605 && !legitimate_indexed_address_p (addr, strict_p))
23606 {
23607 addr = force_reg (Pmode, addr);
23608 mem = replace_equiv_address_nv (mem, addr);
23609 }
23610
23611 switch (model)
23612 {
23613 case MEMMODEL_RELAXED:
23614 case MEMMODEL_CONSUME:
23615 case MEMMODEL_ACQUIRE:
23616 break;
23617 case MEMMODEL_RELEASE:
23618 case MEMMODEL_ACQ_REL:
23619 emit_insn (gen_lwsync ());
23620 break;
23621 case MEMMODEL_SEQ_CST:
23622 emit_insn (gen_hwsync ());
23623 break;
23624 default:
23625 gcc_unreachable ();
23626 }
23627 return mem;
23628 }
23629
23630 static void
23631 rs6000_post_atomic_barrier (enum memmodel model)
23632 {
23633 switch (model)
23634 {
23635 case MEMMODEL_RELAXED:
23636 case MEMMODEL_CONSUME:
23637 case MEMMODEL_RELEASE:
23638 break;
23639 case MEMMODEL_ACQUIRE:
23640 case MEMMODEL_ACQ_REL:
23641 case MEMMODEL_SEQ_CST:
23642 emit_insn (gen_isync ());
23643 break;
23644 default:
23645 gcc_unreachable ();
23646 }
23647 }
23648
23649 /* A subroutine of the various atomic expanders. For sub-word operations,
23650 we must adjust things to operate on SImode. Given the original MEM,
23651 return a new aligned memory. Also build and return the quantities by
23652 which to shift and mask. */
23653
23654 static rtx
23655 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23656 {
23657 rtx addr, align, shift, mask, mem;
23658 HOST_WIDE_INT shift_mask;
23659 machine_mode mode = GET_MODE (orig_mem);
23660
23661 /* For smaller modes, we have to implement this via SImode. */
23662 shift_mask = (mode == QImode ? 0x18 : 0x10);
23663
23664 addr = XEXP (orig_mem, 0);
23665 addr = force_reg (GET_MODE (addr), addr);
23666
23667 /* Aligned memory containing subword. Generate a new memory. We
23668 do not want any of the existing MEM_ATTR data, as we're now
23669 accessing memory outside the original object. */
23670 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23672 mem = gen_rtx_MEM (SImode, align);
23673 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23674 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23675 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23676
23677 /* Shift amount for subword relative to aligned word. */
23678 shift = gen_reg_rtx (SImode);
23679 addr = gen_lowpart (SImode, addr);
23680 rtx tmp = gen_reg_rtx (SImode);
23681 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23682 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23683 if (BYTES_BIG_ENDIAN)
23684 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23685 shift, 1, OPTAB_LIB_WIDEN);
23686 *pshift = shift;
23687
23688 /* Mask for insertion. */
23689 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23690 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23691 *pmask = mask;
23692
23693 return mem;
23694 }
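/* Worked example: for a QImode operand at address 0x1003, align is
   0x1000 and shift is (0x1003 & 3) * 8 = 24 on little-endian (the XOR
   with 0x18 turns that into 0 on big-endian), so mask = 0xff << shift
   picks out the byte within the aligned SImode word.  */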
23695
23696 /* A subroutine of the various atomic expanders. For sub-word operands,
23697 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23698
23699 static rtx
23700 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23701 {
23702 rtx x;
23703
23704 x = gen_reg_rtx (SImode);
23705 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23706 gen_rtx_NOT (SImode, mask),
23707 oldval)));
23708
23709 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23710
23711 return x;
23712 }
23713
23714 /* A subroutine of the various atomic expanders. For sub-word operands,
23715 extract WIDE to NARROW via SHIFT. */
23716
23717 static void
23718 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23719 {
23720 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23721 wide, 1, OPTAB_LIB_WIDEN);
23722 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23723 }
23724
23725 /* Expand an atomic compare and swap operation. */
23726
23727 void
23728 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23729 {
23730 rtx boolval, retval, mem, oldval, newval, cond;
23731 rtx label1, label2, x, mask, shift;
23732 machine_mode mode, orig_mode;
23733 enum memmodel mod_s, mod_f;
23734 bool is_weak;
23735
23736 boolval = operands[0];
23737 retval = operands[1];
23738 mem = operands[2];
23739 oldval = operands[3];
23740 newval = operands[4];
23741 is_weak = (INTVAL (operands[5]) != 0);
23742 mod_s = memmodel_base (INTVAL (operands[6]));
23743 mod_f = memmodel_base (INTVAL (operands[7]));
23744 orig_mode = mode = GET_MODE (mem);
23745
23746 mask = shift = NULL_RTX;
23747 if (mode == QImode || mode == HImode)
23748 {
23749 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23750 lwarx and use shift/mask operations. With power8, we need to do the
23751 comparison in SImode, but the store is still done in QI/HImode. */
23752 oldval = convert_modes (SImode, mode, oldval, 1);
23753
23754 if (!TARGET_SYNC_HI_QI)
23755 {
23756 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23757
23758 /* Shift and mask OLDVAL into position within the word. */
23759 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23760 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23761
23762 /* Shift and mask NEWVAL into position within the word. */
23763 newval = convert_modes (SImode, mode, newval, 1);
23764 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23765 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23766 }
23767
23768 /* Prepare to adjust the return value. */
23769 retval = gen_reg_rtx (SImode);
23770 mode = SImode;
23771 }
23772 else if (reg_overlap_mentioned_p (retval, oldval))
23773 oldval = copy_to_reg (oldval);
23774
23775 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23776 oldval = copy_to_mode_reg (mode, oldval);
23777
23778 if (reg_overlap_mentioned_p (retval, newval))
23779 newval = copy_to_reg (newval);
23780
23781 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23782
23783 label1 = NULL_RTX;
23784 if (!is_weak)
23785 {
23786 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23787 emit_label (XEXP (label1, 0));
23788 }
23789 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23790
23791 emit_load_locked (mode, retval, mem);
23792
23793 x = retval;
23794 if (mask)
23795 x = expand_simple_binop (SImode, AND, retval, mask,
23796 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23797
23798 cond = gen_reg_rtx (CCmode);
23799 /* If we have TImode, synthesize a comparison. */
23800 if (mode != TImode)
23801 x = gen_rtx_COMPARE (CCmode, x, oldval);
23802 else
23803 {
23804 rtx xor1_result = gen_reg_rtx (DImode);
23805 rtx xor2_result = gen_reg_rtx (DImode);
23806 rtx or_result = gen_reg_rtx (DImode);
23807 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23808 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23809 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23810 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23811
23812 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23813 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23814 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23815 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23816 }
23817
23818 emit_insn (gen_rtx_SET (cond, x));
23819
23820 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23821 emit_unlikely_jump (x, label2);
23822
23823 x = newval;
23824 if (mask)
23825 x = rs6000_mask_atomic_subword (retval, newval, mask);
23826
23827 emit_store_conditional (orig_mode, cond, mem, x);
23828
23829 if (!is_weak)
23830 {
23831 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23832 emit_unlikely_jump (x, label1);
23833 }
23834
23835 if (!is_mm_relaxed (mod_f))
23836 emit_label (XEXP (label2, 0));
23837
23838 rs6000_post_atomic_barrier (mod_s);
23839
23840 if (is_mm_relaxed (mod_f))
23841 emit_label (XEXP (label2, 0));
23842
23843 if (shift)
23844 rs6000_finish_atomic_subword (operands[1], retval, shift);
23845 else if (mode != GET_MODE (operands[1]))
23846 convert_move (operands[1], retval, 1);
23847
23848 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23849 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23850 emit_insn (gen_rtx_SET (boolval, x));
23851 }
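/* Illustrative expansion (symbolic operands): a strong SImode compare
   and swap with seq_cst ordering comes out along these lines:

	sync			# pre-barrier for seq_cst
   1:	lwarx 9,0,3		# load-locked current value
	cmpw 0,9,4		# compare against OLDVAL
	bne 0,2f		# mismatch: fail
	stwcx. 5,0,3		# store-conditional of NEWVAL
	bne 0,1b		# lost reservation: retry (strong CAS)
   2:	isync			# post-barrier
				# CR0 is EQ on success, NE on failure  */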
23852
23853 /* Expand an atomic exchange operation. */
23854
23855 void
23856 rs6000_expand_atomic_exchange (rtx operands[])
23857 {
23858 rtx retval, mem, val, cond;
23859 machine_mode mode;
23860 enum memmodel model;
23861 rtx label, x, mask, shift;
23862
23863 retval = operands[0];
23864 mem = operands[1];
23865 val = operands[2];
23866 model = memmodel_base (INTVAL (operands[3]));
23867 mode = GET_MODE (mem);
23868
23869 mask = shift = NULL_RTX;
23870 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23871 {
23872 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23873
23874 /* Shift and mask VAL into position within the word. */
23875 val = convert_modes (SImode, mode, val, 1);
23876 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23877 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23878
23879 /* Prepare to adjust the return value. */
23880 retval = gen_reg_rtx (SImode);
23881 mode = SImode;
23882 }
23883
23884 mem = rs6000_pre_atomic_barrier (mem, model);
23885
23886 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23887 emit_label (XEXP (label, 0));
23888
23889 emit_load_locked (mode, retval, mem);
23890
23891 x = val;
23892 if (mask)
23893 x = rs6000_mask_atomic_subword (retval, val, mask);
23894
23895 cond = gen_reg_rtx (CCmode);
23896 emit_store_conditional (mode, cond, mem, x);
23897
23898 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23899 emit_unlikely_jump (x, label);
23900
23901 rs6000_post_atomic_barrier (model);
23902
23903 if (shift)
23904 rs6000_finish_atomic_subword (operands[0], retval, shift);
23905 }
23906
23907 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23908 to perform. MEM is the memory on which to operate. VAL is the second
23909 operand of the binary operator. BEFORE and AFTER are optional locations to
23910 return the value of MEM either before or after the operation. MODEL_RTX
23911 is a CONST_INT containing the memory model to use. */
23912
23913 void
23914 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23915 rtx orig_before, rtx orig_after, rtx model_rtx)
23916 {
23917 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23918 machine_mode mode = GET_MODE (mem);
23919 machine_mode store_mode = mode;
23920 rtx label, x, cond, mask, shift;
23921 rtx before = orig_before, after = orig_after;
23922
23923 mask = shift = NULL_RTX;
23924 /* On power8, we want to use SImode for the operation. On previous systems,
23925 use the operation in a subword and shift/mask to get the proper byte or
23926 halfword. */
23927 if (mode == QImode || mode == HImode)
23928 {
23929 if (TARGET_SYNC_HI_QI)
23930 {
23931 val = convert_modes (SImode, mode, val, 1);
23932
23933 /* Prepare to adjust the return value. */
23934 before = gen_reg_rtx (SImode);
23935 if (after)
23936 after = gen_reg_rtx (SImode);
23937 mode = SImode;
23938 }
23939 else
23940 {
23941 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23942
23943 /* Shift and mask VAL into position within the word. */
23944 val = convert_modes (SImode, mode, val, 1);
23945 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23946 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23947
23948 switch (code)
23949 {
23950 case IOR:
23951 case XOR:
23952 /* We've already zero-extended VAL. That is sufficient to
23953 make certain that it does not affect other bits. */
23954 mask = NULL;
23955 break;
23956
23957 case AND:
23958 /* If we make certain that all of the other bits in VAL are
23959 set, that will be sufficient to not affect other bits. */
23960 x = gen_rtx_NOT (SImode, mask);
23961 x = gen_rtx_IOR (SImode, x, val);
23962 emit_insn (gen_rtx_SET (val, x));
23963 mask = NULL;
23964 break;
23965
23966 case NOT:
23967 case PLUS:
23968 case MINUS:
23969 /* These will all affect bits outside the field and need
23970 adjustment via MASK within the loop. */
23971 break;
23972
23973 default:
23974 gcc_unreachable ();
23975 }
23976
23977 /* Prepare to adjust the return value. */
23978 before = gen_reg_rtx (SImode);
23979 if (after)
23980 after = gen_reg_rtx (SImode);
23981 store_mode = mode = SImode;
23982 }
23983 }
23984
23985 mem = rs6000_pre_atomic_barrier (mem, model);
23986
23987 label = gen_label_rtx ();
23988 emit_label (label);
23989 label = gen_rtx_LABEL_REF (VOIDmode, label);
23990
23991 if (before == NULL_RTX)
23992 before = gen_reg_rtx (mode);
23993
23994 emit_load_locked (mode, before, mem);
23995
23996 if (code == NOT)
23997 {
23998 x = expand_simple_binop (mode, AND, before, val,
23999 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24000 after = expand_simple_unop (mode, NOT, x, after, 1);
24001 }
24002 else
24003 {
24004 after = expand_simple_binop (mode, code, before, val,
24005 after, 1, OPTAB_LIB_WIDEN);
24006 }
24007
24008 x = after;
24009 if (mask)
24010 {
24011 x = expand_simple_binop (SImode, AND, after, mask,
24012 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24013 x = rs6000_mask_atomic_subword (before, x, mask);
24014 }
24015 else if (store_mode != mode)
24016 x = convert_modes (store_mode, mode, x, 1);
24017
24018 cond = gen_reg_rtx (CCmode);
24019 emit_store_conditional (store_mode, cond, mem, x);
24020
24021 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
24022 emit_unlikely_jump (x, label);
24023
24024 rs6000_post_atomic_barrier (model);
24025
24026 if (shift)
24027 {
24028 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
24029 then do the calculations in a SImode register. */
24030 if (orig_before)
24031 rs6000_finish_atomic_subword (orig_before, before, shift);
24032 if (orig_after)
24033 rs6000_finish_atomic_subword (orig_after, after, shift);
24034 }
24035 else if (store_mode != mode)
24036 {
24037 /* QImode/HImode on machines with lbarx/lharx where we do the native
24038 operation and then do the calculations in a SImode register. */
24039 if (orig_before)
24040 convert_move (orig_before, before, 1);
24041 if (orig_after)
24042 convert_move (orig_after, after, 1);
24043 }
24044 else if (orig_after && after != orig_after)
24045 emit_move_insn (orig_after, after);
24046 }
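/* Worked example of the AND adjustment above: a QImode fetch-and-AND
   without lbarx, targeting the byte at shift 16, has
   mask = 0x00ff0000; a shifted VAL of 0x000f0000 becomes
   VAL | ~mask = 0xff0fffff, so the SImode AND inside the lwarx/stwcx.
   loop can only clear bits inside the selected byte and the
   neighbouring bytes of the aligned word are preserved.  */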
24047
24048 /* Emit instructions to move SRC to DST. Called by splitters for
24049 multi-register moves. It will emit at most one instruction for
24050 each register that is accessed; that is, it won't emit li/lis pairs
24051 (or equivalent for 64-bit code). One of SRC or DST must be a hard
24052 register. */
24053
24054 void
24055 rs6000_split_multireg_move (rtx dst, rtx src)
24056 {
24057 /* The register number of the first register being moved. */
24058 int reg;
24059 /* The mode that is to be moved. */
24060 machine_mode mode;
24061 /* The mode that the move is being done in, and its size. */
24062 machine_mode reg_mode;
24063 int reg_mode_size;
24064 /* The number of registers that will be moved. */
24065 int nregs;
24066
24067 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
24068 mode = GET_MODE (dst);
24069 nregs = hard_regno_nregs[reg][mode];
24070 if (FP_REGNO_P (reg))
24071 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
24072 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
24073 else if (ALTIVEC_REGNO_P (reg))
24074 reg_mode = V16QImode;
24075 else
24076 reg_mode = word_mode;
24077 reg_mode_size = GET_MODE_SIZE (reg_mode);
24078
24079 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
24080
24081 /* TDmode residing in FP registers is special, since the ISA requires that
24082 the lower-numbered word of a register pair is always the most significant
24083 word, even in little-endian mode. This does not match the usual subreg
24084 semantics, so we cannot use simplify_gen_subreg in those cases. Access
24085 the appropriate constituent registers "by hand" in little-endian mode.
24086
24087 Note we do not need to check for destructive overlap here since TDmode
24088 can only reside in even/odd register pairs. */
24089 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
24090 {
24091 rtx p_src, p_dst;
24092 int i;
24093
24094 for (i = 0; i < nregs; i++)
24095 {
24096 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
24097 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
24098 else
24099 p_src = simplify_gen_subreg (reg_mode, src, mode,
24100 i * reg_mode_size);
24101
24102 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
24103 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
24104 else
24105 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
24106 i * reg_mode_size);
24107
24108 emit_insn (gen_rtx_SET (p_dst, p_src));
24109 }
24110
24111 return;
24112 }
24113
24114 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
24115 {
24116 /* Move register range backwards, if we might have destructive
24117 overlap. */
24118 int i;
24119 for (i = nregs - 1; i >= 0; i--)
24120 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24121 i * reg_mode_size),
24122 simplify_gen_subreg (reg_mode, src, mode,
24123 i * reg_mode_size)));
24124 }
24125 else
24126 {
24127 int i;
24128 int j = -1;
24129 bool used_update = false;
24130 rtx restore_basereg = NULL_RTX;
24131
24132 if (MEM_P (src) && INT_REGNO_P (reg))
24133 {
24134 rtx breg;
24135
24136 if (GET_CODE (XEXP (src, 0)) == PRE_INC
24137 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
24138 {
24139 rtx delta_rtx;
24140 breg = XEXP (XEXP (src, 0), 0);
24141 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
24142 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
24143 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
24144 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24145 src = replace_equiv_address (src, breg);
24146 }
24147 else if (! rs6000_offsettable_memref_p (src, reg_mode))
24148 {
24149 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
24150 {
24151 rtx basereg = XEXP (XEXP (src, 0), 0);
24152 if (TARGET_UPDATE)
24153 {
24154 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
24155 emit_insn (gen_rtx_SET (ndst,
24156 gen_rtx_MEM (reg_mode,
24157 XEXP (src, 0))));
24158 used_update = true;
24159 }
24160 else
24161 emit_insn (gen_rtx_SET (basereg,
24162 XEXP (XEXP (src, 0), 1)));
24163 src = replace_equiv_address (src, basereg);
24164 }
24165 else
24166 {
24167 rtx basereg = gen_rtx_REG (Pmode, reg);
24168 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
24169 src = replace_equiv_address (src, basereg);
24170 }
24171 }
24172
24173 breg = XEXP (src, 0);
24174 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
24175 breg = XEXP (breg, 0);
24176
24177 /* If the base register we are using to address memory is
24178 also a destination reg, then change that register last. */
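/* For example (illustrative): if DST is r9..r12 and the address base
is r10, J is set to 1 and the copy loop below runs in subword order
2, 3, 0, 1, so r10 is overwritten only by the final load. */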
24179 if (REG_P (breg)
24180 && REGNO (breg) >= REGNO (dst)
24181 && REGNO (breg) < REGNO (dst) + nregs)
24182 j = REGNO (breg) - REGNO (dst);
24183 }
24184 else if (MEM_P (dst) && INT_REGNO_P (reg))
24185 {
24186 rtx breg;
24187
24188 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
24189 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
24190 {
24191 rtx delta_rtx;
24192 breg = XEXP (XEXP (dst, 0), 0);
24193 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
24194 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
24195 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
24196
24197 /* We have to update the breg before doing the store.
24198 Use store with update, if available. */
24199
24200 if (TARGET_UPDATE)
24201 {
24202 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24203 emit_insn (TARGET_32BIT
24204 ? (TARGET_POWERPC64
24205 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
24206 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
24207 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
24208 used_update = true;
24209 }
24210 else
24211 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24212 dst = replace_equiv_address (dst, breg);
24213 }
24214 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
24215 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
24216 {
24217 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
24218 {
24219 rtx basereg = XEXP (XEXP (dst, 0), 0);
24220 if (TARGET_UPDATE)
24221 {
24222 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24223 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
24224 XEXP (dst, 0)),
24225 nsrc));
24226 used_update = true;
24227 }
24228 else
24229 emit_insn (gen_rtx_SET (basereg,
24230 XEXP (XEXP (dst, 0), 1)));
24231 dst = replace_equiv_address (dst, basereg);
24232 }
24233 else
24234 {
24235 rtx basereg = XEXP (XEXP (dst, 0), 0);
24236 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
24237 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
24238 && REG_P (basereg)
24239 && REG_P (offsetreg)
24240 && REGNO (basereg) != REGNO (offsetreg));
24241 if (REGNO (basereg) == 0)
24242 {
24243 rtx tmp = offsetreg;
24244 offsetreg = basereg;
24245 basereg = tmp;
24246 }
24247 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
24248 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
24249 dst = replace_equiv_address (dst, basereg);
24250 }
24251 }
24252 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
24253 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
24254 }
24255
24256 for (i = 0; i < nregs; i++)
24257 {
24258 /* Calculate index to next subword. */
24259 ++j;
24260 if (j == nregs)
24261 j = 0;
24262
24263 /* If compiler already emitted move of first word by
24264 store with update, no need to do anything. */
24265 if (j == 0 && used_update)
24266 continue;
24267
24268 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24269 j * reg_mode_size),
24270 simplify_gen_subreg (reg_mode, src, mode,
24271 j * reg_mode_size)));
24272 }
24273 if (restore_basereg != NULL_RTX)
24274 emit_insn (restore_basereg);
24275 }
24276 }
24277
24278 \f
24279 /* This page contains routines that are used to determine what the
24280 function prologue and epilogue code will do and write them out. */
24281
24282 static inline bool
24283 save_reg_p (int r)
24284 {
24285 return !call_used_regs[r] && df_regs_ever_live_p (r);
24286 }
24287
24288 /* Determine whether the gp REG is really used. */
24289
24290 static bool
24291 rs6000_reg_live_or_pic_offset_p (int reg)
24292 {
24293 /* We need to mark the PIC offset register live under the same conditions
24294 as it is set up; otherwise it won't be saved before we clobber it. */
24295
24296 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
24297 {
24298 if (TARGET_TOC && TARGET_MINIMAL_TOC
24299 && (crtl->calls_eh_return
24300 || df_regs_ever_live_p (reg)
24301 || !constant_pool_empty_p ()))
24302 return true;
24303
24304 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
24305 && flag_pic)
24306 return true;
24307 }
24308
24309 /* If the function calls eh_return, treat as used all the registers
24310 that would otherwise be checked for liveness. */
24311
24312 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
24313 && !call_used_regs[reg]);
24314 }
24315
24316 /* Return the first fixed-point register that is required to be
24317 saved. 32 if none. */
24318
24319 int
24320 first_reg_to_save (void)
24321 {
24322 int first_reg;
24323
24324 /* Find lowest numbered live register. */
24325 for (first_reg = 13; first_reg <= 31; first_reg++)
24326 if (save_reg_p (first_reg))
24327 break;
24328
24329 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
24330 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
24331 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24332 || (TARGET_TOC && TARGET_MINIMAL_TOC))
24333 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
24334 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
24335
24336 #if TARGET_MACHO
24337 if (flag_pic
24338 && crtl->uses_pic_offset_table
24339 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
24340 return RS6000_PIC_OFFSET_TABLE_REGNUM;
24341 #endif
24342
24343 return first_reg;
24344 }
24345
24346 /* Similar, for FP regs. */
24347
24348 int
24349 first_fp_reg_to_save (void)
24350 {
24351 int first_reg;
24352
24353 /* Find lowest numbered live register. */
24354 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24355 if (save_reg_p (first_reg))
24356 break;
24357
24358 return first_reg;
24359 }
24360
24361 /* Similar, for AltiVec regs. */
24362
24363 static int
24364 first_altivec_reg_to_save (void)
24365 {
24366 int i;
24367
24368 /* Stack frame remains as is unless we are in AltiVec ABI. */
24369 if (! TARGET_ALTIVEC_ABI)
24370 return LAST_ALTIVEC_REGNO + 1;
24371
24372 /* On Darwin, the unwind routines are compiled without
24373 TARGET_ALTIVEC, and use save_world to save/restore the
24374 altivec registers when necessary. */
24375 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24376 && ! TARGET_ALTIVEC)
24377 return FIRST_ALTIVEC_REGNO + 20;
24378
24379 /* Find lowest numbered live register. */
24380 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24381 if (save_reg_p (i))
24382 break;
24383
24384 return i;
24385 }
24386
24387 /* Return a 32-bit mask of the AltiVec registers we need to set in
24388 VRSAVE. Bit n of the return value is 1 if Vn is live, where bits are
24389 numbered IBM-style: bit 0 is the MSB of the 32-bit word. */
24390
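/* For instance, assuming the usual definition ALTIVEC_REG_BIT (n)
== 0x80000000 >> (n - FIRST_ALTIVEC_REGNO), a function in which
only V20 is live yields a mask of 0x00000800. */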
24391 static unsigned int
24392 compute_vrsave_mask (void)
24393 {
24394 unsigned int i, mask = 0;
24395
24396 /* On Darwin, the unwind routines are compiled without
24397 TARGET_ALTIVEC, and use save_world to save/restore the
24398 call-saved altivec registers when necessary. */
24399 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24400 && ! TARGET_ALTIVEC)
24401 mask |= 0xFFF;
24402
24403 /* First, find out if we use _any_ altivec registers. */
24404 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24405 if (df_regs_ever_live_p (i))
24406 mask |= ALTIVEC_REG_BIT (i);
24407
24408 if (mask == 0)
24409 return mask;
24410
24411 /* Next, remove the argument registers from the set. These must
24412 be in the VRSAVE mask set by the caller, so we don't need to add
24413 them in again. More importantly, the mask we compute here is
24414 used to generate CLOBBERs in the set_vrsave insn, and we do not
24415 wish the argument registers to die. */
24416 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24417 mask &= ~ALTIVEC_REG_BIT (i);
24418
24419 /* Similarly, remove the return value from the set. */
24420 {
24421 bool yes = false;
24422 diddle_return_value (is_altivec_return_reg, &yes);
24423 if (yes)
24424 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24425 }
24426
24427 return mask;
24428 }
24429
24430 /* For a very restricted set of circumstances, we can cut down the
24431 size of prologues/epilogues by calling our own save/restore-the-world
24432 routines. */
24433
24434 static void
24435 compute_save_world_info (rs6000_stack_t *info)
24436 {
24437 info->world_save_p = 1;
24438 info->world_save_p
24439 = (WORLD_SAVE_P (info)
24440 && DEFAULT_ABI == ABI_DARWIN
24441 && !cfun->has_nonlocal_label
24442 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24443 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24444 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24445 && info->cr_save_p);
24446
24447 /* This will not work in conjunction with sibcalls. Make sure there
24448 are none. (This check is expensive, but seldom executed.) */
24449 if (WORLD_SAVE_P (info))
24450 {
24451 rtx_insn *insn;
24452 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24453 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24454 {
24455 info->world_save_p = 0;
24456 break;
24457 }
24458 }
24459
24460 if (WORLD_SAVE_P (info))
24461 {
24462 /* Even if we're not touching VRsave, make sure there's room on the
24463 stack for it, if it looks like we're calling SAVE_WORLD, which
24464 will attempt to save it. */
24465 info->vrsave_size = 4;
24466
24467 /* If we are going to save the world, we need to save the link register too. */
24468 info->lr_save_p = 1;
24469
24470 /* "Save" the VRsave register too if we're saving the world. */
24471 if (info->vrsave_mask == 0)
24472 info->vrsave_mask = compute_vrsave_mask ();
24473
24474 /* Because the Darwin register save/restore routines only handle
24475 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24476 check. */
24477 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24478 && (info->first_altivec_reg_save
24479 >= FIRST_SAVED_ALTIVEC_REGNO));
24480 }
24481
24482 return;
24483 }
24484
24485
24486 static void
24487 is_altivec_return_reg (rtx reg, void *xyes)
24488 {
24489 bool *yes = (bool *) xyes;
24490 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24491 *yes = true;
24492 }
24493
24494 \f
24495 /* Return whether REG is a global user reg or has been specified by
24496 -ffixed-REG. We should not restore these, and so cannot use
24497 lmw or out-of-line restore functions if there are any. We also
24498 can't save them (well, emit frame notes for them), because frame
24499 unwinding during exception handling will restore saved registers. */
24500
24501 static bool
24502 fixed_reg_p (int reg)
24503 {
24504 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24505 backend sets it, overriding anything the user might have given. */
24506 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24507 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24508 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24509 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24510 return false;
24511
24512 return fixed_regs[reg];
24513 }
24514
24515 /* Determine the strategy for saving/restoring registers. */
24516
24517 enum {
24518 SAVE_MULTIPLE = 0x1,
24519 SAVE_INLINE_GPRS = 0x2,
24520 SAVE_INLINE_FPRS = 0x4,
24521 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24522 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24523 SAVE_INLINE_VRS = 0x20,
24524 REST_MULTIPLE = 0x100,
24525 REST_INLINE_GPRS = 0x200,
24526 REST_INLINE_FPRS = 0x400,
24527 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24528 REST_INLINE_VRS = 0x1000
24529 };
24530
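/* Illustrative example: an -Os ABI_V4 function that touches only
f29..f31 (and no call-saved GPRs or VRs) typically ends up with
strategy == (SAVE_INLINE_GPRS | REST_INLINE_GPRS
| SAVE_INLINE_VRS | REST_INLINE_VRS),
i.e. no inline bits set for the FPRs, leaving their saves and
restores to the out-of-line routines. This sketch assumes a 32-bit
ELF target with no static chain and no eh_return. */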
24531 static int
24532 rs6000_savres_strategy (rs6000_stack_t *info,
24533 bool using_static_chain_p)
24534 {
24535 int strategy = 0;
24536
24537 /* Select between in-line and out-of-line save and restore of regs.
24538 First, all the obvious cases where we don't use out-of-line. */
24539 if (crtl->calls_eh_return
24540 || cfun->machine->ra_need_lr)
24541 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24542 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24543 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24544
24545 if (info->first_gp_reg_save == 32)
24546 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24547
24548 if (info->first_fp_reg_save == 64
24549 /* The out-of-line FP routines use double-precision stores;
24550 we can't use those routines if we don't have such stores. */
24551 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
24552 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24553
24554 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24555 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24556
24557 /* Define cutoff for using out-of-line functions to save registers. */
24558 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24559 {
24560 if (!optimize_size)
24561 {
24562 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24563 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24564 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24565 }
24566 else
24567 {
24568 /* Prefer out-of-line restore if it will exit. */
24569 if (info->first_fp_reg_save > 61)
24570 strategy |= SAVE_INLINE_FPRS;
24571 if (info->first_gp_reg_save > 29)
24572 {
24573 if (info->first_fp_reg_save == 64)
24574 strategy |= SAVE_INLINE_GPRS;
24575 else
24576 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24577 }
24578 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24579 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24580 }
24581 }
24582 else if (DEFAULT_ABI == ABI_DARWIN)
24583 {
24584 if (info->first_fp_reg_save > 60)
24585 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24586 if (info->first_gp_reg_save > 29)
24587 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24588 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24589 }
24590 else
24591 {
24592 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24593 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
24594 || info->first_fp_reg_save > 61)
24595 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24596 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24597 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24598 }
24599
24600 /* Don't bother to try to save things out-of-line if r11 is occupied
24601 by the static chain. It would require too much fiddling and the
24602 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
24603 pointer on Darwin, and AIX uses r1 or r12. */
24604 if (using_static_chain_p
24605 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24606 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24607 | SAVE_INLINE_GPRS
24608 | SAVE_INLINE_VRS);
24609
24610 /* We can only use the out-of-line routines to restore fprs if we've
24611 saved all the registers from first_fp_reg_save in the prologue.
24612 Otherwise, we risk loading garbage. Of course, if we have saved
24613 out-of-line then we know we haven't skipped any fprs. */
24614 if ((strategy & SAVE_INLINE_FPRS)
24615 && !(strategy & REST_INLINE_FPRS))
24616 {
24617 int i;
24618
24619 for (i = info->first_fp_reg_save; i < 64; i++)
24620 if (fixed_regs[i] || !save_reg_p (i))
24621 {
24622 strategy |= REST_INLINE_FPRS;
24623 break;
24624 }
24625 }
24626
24627 /* Similarly, for altivec regs. */
24628 if ((strategy & SAVE_INLINE_VRS)
24629 && !(strategy & REST_INLINE_VRS))
24630 {
24631 int i;
24632
24633 for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24634 if (fixed_regs[i] || !save_reg_p (i))
24635 {
24636 strategy |= REST_INLINE_VRS;
24637 break;
24638 }
24639 }
24640
24641 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24642 saved is an out-of-line save or restore. Set up the value for
24643 the next test (excluding out-of-line gprs). */
24644 bool lr_save_p = (info->lr_save_p
24645 || !(strategy & SAVE_INLINE_FPRS)
24646 || !(strategy & SAVE_INLINE_VRS)
24647 || !(strategy & REST_INLINE_FPRS)
24648 || !(strategy & REST_INLINE_VRS));
24649
24650 if (TARGET_MULTIPLE
24651 && !TARGET_POWERPC64
24652 && info->first_gp_reg_save < 31
24653 && !(flag_shrink_wrap
24654 && flag_shrink_wrap_separate
24655 && optimize_function_for_speed_p (cfun)))
24656 {
24657 /* Prefer store multiple for saves over out-of-line routines,
24658 since the store-multiple instruction will always be smaller. */
24659 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24660
24661 /* The situation is more complicated with load multiple. We'd
24662 prefer to use the out-of-line routines for restores, since the
24663 "exit" out-of-line routines can handle the restore of LR and the
24664 frame teardown. However, it doesn't make sense to use the
24665 out-of-line routine if that is the only reason we'd need to save
24666 LR, and we can't use the "exit" out-of-line gpr restore if we
24667 have saved some fprs; in those cases it is advantageous to use
24668 load multiple when available. */
24669 if (info->first_fp_reg_save != 64 || !lr_save_p)
24670 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24671 }
24672
24673 /* Using the "exit" out-of-line routine does not improve code size
24674 if using it would require lr to be saved and if only saving one
24675 or two gprs. */
24676 else if (!lr_save_p && info->first_gp_reg_save > 29)
24677 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24678
24679 /* We can only use load multiple or the out-of-line routines to
24680 restore gprs if we've saved all the registers from
24681 first_gp_reg_save. Otherwise, we risk loading garbage.
24682 Of course, if we have saved out-of-line or used stmw then we know
24683 we haven't skipped any gprs. */
24684 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24685 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24686 {
24687 int i;
24688
24689 for (i = info->first_gp_reg_save; i < 32; i++)
24690 if (fixed_reg_p (i) || !save_reg_p (i))
24691 {
24692 strategy |= REST_INLINE_GPRS;
24693 strategy &= ~REST_MULTIPLE;
24694 break;
24695 }
24696 }
24697
24698 if (TARGET_ELF && TARGET_64BIT)
24699 {
24700 if (!(strategy & SAVE_INLINE_FPRS))
24701 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24702 else if (!(strategy & SAVE_INLINE_GPRS)
24703 && info->first_fp_reg_save == 64)
24704 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24705 }
24706 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24707 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24708
24709 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24710 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24711
24712 return strategy;
24713 }
24714
24715 /* Calculate the stack information for the current function. This is
24716 complicated by having two separate calling sequences, the AIX calling
24717 sequence and the V.4 calling sequence.
24718
24719 AIX (and Darwin/Mac OS X) stack frames look like:
24720 32-bit 64-bit
24721 SP----> +---------------------------------------+
24722 | back chain to caller | 0 0
24723 +---------------------------------------+
24724 | saved CR | 4 8 (8-11)
24725 +---------------------------------------+
24726 | saved LR | 8 16
24727 +---------------------------------------+
24728 | reserved for compilers | 12 24
24729 +---------------------------------------+
24730 | reserved for binders | 16 32
24731 +---------------------------------------+
24732 | saved TOC pointer | 20 40
24733 +---------------------------------------+
24734 | Parameter save area (+padding*) (P) | 24 48
24735 +---------------------------------------+
24736 | Alloca space (A) | 24+P etc.
24737 +---------------------------------------+
24738 | Local variable space (L) | 24+P+A
24739 +---------------------------------------+
24740 | Float/int conversion temporary (X) | 24+P+A+L
24741 +---------------------------------------+
24742 | Save area for AltiVec registers (W) | 24+P+A+L+X
24743 +---------------------------------------+
24744 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24745 +---------------------------------------+
24746 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24747 +---------------------------------------+
24748 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24749 +---------------------------------------+
24750 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24751 +---------------------------------------+
24752 old SP->| back chain to caller's caller |
24753 +---------------------------------------+
24754
24755 * If the alloca area is present, the parameter save area is
24756 padded so that the former starts 16-byte aligned.
24757
24758 The required alignment for AIX configurations is two words (i.e., 8
24759 or 16 bytes).
24760
24761 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24762
24763 SP----> +---------------------------------------+
24764 | Back chain to caller | 0
24765 +---------------------------------------+
24766 | Save area for CR | 8
24767 +---------------------------------------+
24768 | Saved LR | 16
24769 +---------------------------------------+
24770 | Saved TOC pointer | 24
24771 +---------------------------------------+
24772 | Parameter save area (+padding*) (P) | 32
24773 +---------------------------------------+
24774 | Alloca space (A) | 32+P
24775 +---------------------------------------+
24776 | Local variable space (L) | 32+P+A
24777 +---------------------------------------+
24778 | Save area for AltiVec registers (W) | 32+P+A+L
24779 +---------------------------------------+
24780 | AltiVec alignment padding (Y) | 32+P+A+L+W
24781 +---------------------------------------+
24782 | Save area for GP registers (G) | 32+P+A+L+W+Y
24783 +---------------------------------------+
24784 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24785 +---------------------------------------+
24786 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24787 +---------------------------------------+
24788
24789 * If the alloca area is present, the parameter save area is
24790 padded so that the former starts 16-byte aligned.
24791
24792 V.4 stack frames look like:
24793
24794 SP----> +---------------------------------------+
24795 | back chain to caller | 0
24796 +---------------------------------------+
24797 | caller's saved LR | 4
24798 +---------------------------------------+
24799 | Parameter save area (+padding*) (P) | 8
24800 +---------------------------------------+
24801 | Alloca space (A) | 8+P
24802 +---------------------------------------+
24803 | Varargs save area (V) | 8+P+A
24804 +---------------------------------------+
24805 | Local variable space (L) | 8+P+A+V
24806 +---------------------------------------+
24807 | Float/int conversion temporary (X) | 8+P+A+V+L
24808 +---------------------------------------+
24809 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24810 +---------------------------------------+
24811 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24812 +---------------------------------------+
24813 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24814 +---------------------------------------+
24815 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24816 +---------------------------------------+
24817 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24818 +---------------------------------------+
24819 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24820 +---------------------------------------+
24821 old SP->| back chain to caller's caller |
24822 +---------------------------------------+
24823
24824 * If the alloca area is present and the required alignment is
24825 16 bytes, the parameter save area is padded so that the
24826 alloca area starts 16-byte aligned.
24827
24828 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24829 given. (But note below and in sysv4.h that we require only 8 and
24830 may round up the size of our stack frame anyway. The historical
24831 reason is early versions of powerpc-linux which didn't properly
24832 align the stack at program startup. A happy side-effect is that
24833 -mno-eabi libraries can be used with -meabi programs.)
24834
24835 The EABI configuration defaults to the V.4 layout. However,
24836 the stack alignment requirements may differ. If -mno-eabi is not
24837 given, the required stack alignment is 8 bytes; if -mno-eabi is
24838 given, the required alignment is 16 bytes. (But see V.4 comment
24839 above.) */
24840
24841 #ifndef ABI_STACK_BOUNDARY
24842 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24843 #endif
24844
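/* Throughout the size computations below, RS6000_ALIGN (n, a) rounds N
up to a multiple of A. For example (an illustrative value, not a
fixed requirement), a function whose get_frame_size () is 20 gets
vars_size == RS6000_ALIGN (20, 8) == 24. */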
24845 static rs6000_stack_t *
24846 rs6000_stack_info (void)
24847 {
24848 /* We should never be called for thunks, we are not set up for that. */
24849 gcc_assert (!cfun->is_thunk);
24850
24851 rs6000_stack_t *info = &stack_info;
24852 int reg_size = TARGET_32BIT ? 4 : 8;
24853 int ehrd_size;
24854 int ehcr_size;
24855 int save_align;
24856 int first_gp;
24857 HOST_WIDE_INT non_fixed_size;
24858 bool using_static_chain_p;
24859
24860 if (reload_completed && info->reload_completed)
24861 return info;
24862
24863 memset (info, 0, sizeof (*info));
24864 info->reload_completed = reload_completed;
24865
24866 /* Select which calling sequence. */
24867 info->abi = DEFAULT_ABI;
24868
24869 /* Calculate which registers need to be saved & save area size. */
24870 info->first_gp_reg_save = first_reg_to_save ();
24871 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24872 even if it currently looks like we won't. Reload may need it to
24873 get at a constant; if so, it will have already created a constant
24874 pool entry for it. */
24875 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24876 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24877 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24878 && crtl->uses_const_pool
24879 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24880 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24881 else
24882 first_gp = info->first_gp_reg_save;
24883
24884 info->gp_size = reg_size * (32 - first_gp);
24885
24886 info->first_fp_reg_save = first_fp_reg_to_save ();
24887 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24888
24889 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24890 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24891 - info->first_altivec_reg_save);
24892
24893 /* Does this function call anything? */
24894 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24895
24896 /* Determine if we need to save the condition code registers. */
24897 if (save_reg_p (CR2_REGNO)
24898 || save_reg_p (CR3_REGNO)
24899 || save_reg_p (CR4_REGNO))
24900 {
24901 info->cr_save_p = 1;
24902 if (DEFAULT_ABI == ABI_V4)
24903 info->cr_size = reg_size;
24904 }
24905
24906 /* If the current function calls __builtin_eh_return, then we need
24907 to allocate stack space for registers that will hold data for
24908 the exception handler. */
24909 if (crtl->calls_eh_return)
24910 {
24911 unsigned int i;
24912 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24913 continue;
24914
24915 ehrd_size = i * UNITS_PER_WORD;
24916 }
24917 else
24918 ehrd_size = 0;
24919
24920 /* In the ELFv2 ABI, we also need to allocate space for separate
24921 CR field save areas if the function calls __builtin_eh_return. */
24922 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24923 {
24924 /* This hard-codes that we have three call-saved CR fields. */
24925 ehcr_size = 3 * reg_size;
24926 /* We do *not* use the regular CR save mechanism. */
24927 info->cr_save_p = 0;
24928 }
24929 else
24930 ehcr_size = 0;
24931
24932 /* Determine various sizes. */
24933 info->reg_size = reg_size;
24934 info->fixed_size = RS6000_SAVE_AREA;
24935 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24936 if (cfun->calls_alloca)
24937 info->parm_size =
24938 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24939 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24940 else
24941 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24942 TARGET_ALTIVEC ? 16 : 8);
24943 if (FRAME_GROWS_DOWNWARD)
24944 info->vars_size
24945 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24946 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24947 - (info->fixed_size + info->vars_size + info->parm_size);
24948
24949 if (TARGET_ALTIVEC_ABI)
24950 info->vrsave_mask = compute_vrsave_mask ();
24951
24952 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24953 info->vrsave_size = 4;
24954
24955 compute_save_world_info (info);
24956
24957 /* Calculate the offsets. */
24958 switch (DEFAULT_ABI)
24959 {
24960 case ABI_NONE:
24961 default:
24962 gcc_unreachable ();
24963
24964 case ABI_AIX:
24965 case ABI_ELFv2:
24966 case ABI_DARWIN:
24967 info->fp_save_offset = -info->fp_size;
24968 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24969
24970 if (TARGET_ALTIVEC_ABI)
24971 {
24972 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24973
24974 /* Align stack so vector save area is on a quadword boundary.
24975 The padding goes above the vectors. */
24976 if (info->altivec_size != 0)
24977 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24978
24979 info->altivec_save_offset = info->vrsave_save_offset
24980 - info->altivec_padding_size
24981 - info->altivec_size;
24982 gcc_assert (info->altivec_size == 0
24983 || info->altivec_save_offset % 16 == 0);
24984
24985 /* Adjust for AltiVec case. */
24986 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24987 }
24988 else
24989 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24990
24991 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24992 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24993 info->lr_save_offset = 2*reg_size;
24994 break;
24995
24996 case ABI_V4:
24997 info->fp_save_offset = -info->fp_size;
24998 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24999 info->cr_save_offset = info->gp_save_offset - info->cr_size;
25000
25001 if (TARGET_ALTIVEC_ABI)
25002 {
25003 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
25004
25005 /* Align stack so vector save area is on a quadword boundary. */
25006 if (info->altivec_size != 0)
25007 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
25008
25009 info->altivec_save_offset = info->vrsave_save_offset
25010 - info->altivec_padding_size
25011 - info->altivec_size;
25012
25013 /* Adjust for AltiVec case. */
25014 info->ehrd_offset = info->altivec_save_offset;
25015 }
25016 else
25017 info->ehrd_offset = info->cr_save_offset;
25018
25019 info->ehrd_offset -= ehrd_size;
25020 info->lr_save_offset = reg_size;
25021 }
25022
25023 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
25024 info->save_size = RS6000_ALIGN (info->fp_size
25025 + info->gp_size
25026 + info->altivec_size
25027 + info->altivec_padding_size
25028 + ehrd_size
25029 + ehcr_size
25030 + info->cr_size
25031 + info->vrsave_size,
25032 save_align);
25033
25034 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
25035
25036 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
25037 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
25038
25039 /* Determine if we need to save the link register. */
25040 if (info->calls_p
25041 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25042 && crtl->profile
25043 && !TARGET_PROFILE_KERNEL)
25044 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
25045 #ifdef TARGET_RELOCATABLE
25046 || (DEFAULT_ABI == ABI_V4
25047 && (TARGET_RELOCATABLE || flag_pic > 1)
25048 && !constant_pool_empty_p ())
25049 #endif
25050 || rs6000_ra_ever_killed ())
25051 info->lr_save_p = 1;
25052
25053 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
25054 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
25055 && call_used_regs[STATIC_CHAIN_REGNUM]);
25056 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
25057
25058 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
25059 || !(info->savres_strategy & SAVE_INLINE_FPRS)
25060 || !(info->savres_strategy & SAVE_INLINE_VRS)
25061 || !(info->savres_strategy & REST_INLINE_GPRS)
25062 || !(info->savres_strategy & REST_INLINE_FPRS)
25063 || !(info->savres_strategy & REST_INLINE_VRS))
25064 info->lr_save_p = 1;
25065
25066 if (info->lr_save_p)
25067 df_set_regs_ever_live (LR_REGNO, true);
25068
25069 /* Determine if we need to allocate any stack frame:
25070
25071 For AIX we need to push the stack if a frame pointer is needed
25072 (because the stack might be dynamically adjusted), if we are
25073 debugging, if we make calls, or if the sum of fp_save, gp_save,
25074 and local variables is more than the space needed to save all
25075 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
25076 + 18*8 = 288 (GPR13 reserved).
25077
25078 For V.4 we don't have the stack cushion that AIX uses, but assume
25079 that the debugger can handle stackless frames. */
25080
25081 if (info->calls_p)
25082 info->push_p = 1;
25083
25084 else if (DEFAULT_ABI == ABI_V4)
25085 info->push_p = non_fixed_size != 0;
25086
25087 else if (frame_pointer_needed)
25088 info->push_p = 1;
25089
25090 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
25091 info->push_p = 1;
25092
25093 else
25094 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
25095
25096 return info;
25097 }
25098
25099 static void
25100 debug_stack_info (rs6000_stack_t *info)
25101 {
25102 const char *abi_string;
25103
25104 if (! info)
25105 info = rs6000_stack_info ();
25106
25107 fprintf (stderr, "\nStack information for function %s:\n",
25108 ((current_function_decl && DECL_NAME (current_function_decl))
25109 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
25110 : "<unknown>"));
25111
25112 switch (info->abi)
25113 {
25114 default: abi_string = "Unknown"; break;
25115 case ABI_NONE: abi_string = "NONE"; break;
25116 case ABI_AIX: abi_string = "AIX"; break;
25117 case ABI_ELFv2: abi_string = "ELFv2"; break;
25118 case ABI_DARWIN: abi_string = "Darwin"; break;
25119 case ABI_V4: abi_string = "V.4"; break;
25120 }
25121
25122 fprintf (stderr, "\tABI = %5s\n", abi_string);
25123
25124 if (TARGET_ALTIVEC_ABI)
25125 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
25126
25127 if (info->first_gp_reg_save != 32)
25128 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
25129
25130 if (info->first_fp_reg_save != 64)
25131 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
25132
25133 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
25134 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
25135 info->first_altivec_reg_save);
25136
25137 if (info->lr_save_p)
25138 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
25139
25140 if (info->cr_save_p)
25141 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
25142
25143 if (info->vrsave_mask)
25144 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
25145
25146 if (info->push_p)
25147 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
25148
25149 if (info->calls_p)
25150 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
25151
25152 if (info->gp_size)
25153 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
25154
25155 if (info->fp_size)
25156 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
25157
25158 if (info->altivec_size)
25159 fprintf (stderr, "\taltivec_save_offset = %5d\n",
25160 info->altivec_save_offset);
25161
25162 if (info->vrsave_size)
25163 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
25164 info->vrsave_save_offset);
25165
25166 if (info->lr_save_p)
25167 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
25168
25169 if (info->cr_save_p)
25170 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
25171
25172 if (info->varargs_save_offset)
25173 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
25174
25175 if (info->total_size)
25176 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25177 info->total_size);
25178
25179 if (info->vars_size)
25180 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25181 info->vars_size);
25182
25183 if (info->parm_size)
25184 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
25185
25186 if (info->fixed_size)
25187 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
25188
25189 if (info->gp_size)
25190 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
25191
25192 if (info->fp_size)
25193 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
25194
25195 if (info->altivec_size)
25196 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
25197
25198 if (info->vrsave_size)
25199 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
25200
25201 if (info->altivec_padding_size)
25202 fprintf (stderr, "\taltivec_padding_size= %5d\n",
25203 info->altivec_padding_size);
25204
25205 if (info->cr_size)
25206 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
25207
25208 if (info->save_size)
25209 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
25210
25211 if (info->reg_size != 4)
25212 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
25213
25214 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
25215
25216 fprintf (stderr, "\n");
25217 }
25218
25219 rtx
25220 rs6000_return_addr (int count, rtx frame)
25221 {
25222 /* Currently we don't optimize very well between prolog and body
25223 code, and for PIC code it can actually be quite bad, so
25224 don't try to be too clever here. */
25225 if (count != 0
25226 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
25227 {
25228 cfun->machine->ra_needs_full_frame = 1;
25229
25230 return
25231 gen_rtx_MEM
25232 (Pmode,
25233 memory_address
25234 (Pmode,
25235 plus_constant (Pmode,
25236 copy_to_reg
25237 (gen_rtx_MEM (Pmode,
25238 memory_address (Pmode, frame))),
25239 RETURN_ADDRESS_OFFSET)));
25240 }
25241
25242 cfun->machine->ra_need_lr = 1;
25243 return get_hard_reg_initial_val (Pmode, LR_REGNO);
25244 }
25245
25246 /* Say whether a function is a candidate for sibcall handling or not. */
25247
25248 static bool
25249 rs6000_function_ok_for_sibcall (tree decl, tree exp)
25250 {
25251 tree fntype;
25252
25253 if (decl)
25254 fntype = TREE_TYPE (decl);
25255 else
25256 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25257
25258 /* We can't do it if the called function has more vector parameters
25259 than the current function; there's nowhere to put the VRsave code. */
25260 if (TARGET_ALTIVEC_ABI
25261 && TARGET_ALTIVEC_VRSAVE
25262 && !(decl && decl == current_function_decl))
25263 {
25264 function_args_iterator args_iter;
25265 tree type;
25266 int nvreg = 0;
25267
25268 /* Functions with vector parameters are required to have a
25269 prototype, so the argument type info must be available
25270 here. */
25271 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
25272 if (TREE_CODE (type) == VECTOR_TYPE
25273 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25274 nvreg++;
25275
25276 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
25277 if (TREE_CODE (type) == VECTOR_TYPE
25278 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25279 nvreg--;
25280
25281 if (nvreg > 0)
25282 return false;
25283 }
25284
25285 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25286 functions, because the callee may have a different TOC pointer to
25287 the caller and there's no way to ensure we restore the TOC when
25288 we return. With the secure-plt SYSV ABI we can't make non-local
25289 calls when -fpic/PIC because the plt call stubs use r30. */
25290 if (DEFAULT_ABI == ABI_DARWIN
25291 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25292 && decl
25293 && !DECL_EXTERNAL (decl)
25294 && !DECL_WEAK (decl)
25295 && (*targetm.binds_local_p) (decl))
25296 || (DEFAULT_ABI == ABI_V4
25297 && (!TARGET_SECURE_PLT
25298 || !flag_pic
25299 || (decl
25300 && (*targetm.binds_local_p) (decl)))))
25301 {
25302 tree attr_list = TYPE_ATTRIBUTES (fntype);
25303
25304 if (!lookup_attribute ("longcall", attr_list)
25305 || lookup_attribute ("shortcall", attr_list))
25306 return true;
25307 }
25308
25309 return false;
25310 }
25311
25312 static int
25313 rs6000_ra_ever_killed (void)
25314 {
25315 rtx_insn *top;
25316 rtx reg;
25317 rtx_insn *insn;
25318
25319 if (cfun->is_thunk)
25320 return 0;
25321
25322 if (cfun->machine->lr_save_state)
25323 return cfun->machine->lr_save_state - 1;
25324
25325 /* regs_ever_live has LR marked as used if any sibcalls are present,
25326 but this should not force saving and restoring in the
25327 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25328 clobbers LR, so that is inappropriate. */
25329
25330 /* Also, the prologue can generate a store into LR that
25331 doesn't really count, like this:
25332
25333 move LR->R0
25334 bcl to set PIC register
25335 move LR->R31
25336 move R0->LR
25337
25338 When we're called from the epilogue, we need to avoid counting
25339 this as a store. */
25340
25341 push_topmost_sequence ();
25342 top = get_insns ();
25343 pop_topmost_sequence ();
25344 reg = gen_rtx_REG (Pmode, LR_REGNO);
25345
25346 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25347 {
25348 if (INSN_P (insn))
25349 {
25350 if (CALL_P (insn))
25351 {
25352 if (!SIBLING_CALL_P (insn))
25353 return 1;
25354 }
25355 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25356 return 1;
25357 else if (set_of (reg, insn) != NULL_RTX
25358 && !prologue_epilogue_contains (insn))
25359 return 1;
25360 }
25361 }
25362 return 0;
25363 }
25364 \f
25365 /* Emit instructions needed to load the TOC register.
25366 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
25367 and there is a constant pool, or for SVR4 -fpic. */
25368
25369 void
25370 rs6000_emit_load_toc_table (int fromprolog)
25371 {
25372 rtx dest;
25373 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25374
25375 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25376 {
25377 char buf[30];
25378 rtx lab, tmp1, tmp2, got;
25379
25380 lab = gen_label_rtx ();
25381 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25382 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25383 if (flag_pic == 2)
25384 {
25385 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25386 need_toc_init = 1;
25387 }
25388 else
25389 got = rs6000_got_sym ();
25390 tmp1 = tmp2 = dest;
25391 if (!fromprolog)
25392 {
25393 tmp1 = gen_reg_rtx (Pmode);
25394 tmp2 = gen_reg_rtx (Pmode);
25395 }
25396 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25397 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25398 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25399 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
25400 }
25401 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25402 {
25403 emit_insn (gen_load_toc_v4_pic_si ());
25404 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25405 }
25406 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25407 {
25408 char buf[30];
25409 rtx temp0 = (fromprolog
25410 ? gen_rtx_REG (Pmode, 0)
25411 : gen_reg_rtx (Pmode));
25412
25413 if (fromprolog)
25414 {
25415 rtx symF, symL;
25416
25417 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25418 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25419
25420 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25421 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25422
25423 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25424 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25425 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
25426 }
25427 else
25428 {
25429 rtx tocsym, lab;
25430
25431 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25432 need_toc_init = 1;
25433 lab = gen_label_rtx ();
25434 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25435 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25436 if (TARGET_LINK_STACK)
25437 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25438 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25439 }
25440 emit_insn (gen_addsi3 (dest, temp0, dest));
25441 }
25442 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25443 {
25444 /* This is for AIX code running in non-PIC ELF32. */
25445 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25446
25447 need_toc_init = 1;
25448 emit_insn (gen_elf_high (dest, realsym));
25449 emit_insn (gen_elf_low (dest, dest, realsym));
25450 }
25451 else
25452 {
25453 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25454
25455 if (TARGET_32BIT)
25456 emit_insn (gen_load_toc_aix_si (dest));
25457 else
25458 emit_insn (gen_load_toc_aix_di (dest));
25459 }
25460 }
25461
25462 /* Emit instructions to restore the link register after determining where
25463 its value has been stored. */
25464
25465 void
25466 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25467 {
25468 rs6000_stack_t *info = rs6000_stack_info ();
25469 rtx operands[2];
25470
25471 operands[0] = source;
25472 operands[1] = scratch;
25473
25474 if (info->lr_save_p)
25475 {
25476 rtx frame_rtx = stack_pointer_rtx;
25477 HOST_WIDE_INT sp_offset = 0;
25478 rtx tmp;
25479
25480 if (frame_pointer_needed
25481 || cfun->calls_alloca
25482 || info->total_size > 32767)
25483 {
25484 tmp = gen_frame_mem (Pmode, frame_rtx);
25485 emit_move_insn (operands[1], tmp);
25486 frame_rtx = operands[1];
25487 }
25488 else if (info->push_p)
25489 sp_offset = info->total_size;
25490
25491 tmp = plus_constant (Pmode, frame_rtx,
25492 info->lr_save_offset + sp_offset);
25493 tmp = gen_frame_mem (Pmode, tmp);
25494 emit_move_insn (tmp, operands[0]);
25495 }
25496 else
25497 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25498
25499 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25500 state of lr_save_p so any change from here on would be a bug. In
25501 particular, stop rs6000_ra_ever_killed from considering the SET
25502 of lr we may have added just above. */
25503 cfun->machine->lr_save_state = info->lr_save_p + 1;
25504 }
25505
25506 static GTY(()) alias_set_type set = -1;
25507
25508 alias_set_type
25509 get_TOC_alias_set (void)
25510 {
25511 if (set == -1)
25512 set = new_alias_set ();
25513 return set;
25514 }
25515
25516 /* This returns nonzero if the current function uses the TOC. This is
25517 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25518 is generated by the ABI_V4 load_toc_* patterns. */
25519 #if TARGET_ELF
25520 static int
25521 uses_TOC (void)
25522 {
25523 rtx_insn *insn;
25524
25525 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25526 if (INSN_P (insn))
25527 {
25528 rtx pat = PATTERN (insn);
25529 int i;
25530
25531 if (GET_CODE (pat) == PARALLEL)
25532 for (i = 0; i < XVECLEN (pat, 0); i++)
25533 {
25534 rtx sub = XVECEXP (pat, 0, i);
25535 if (GET_CODE (sub) == USE)
25536 {
25537 sub = XEXP (sub, 0);
25538 if (GET_CODE (sub) == UNSPEC
25539 && XINT (sub, 1) == UNSPEC_TOC)
25540 return 1;
25541 }
25542 }
25543 }
25544 return 0;
25545 }
25546 #endif
25547
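/* Create a TOC reference to SYMBOL. For -mcmodel=small, or while we
can still create pseudos, this is just the UNSPEC_TOCREL; for the
larger code models it is split into a HIGH/LO_SUM pair, using
LARGETOC_REG (if non-null) to hold the high part. */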
25548 rtx
25549 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25550 {
25551 rtx tocrel, tocreg, hi;
25552
25553 if (TARGET_DEBUG_ADDR)
25554 {
25555 if (GET_CODE (symbol) == SYMBOL_REF)
25556 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25557 XSTR (symbol, 0));
25558 else
25559 {
25560 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25561 GET_RTX_NAME (GET_CODE (symbol)));
25562 debug_rtx (symbol);
25563 }
25564 }
25565
25566 if (!can_create_pseudo_p ())
25567 df_set_regs_ever_live (TOC_REGISTER, true);
25568
25569 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25570 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25571 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
25572 return tocrel;
25573
25574 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25575 if (largetoc_reg != NULL)
25576 {
25577 emit_move_insn (largetoc_reg, hi);
25578 hi = largetoc_reg;
25579 }
25580 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
25581 }
25582
25583 /* Issue assembly directives that create a reference to the given DWARF
25584 FRAME_TABLE_LABEL from the current function section. */
25585 void
25586 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25587 {
25588 fprintf (asm_out_file, "\t.ref %s\n",
25589 (* targetm.strip_name_encoding) (frame_table_label));
25590 }
25591 \f
25592 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25593 and the change to the stack pointer. */
25594
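/* A sketch of the insn emitted below for the common case (sp only):
(parallel [(set (mem:BLK (reg 1)) (const_int 0))])
with additional such sets for the hard frame pointer and FP when
they are distinct. The BLKmode MEMs carry the frame alias set, which
is what creates the scheduling dependency. */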
25595 static void
25596 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25597 {
25598 rtvec p;
25599 int i;
25600 rtx regs[3];
25601
25602 i = 0;
25603 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25604 if (hard_frame_needed)
25605 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25606 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25607 || (hard_frame_needed
25608 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25609 regs[i++] = fp;
25610
25611 p = rtvec_alloc (i);
25612 while (--i >= 0)
25613 {
25614 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25615 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25616 }
25617
25618 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
25619 }
25620
25621 /* Emit the correct code for allocating stack space, as insns.
25622 If COPY_REG, leave a copy of the old stack pointer (adjusted by COPY_OFF) in it.
25623 The generated code may use hard register 0 as a temporary. */
25624
25625 static rtx_insn *
25626 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25627 {
25628 rtx_insn *insn;
25629 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25630 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25631 rtx todec = gen_int_mode (-size, Pmode);
25632 rtx par, set, mem;
25633
25634 if (INTVAL (todec) != -size)
25635 {
25636 warning (0, "stack frame too large");
25637 emit_insn (gen_trap ());
25638 return 0;
25639 }
25640
25641 if (crtl->limit_stack)
25642 {
25643 if (REG_P (stack_limit_rtx)
25644 && REGNO (stack_limit_rtx) > 1
25645 && REGNO (stack_limit_rtx) <= 31)
25646 {
25647 rtx_insn *insn
25648 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25649 gcc_assert (insn);
25650 emit_insn (insn);
25651 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
25652 }
25653 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25654 && TARGET_32BIT
25655 && DEFAULT_ABI == ABI_V4
25656 && !flag_pic)
25657 {
25658 rtx toload = gen_rtx_CONST (VOIDmode,
25659 gen_rtx_PLUS (Pmode,
25660 stack_limit_rtx,
25661 GEN_INT (size)));
25662
25663 emit_insn (gen_elf_high (tmp_reg, toload));
25664 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25665 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25666 const0_rtx));
25667 }
25668 else
25669 warning (0, "stack limit expression is not supported");
25670 }
25671
25672 if (copy_reg)
25673 {
25674 if (copy_off != 0)
25675 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25676 else
25677 emit_move_insn (copy_reg, stack_reg);
25678 }
25679
25680 if (size > 32767)
25681 {
25682 /* Need a note here so that try_split doesn't get confused. */
25683 if (get_last_insn () == NULL_RTX)
25684 emit_note (NOTE_INSN_DELETED);
25685 insn = emit_move_insn (tmp_reg, todec);
25686 try_split (PATTERN (insn), insn, 0);
25687 todec = tmp_reg;
25688 }
25689
25690 insn = emit_insn (TARGET_32BIT
25691 ? gen_movsi_update_stack (stack_reg, stack_reg,
25692 todec, stack_reg)
25693 : gen_movdi_di_update_stack (stack_reg, stack_reg,
25694 todec, stack_reg));
25695 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25696 it now and set the alias set/attributes. The above gen_*_update
25697 calls will generate a PARALLEL with the MEM set being the first
25698 operation. */
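/* An illustrative sketch of PAR for the 32-bit case:
(parallel [(set (mem:SI (plus:SI (reg 1) (const_int -size))) (reg 1))
(set (reg 1) (plus:SI (reg 1) (const_int -size)))])
i.e. a stwu-style store with update; the exact form comes from the
movsi_update_stack pattern. */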
25699 par = PATTERN (insn);
25700 gcc_assert (GET_CODE (par) == PARALLEL);
25701 set = XVECEXP (par, 0, 0);
25702 gcc_assert (GET_CODE (set) == SET);
25703 mem = SET_DEST (set);
25704 gcc_assert (MEM_P (mem));
25705 MEM_NOTRAP_P (mem) = 1;
25706 set_mem_alias_set (mem, get_frame_alias_set ());
25707
25708 RTX_FRAME_RELATED_P (insn) = 1;
25709 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25710 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
25711 GEN_INT (-size))));
25712 return insn;
25713 }
25714
25715 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25716
25717 #if PROBE_INTERVAL > 32768
25718 #error Cannot use indexed addressing mode for stack probing
25719 #endif
25720
25721 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25722 inclusive. These are offsets from the current stack pointer. */
25723
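/* Example (assuming PROBE_INTERVAL == 4096): for FIRST == 0 and
SIZE == 10000 the small case below probes at sp-4096, sp-8192 and
finally sp-10000; larger totals fall through to the loop. */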
25724 static void
25725 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25726 {
25727 /* See if we have a constant small number of probes to generate. If so,
25728 that's the easy case. */
25729 if (first + size <= 32768)
25730 {
25731 HOST_WIDE_INT i;
25732
25733 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25734 it exceeds SIZE. If only one probe is needed, this will not
25735 generate any code. Then probe at FIRST + SIZE. */
25736 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25737 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25738 -(first + i)));
25739
25740 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25741 -(first + size)));
25742 }
25743
25744 /* Otherwise, do the same as above, but in a loop. Note that we must be
25745 extra careful with variables wrapping around because we might be at
25746 the very top (or the very bottom) of the address space and we have
25747 to be able to handle this case properly; in particular, we use an
25748 equality test for the loop condition. */
25749 else
25750 {
25751 HOST_WIDE_INT rounded_size;
25752 rtx r12 = gen_rtx_REG (Pmode, 12);
25753 rtx r0 = gen_rtx_REG (Pmode, 0);
25754
25755 /* Sanity check for the addressing mode we're going to use. */
25756 gcc_assert (first <= 32768);
25757
25758 /* Step 1: round SIZE to the previous multiple of the interval. */
25759
25760 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25761
25762
25763 /* Step 2: compute initial and final value of the loop counter. */
25764
25765 /* TEST_ADDR = SP + FIRST. */
25766 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25767 -first)));
25768
25769 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25770 if (rounded_size > 32768)
25771 {
25772 emit_move_insn (r0, GEN_INT (-rounded_size));
25773 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25774 }
25775 else
25776 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25777 -rounded_size)));
25778
25779
25780 /* Step 3: the loop
25781
25782 do
25783 {
25784 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25785 probe at TEST_ADDR
25786 }
25787 while (TEST_ADDR != LAST_ADDR)
25788
25789 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25790 until N * PROBE_INTERVAL equals ROUNDED_SIZE. */
25791
25792 if (TARGET_64BIT)
25793 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
25794 else
25795 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
25796
25797
25798 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25799 that SIZE is equal to ROUNDED_SIZE. */
25800
25801 if (size != rounded_size)
25802 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
25803 }
25804 }
25805
25806 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25807 absolute addresses. */
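/* A rough sketch of the loop this emits for a 64-bit target, assuming
   PROBE_INTERVAL is 4096 and the r12/r0 register choices made by
   rs6000_emit_probe_stack_range above:

   .LPSRL0:
   addi 12,12,-4096
   stw 0,0(12)
   cmpd 0,12,0
   bne 0,.LPSRL0 */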
25808
25809 const char *
25810 output_probe_stack_range (rtx reg1, rtx reg2)
25811 {
25812 static int labelno = 0;
25813 char loop_lab[32];
25814 rtx xops[2];
25815
25816 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25817
25818 /* Loop. */
25819 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25820
25821 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25822 xops[0] = reg1;
25823 xops[1] = GEN_INT (-PROBE_INTERVAL);
25824 output_asm_insn ("addi %0,%0,%1", xops);
25825
25826 /* Probe at TEST_ADDR. */
25827 xops[1] = gen_rtx_REG (Pmode, 0);
25828 output_asm_insn ("stw %1,0(%0)", xops);
25829
25830 /* Test if TEST_ADDR == LAST_ADDR. */
25831 xops[1] = reg2;
25832 if (TARGET_64BIT)
25833 output_asm_insn ("cmpd 0,%0,%1", xops);
25834 else
25835 output_asm_insn ("cmpw 0,%0,%1", xops);
25836
25837 /* Branch. */
25838 fputs ("\tbne 0,", asm_out_file);
25839 assemble_name_raw (asm_out_file, loop_lab);
25840 fputc ('\n', asm_out_file);
25841
25842 return "";
25843 }
25844
25845 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25846 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25847 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25848 deduce these equivalences by itself so it wasn't necessary to hold
25849 its hand so much. Don't be tempted to always supply d2_f_d_e with
25850 the actual CFA register, i.e. r31 when we are using a hard frame
25851 pointer. That fails when saving regs off r1, and sched moves the
25852 r31 setup past the reg saves. */
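/* A worked example with hypothetical values: if INSN stores r31 at
   8(r11) while r11 = r1 + 128, calling this with REG = r11 and
   VAL = 128 attaches a REG_FRAME_RELATED_EXPR note describing the
   store as hitting 136(r1), the stack-pointer-relative form the
   unwinder needs. */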
25853
25854 static rtx_insn *
25855 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
25856 rtx reg2, rtx repl2)
25857 {
25858 rtx repl;
25859
25860 if (REGNO (reg) == STACK_POINTER_REGNUM)
25861 {
25862 gcc_checking_assert (val == 0);
25863 repl = NULL_RTX;
25864 }
25865 else
25866 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25867 GEN_INT (val));
25868
25869 rtx pat = PATTERN (insn);
25870 if (!repl && !reg2)
25871 {
25872 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25873 if (GET_CODE (pat) == PARALLEL)
25874 for (int i = 0; i < XVECLEN (pat, 0); i++)
25875 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25876 {
25877 rtx set = XVECEXP (pat, 0, i);
25878
25879 /* If this PARALLEL has been emitted for out-of-line
25880 register save functions, or store multiple, then omit
25881 eh_frame info for any user-defined global regs. If
25882 eh_frame info is supplied, frame unwinding will
25883 restore a user reg. */
25884 if (!REG_P (SET_SRC (set))
25885 || !fixed_reg_p (REGNO (SET_SRC (set))))
25886 RTX_FRAME_RELATED_P (set) = 1;
25887 }
25888 RTX_FRAME_RELATED_P (insn) = 1;
25889 return insn;
25890 }
25891
25892 /* We expect that 'pat' is either a SET or a PARALLEL containing
25893 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25894 are important so they all have to be marked RTX_FRAME_RELATED_P.
25895 Call simplify_replace_rtx on the SETs rather than the whole insn
25896 so as to leave the other stuff alone (for example USE of r12). */
25897
25898 set_used_flags (pat);
25899 if (GET_CODE (pat) == SET)
25900 {
25901 if (repl)
25902 pat = simplify_replace_rtx (pat, reg, repl);
25903 if (reg2)
25904 pat = simplify_replace_rtx (pat, reg2, repl2);
25905 }
25906 else if (GET_CODE (pat) == PARALLEL)
25907 {
25908 pat = shallow_copy_rtx (pat);
25909 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25910
25911 for (int i = 0; i < XVECLEN (pat, 0); i++)
25912 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25913 {
25914 rtx set = XVECEXP (pat, 0, i);
25915
25916 if (repl)
25917 set = simplify_replace_rtx (set, reg, repl);
25918 if (reg2)
25919 set = simplify_replace_rtx (set, reg2, repl2);
25920 XVECEXP (pat, 0, i) = set;
25921
25922 /* Omit eh_frame info for any user-defined global regs. */
25923 if (!REG_P (SET_SRC (set))
25924 || !fixed_reg_p (REGNO (SET_SRC (set))))
25925 RTX_FRAME_RELATED_P (set) = 1;
25926 }
25927 }
25928 else
25929 gcc_unreachable ();
25930
25931 RTX_FRAME_RELATED_P (insn) = 1;
25932 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25933
25934 return insn;
25935 }
25936
25937 /* Return a pattern, to be emitted by the caller, that sets VRSAVE
25938 and has the appropriate CLOBBERs. */
25939
25940 static rtx
25941 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25942 {
25943 int nclobs, i;
25944 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25945 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25946
25947 clobs[0]
25948 = gen_rtx_SET (vrsave,
25949 gen_rtx_UNSPEC_VOLATILE (SImode,
25950 gen_rtvec (2, reg, vrsave),
25951 UNSPECV_SET_VRSAVE));
25952
25953 nclobs = 1;
25954
25955 /* We need to clobber the registers in the mask so the scheduler
25956 does not move sets to VRSAVE before sets of AltiVec registers.
25957
25958 However, if the function receives nonlocal gotos, reload will set
25959 all call saved registers live. We will end up with:
25960
25961 (set (reg 999) (mem))
25962 (parallel [ (set (reg vrsave) (unspec blah))
25963 (clobber (reg 999))])
25964
25965 The clobber will cause the store into reg 999 to be dead, and
25966 flow will attempt to delete an epilogue insn. In this case, we
25967 need an unspec use/set of the register. */
25968
25969 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25970 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25971 {
25972 if (!epiloguep || call_used_regs [i])
25973 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
25974 gen_rtx_REG (V4SImode, i));
25975 else
25976 {
25977 rtx reg = gen_rtx_REG (V4SImode, i);
25978
25979 clobs[nclobs++]
25980 = gen_rtx_SET (reg,
25981 gen_rtx_UNSPEC (V4SImode,
25982 gen_rtvec (1, reg), 27));
25983 }
25984 }
25985
25986 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25987
25988 for (i = 0; i < nclobs; ++i)
25989 XVECEXP (insn, 0, i) = clobs[i];
25990
25991 return insn;
25992 }
25993
25994 static rtx
25995 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25996 {
25997 rtx addr, mem;
25998
25999 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26000 mem = gen_frame_mem (GET_MODE (reg), addr);
26001 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26002 }
26003
26004 static rtx
26005 gen_frame_load (rtx reg, rtx frame_reg, int offset)
26006 {
26007 return gen_frame_set (reg, frame_reg, offset, false);
26008 }
26009
26010 static rtx
26011 gen_frame_store (rtx reg, rtx frame_reg, int offset)
26012 {
26013 return gen_frame_set (reg, frame_reg, offset, true);
26014 }
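/* Illustrative only: gen_frame_store (gen_rtx_REG (DImode, 31), sp, 16),
   with SP holding (reg:DI 1), builds
   (set (mem:DI (plus:DI (reg:DI 1) (const_int 16))) (reg:DI 31)),
   the MEM marked non-trapping and given the frame alias set by
   gen_frame_mem. */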
26015
26016 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26017 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26018
26019 static rtx_insn *
26020 emit_frame_save (rtx frame_reg, machine_mode mode,
26021 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
26022 {
26023 rtx reg;
26024
26025 /* Rule out the cases that would need register indexed addressing. */
26026 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26027 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
26028
26029 reg = gen_rtx_REG (mode, regno);
26030 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
26031 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
26032 NULL_RTX, NULL_RTX);
26033 }
26034
26035 /* Emit an offset memory reference suitable for a frame store, while
26036 converting to a valid addressing mode. */
26037
26038 static rtx
26039 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
26040 {
26041 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
26042 }
26043
26044 #ifndef TARGET_FIX_AND_CONTINUE
26045 #define TARGET_FIX_AND_CONTINUE 0
26046 #endif
26047
26048 /* FIRST_SAVRES_REGISTER is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
26049 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26050 #define LAST_SAVRES_REGISTER 31
26051 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26052
26053 enum {
26054 SAVRES_LR = 0x1,
26055 SAVRES_SAVE = 0x2,
26056 SAVRES_REG = 0x0c,
26057 SAVRES_GPR = 0,
26058 SAVRES_FPR = 4,
26059 SAVRES_VR = 8
26060 };
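/* SEL arguments below are bitwise ORs of these flags, e.g.
   (SAVRES_SAVE | SAVRES_FPR | SAVRES_LR) requests the out-of-line FPR
   save routine that also saves LR, and (sel & SAVRES_REG) extracts the
   register-class field. */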
26061
26062 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
26063
26064 /* Temporary holding space for an out-of-line register save/restore
26065 routine name. */
26066 static char savres_routine_name[30];
26067
26068 /* Return the name for an out-of-line register save/restore routine.
26069 REGNO is the first register handled; SEL is a mask of SAVRES_* flags. */
26070
26071 static char *
26072 rs6000_savres_routine_name (int regno, int sel)
26073 {
26074 const char *prefix = "";
26075 const char *suffix = "";
26076
26077 /* Different targets are supposed to define
26078 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26079 routine name could be defined with:
26080
26081 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26082
26083 This is a nice idea in theory, but in reality, things are
26084 complicated in several ways:
26085
26086 - ELF targets have save/restore routines for GPRs.
26087
26088 - PPC64 ELF targets have routines for save/restore of GPRs that
26089 differ in what they do with the link register, so having a set
26090 prefix doesn't work. (We only use one of the save routines at
26091 the moment, though.)
26092
26093 - PPC32 ELF targets have "exit" versions of the restore routines
26094 that restore the link register and can save some extra space.
26095 These require an extra suffix. (There are also "tail" versions
26096 of the restore routines and "GOT" versions of the save routines,
26097 but we don't generate those at present. Same problems apply,
26098 though.)
26099
26100 We deal with all this by synthesizing our own prefix/suffix and
26101 using that for the simple sprintf call shown above. */
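/* For example, on 32-bit ELF (ABI_V4), regno 29 with
   SAVRES_SAVE | SAVRES_GPR | SAVRES_LR produces "_savegpr_29_x", while
   the plain restore, sel == SAVRES_GPR, produces "_restgpr_29". */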
26102 if (DEFAULT_ABI == ABI_V4)
26103 {
26104 if (TARGET_64BIT)
26105 goto aix_names;
26106
26107 if ((sel & SAVRES_REG) == SAVRES_GPR)
26108 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
26109 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26110 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
26111 else if ((sel & SAVRES_REG) == SAVRES_VR)
26112 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26113 else
26114 abort ();
26115
26116 if ((sel & SAVRES_LR))
26117 suffix = "_x";
26118 }
26119 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26120 {
26121 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26122 /* No out-of-line save/restore routines for GPRs on AIX. */
26123 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
26124 #endif
26125
26126 aix_names:
26127 if ((sel & SAVRES_REG) == SAVRES_GPR)
26128 prefix = ((sel & SAVRES_SAVE)
26129 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
26130 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
26131 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26132 {
26133 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26134 if ((sel & SAVRES_LR))
26135 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
26136 else
26137 #endif
26138 {
26139 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
26140 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
26141 }
26142 }
26143 else if ((sel & SAVRES_REG) == SAVRES_VR)
26144 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26145 else
26146 abort ();
26147 }
26148
26149 if (DEFAULT_ABI == ABI_DARWIN)
26150 {
26151 /* The Darwin approach is (slightly) different, in order to be
26152 compatible with code generated by the system toolchain. There is a
26153 single symbol for the start of the save sequence, and the code here
26154 embeds an offset into that code on the basis of the first register
26155 to be saved. */
26156 prefix = (sel & SAVRES_SAVE) ? "save" : "rest";
26157 if ((sel & SAVRES_REG) == SAVRES_GPR)
26158 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26159 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26160 (regno - 13) * 4, prefix, regno);
26161 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26162 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
26163 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
26164 else if ((sel & SAVRES_REG) == SAVRES_VR)
26165 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
26166 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
26167 else
26168 abort ();
26169 }
26170 else
26171 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
26172
26173 return savres_routine_name;
26174 }
26175
26176 /* Return an RTL SYMBOL_REF for the out-of-line register save/restore
26177 routine selected by SEL (a mask of SAVRES_* flags) for INFO's frame. */
26178
26179 static rtx
26180 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
26181 {
26182 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
26183 ? info->first_gp_reg_save
26184 : (sel & SAVRES_REG) == SAVRES_FPR
26185 ? info->first_fp_reg_save - 32
26186 : (sel & SAVRES_REG) == SAVRES_VR
26187 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
26188 : -1);
26189 rtx sym;
26190 int select = sel;
26191
26192 /* Don't generate bogus routine names. */
26193 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26194 && regno <= LAST_SAVRES_REGISTER
26195 && select >= 0 && select <= 12);
26196
26197 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26198
26199 if (sym == NULL)
26200 {
26201 char *name;
26202
26203 name = rs6000_savres_routine_name (regno, sel);
26204
26205 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26206 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26207 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26208 }
26209
26210 return sym;
26211 }
26212
26213 /* Emit a sequence of insns, including a stack tie if needed, for
26214 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26215 reset the stack pointer, but move the base of the frame into
26216 reg UPDT_REGNO for use by out-of-line register restore routines. */
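/* Two illustrative calls: frame_reg_rtx = r11, frame_off = 112,
   updt_regno = 1 emits r1 = r11 + 112 (as a stack tie on V.4), while
   frame_reg_rtx = r1, frame_off = 0, updt_regno = 11 just copies r1
   into r11 (again via a tie on V.4) for the out-of-line restore
   routines. */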
26217
26218 static rtx
26219 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26220 unsigned updt_regno)
26221 {
26222 /* If there is nothing to do, don't do anything. */
26223 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
26224 return NULL_RTX;
26225
26226 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26227
26228 /* This blockage is needed so that sched doesn't decide to move
26229 the sp change before the register restores. */
26230 if (DEFAULT_ABI == ABI_V4)
26231 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
26232 GEN_INT (frame_off)));
26233
26234 /* If we are restoring registers out-of-line, we will be using the
26235 "exit" variants of the restore routines, which will reset the
26236 stack for us. But we do need to point updt_reg into the
26237 right place for those routines. */
26238 if (frame_off != 0)
26239 return emit_insn (gen_add3_insn (updt_reg_rtx,
26240 frame_reg_rtx, GEN_INT (frame_off)));
26241 else
26242 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26245 }
26246
26247 /* Return the register number used as a pointer by out-of-line
26248 save/restore functions. */
26249
26250 static inline unsigned
26251 ptr_regno_for_savres (int sel)
26252 {
26253 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26254 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26255 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
26256 }
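/* In other words: on AIX and ELFv2, the FPR routines and any routine
   that also handles LR take their pointer in r1 and the remaining GPR
   and VR variants use r12; on V.4 and Darwin the pointer is r11,
   except for Darwin's FPR routines, which use r1. */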
26257
26258 /* Construct a parallel rtx describing the effect of a call to an
26259 out-of-line register save/restore routine, and emit the insn
26260 or jump_insn as appropriate. */
26261
26262 static rtx_insn *
26263 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26264 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26265 machine_mode reg_mode, int sel)
26266 {
26267 int i;
26268 int offset, start_reg, end_reg, n_regs, use_reg;
26269 int reg_size = GET_MODE_SIZE (reg_mode);
26270 rtx sym;
26271 rtvec p;
26272 rtx par;
26273 rtx_insn *insn;
26274
26275 offset = 0;
26276 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26277 ? info->first_gp_reg_save
26278 : (sel & SAVRES_REG) == SAVRES_FPR
26279 ? info->first_fp_reg_save
26280 : (sel & SAVRES_REG) == SAVRES_VR
26281 ? info->first_altivec_reg_save
26282 : -1);
26283 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26284 ? 32
26285 : (sel & SAVRES_REG) == SAVRES_FPR
26286 ? 64
26287 : (sel & SAVRES_REG) == SAVRES_VR
26288 ? LAST_ALTIVEC_REGNO + 1
26289 : -1);
26290 n_regs = end_reg - start_reg;
26291 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26292 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
26293 + n_regs);
26294
26295 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26296 RTVEC_ELT (p, offset++) = ret_rtx;
26297
26298 RTVEC_ELT (p, offset++)
26299 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
26300
26301 sym = rs6000_savres_routine_sym (info, sel);
26302 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26303
26304 use_reg = ptr_regno_for_savres (sel);
26305 if ((sel & SAVRES_REG) == SAVRES_VR)
26306 {
26307 /* Vector regs are saved/restored using [reg+reg] addressing. */
26308 RTVEC_ELT (p, offset++)
26309 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26310 RTVEC_ELT (p, offset++)
26311 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26312 }
26313 else
26314 RTVEC_ELT (p, offset++)
26315 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26316
26317 for (i = 0; i < end_reg - start_reg; i++)
26318 RTVEC_ELT (p, i + offset)
26319 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26320 frame_reg_rtx, save_area_offset + reg_size * i,
26321 (sel & SAVRES_SAVE) != 0);
26322
26323 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26324 RTVEC_ELT (p, i + offset)
26325 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26326
26327 par = gen_rtx_PARALLEL (VOIDmode, p);
26328
26329 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26330 {
26331 insn = emit_jump_insn (par);
26332 JUMP_LABEL (insn) = ret_rtx;
26333 }
26334 else
26335 insn = emit_insn (par);
26336 return insn;
26337 }
26338
26339 /* Emit code to store CR fields that need to be saved into REG. */
26340
26341 static void
26342 rs6000_emit_move_from_cr (rtx reg)
26343 {
26344 /* Only the ELFv2 ABI allows storing only selected fields. */
26345 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26346 {
26347 int i, cr_reg[8], count = 0;
26348
26349 /* Collect CR fields that must be saved. */
26350 for (i = 0; i < 8; i++)
26351 if (save_reg_p (CR0_REGNO + i))
26352 cr_reg[count++] = i;
26353
26354 /* If it's just a single one, use mfcrf. */
26355 if (count == 1)
26356 {
26357 rtvec p = rtvec_alloc (1);
26358 rtvec r = rtvec_alloc (2);
26359 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26360 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26361 RTVEC_ELT (p, 0)
26362 = gen_rtx_SET (reg,
26363 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26364
26365 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26366 return;
26367 }
26368
26369 /* ??? It might be better to handle count == 2 / 3 cases here
26370 as well, using logical operations to combine the values. */
26371 }
26372
26373 emit_insn (gen_movesi_from_cr (reg));
26374 }
26375
26376 /* Return whether the split-stack arg pointer (r12) is used. */
26377
26378 static bool
26379 split_stack_arg_pointer_used_p (void)
26380 {
26381 /* If the pseudo holding the arg pointer is no longer a pseudo,
26382 then the arg pointer is used. */
26383 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26384 && (!REG_P (cfun->machine->split_stack_arg_pointer)
26385 || (REGNO (cfun->machine->split_stack_arg_pointer)
26386 < FIRST_PSEUDO_REGISTER)))
26387 return true;
26388
26389 /* Unfortunately we also need to do some code scanning, since
26390 r12 may have been substituted for the pseudo. */
26391 rtx_insn *insn;
26392 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26393 FOR_BB_INSNS (bb, insn)
26394 if (NONDEBUG_INSN_P (insn))
26395 {
26396 /* A call destroys r12. */
26397 if (CALL_P (insn))
26398 return false;
26399
26400 df_ref use;
26401 FOR_EACH_INSN_USE (use, insn)
26402 {
26403 rtx x = DF_REF_REG (use);
26404 if (REG_P (x) && REGNO (x) == 12)
26405 return true;
26406 }
26407 df_ref def;
26408 FOR_EACH_INSN_DEF (def, insn)
26409 {
26410 rtx x = DF_REF_REG (def);
26411 if (REG_P (x) && REGNO (x) == 12)
26412 return false;
26413 }
26414 }
26415 return bitmap_bit_p (DF_LR_OUT (bb), 12);
26416 }
26417
26418 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26419
26420 static bool
26421 rs6000_global_entry_point_needed_p (void)
26422 {
26423 /* Only needed for the ELFv2 ABI. */
26424 if (DEFAULT_ABI != ABI_ELFv2)
26425 return false;
26426
26427 /* With -msingle-pic-base, we assume the whole program shares the same
26428 TOC, so no global entry point prologues are needed anywhere. */
26429 if (TARGET_SINGLE_PIC_BASE)
26430 return false;
26431
26432 /* Ensure we have a global entry point for thunks. ??? We could
26433 avoid that if the target routine doesn't need a global entry point,
26434 but we do not know whether this is the case at this point. */
26435 if (cfun->is_thunk)
26436 return true;
26437
26438 /* For regular functions, rs6000_emit_prologue sets this flag if the
26439 routine ever uses the TOC pointer. */
26440 return cfun->machine->r2_setup_needed;
26441 }
26442
26443 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
26444 static sbitmap
26445 rs6000_get_separate_components (void)
26446 {
26447 rs6000_stack_t *info = rs6000_stack_info ();
26448
26449 if (WORLD_SAVE_P (info))
26450 return NULL;
26451
26452 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
26453 && !(info->savres_strategy & REST_MULTIPLE));
26454
26455 /* Component 0 is the save/restore of LR (done via GPR0).
26456 Components 13..31 are the save/restore of GPR13..GPR31.
26457 Components 46..63 are the save/restore of FPR14..FPR31. */
26458
26459 cfun->machine->n_components = 64;
26460
26461 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26462 bitmap_clear (components);
26463
26464 int reg_size = TARGET_32BIT ? 4 : 8;
26465 int fp_reg_size = 8;
26466
26467 /* The GPRs we need saved to the frame. */
26468 if ((info->savres_strategy & SAVE_INLINE_GPRS)
26469 && (info->savres_strategy & REST_INLINE_GPRS))
26470 {
26471 int offset = info->gp_save_offset;
26472 if (info->push_p)
26473 offset += info->total_size;
26474
26475 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26476 {
26477 if (IN_RANGE (offset, -0x8000, 0x7fff)
26478 && rs6000_reg_live_or_pic_offset_p (regno))
26479 bitmap_set_bit (components, regno);
26480
26481 offset += reg_size;
26482 }
26483 }
26484
26485 /* Don't mess with the hard frame pointer. */
26486 if (frame_pointer_needed)
26487 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
26488
26489 /* Don't mess with the fixed TOC register. */
26490 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
26491 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26492 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26493 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
26494
26495 /* The FPRs we need saved to the frame. */
26496 if ((info->savres_strategy & SAVE_INLINE_FPRS)
26497 && (info->savres_strategy & REST_INLINE_FPRS))
26498 {
26499 int offset = info->fp_save_offset;
26500 if (info->push_p)
26501 offset += info->total_size;
26502
26503 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26504 {
26505 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
26506 bitmap_set_bit (components, regno);
26507
26508 offset += fp_reg_size;
26509 }
26510 }
26511
26512 /* Optimize LR save and restore if we can. This is component 0. Any
26513 out-of-line register save/restore routines need LR. */
26514 if (info->lr_save_p
26515 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26516 && (info->savres_strategy & SAVE_INLINE_GPRS)
26517 && (info->savres_strategy & REST_INLINE_GPRS)
26518 && (info->savres_strategy & SAVE_INLINE_FPRS)
26519 && (info->savres_strategy & REST_INLINE_FPRS)
26520 && (info->savres_strategy & SAVE_INLINE_VRS)
26521 && (info->savres_strategy & REST_INLINE_VRS))
26522 {
26523 int offset = info->lr_save_offset;
26524 if (info->push_p)
26525 offset += info->total_size;
26526 if (IN_RANGE (offset, -0x8000, 0x7fff))
26527 bitmap_set_bit (components, 0);
26528 }
26529
26530 return components;
26531 }
26532
26533 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
26534 static sbitmap
26535 rs6000_components_for_bb (basic_block bb)
26536 {
26537 rs6000_stack_t *info = rs6000_stack_info ();
26538
26539 bitmap in = DF_LIVE_IN (bb);
26540 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26541 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26542
26543 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26544 bitmap_clear (components);
26545
26546 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
26547
26548 /* GPRs. */
26549 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26550 if (bitmap_bit_p (in, regno)
26551 || bitmap_bit_p (gen, regno)
26552 || bitmap_bit_p (kill, regno))
26553 bitmap_set_bit (components, regno);
26554
26555 /* FPRs. */
26556 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26557 if (bitmap_bit_p (in, regno)
26558 || bitmap_bit_p (gen, regno)
26559 || bitmap_bit_p (kill, regno))
26560 bitmap_set_bit (components, regno);
26561
26562 /* The link register. */
26563 if (bitmap_bit_p (in, LR_REGNO)
26564 || bitmap_bit_p (gen, LR_REGNO)
26565 || bitmap_bit_p (kill, LR_REGNO))
26566 bitmap_set_bit (components, 0);
26567
26568 return components;
26569 }
26570
26571 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26572 static void
26573 rs6000_disqualify_components (sbitmap components, edge e,
26574 sbitmap edge_components, bool /*is_prologue*/)
26575 {
26576 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26577 live where we want to place that code. */
26578 if (bitmap_bit_p (edge_components, 0)
26579 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26580 {
26581 if (dump_file)
26582 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26583 "on entry to bb %d\n", e->dest->index);
26584 bitmap_clear_bit (components, 0);
26585 }
26586 }
26587
26588 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26589 static void
26590 rs6000_emit_prologue_components (sbitmap components)
26591 {
26592 rs6000_stack_t *info = rs6000_stack_info ();
26593 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26594 ? HARD_FRAME_POINTER_REGNUM
26595 : STACK_POINTER_REGNUM);
26596
26597 machine_mode reg_mode = Pmode;
26598 int reg_size = TARGET_32BIT ? 4 : 8;
26599 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26600 ? DFmode : SFmode;
26601 int fp_reg_size = 8;
26602
26603 /* Prologue for LR. */
26604 if (bitmap_bit_p (components, 0))
26605 {
26606 rtx reg = gen_rtx_REG (reg_mode, 0);
26607 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
26608 RTX_FRAME_RELATED_P (insn) = 1;
26609 add_reg_note (insn, REG_CFA_REGISTER, NULL);
26610
26611 int offset = info->lr_save_offset;
26612 if (info->push_p)
26613 offset += info->total_size;
26614
26615 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26616 RTX_FRAME_RELATED_P (insn) = 1;
26617 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26618 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26619 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26620 }
26621
26622 /* Prologue for the GPRs. */
26623 int offset = info->gp_save_offset;
26624 if (info->push_p)
26625 offset += info->total_size;
26626
26627 for (int i = info->first_gp_reg_save; i < 32; i++)
26628 {
26629 if (bitmap_bit_p (components, i))
26630 {
26631 rtx reg = gen_rtx_REG (reg_mode, i);
26632 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26633 RTX_FRAME_RELATED_P (insn) = 1;
26634 rtx set = copy_rtx (single_set (insn));
26635 add_reg_note (insn, REG_CFA_OFFSET, set);
26636 }
26637
26638 offset += reg_size;
26639 }
26640
26641 /* Prologue for the FPRs. */
26642 offset = info->fp_save_offset;
26643 if (info->push_p)
26644 offset += info->total_size;
26645
26646 for (int i = info->first_fp_reg_save; i < 64; i++)
26647 {
26648 if (bitmap_bit_p (components, i))
26649 {
26650 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26651 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26652 RTX_FRAME_RELATED_P (insn) = 1;
26653 rtx set = copy_rtx (single_set (insn));
26654 add_reg_note (insn, REG_CFA_OFFSET, set);
26655 }
26656
26657 offset += fp_reg_size;
26658 }
26659 }
26660
26661 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26662 static void
26663 rs6000_emit_epilogue_components (sbitmap components)
26664 {
26665 rs6000_stack_t *info = rs6000_stack_info ();
26666 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26667 ? HARD_FRAME_POINTER_REGNUM
26668 : STACK_POINTER_REGNUM);
26669
26670 machine_mode reg_mode = Pmode;
26671 int reg_size = TARGET_32BIT ? 4 : 8;
26672
26673 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26674 ? DFmode : SFmode;
26675 int fp_reg_size = 8;
26676
26677 /* Epilogue for the FPRs. */
26678 int offset = info->fp_save_offset;
26679 if (info->push_p)
26680 offset += info->total_size;
26681
26682 for (int i = info->first_fp_reg_save; i < 64; i++)
26683 {
26684 if (bitmap_bit_p (components, i))
26685 {
26686 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26687 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26688 RTX_FRAME_RELATED_P (insn) = 1;
26689 add_reg_note (insn, REG_CFA_RESTORE, reg);
26690 }
26691
26692 offset += fp_reg_size;
26693 }
26694
26695 /* Epilogue for the GPRs. */
26696 offset = info->gp_save_offset;
26697 if (info->push_p)
26698 offset += info->total_size;
26699
26700 for (int i = info->first_gp_reg_save; i < 32; i++)
26701 {
26702 if (bitmap_bit_p (components, i))
26703 {
26704 rtx reg = gen_rtx_REG (reg_mode, i);
26705 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26706 RTX_FRAME_RELATED_P (insn) = 1;
26707 add_reg_note (insn, REG_CFA_RESTORE, reg);
26708 }
26709
26710 offset += reg_size;
26711 }
26712
26713 /* Epilogue for LR. */
26714 if (bitmap_bit_p (components, 0))
26715 {
26716 int offset = info->lr_save_offset;
26717 if (info->push_p)
26718 offset += info->total_size;
26719
26720 rtx reg = gen_rtx_REG (reg_mode, 0);
26721 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26722
26723 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26724 insn = emit_move_insn (lr, reg);
26725 RTX_FRAME_RELATED_P (insn) = 1;
26726 add_reg_note (insn, REG_CFA_RESTORE, lr);
26727 }
26728 }
26729
26730 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26731 static void
26732 rs6000_set_handled_components (sbitmap components)
26733 {
26734 rs6000_stack_t *info = rs6000_stack_info ();
26735
26736 for (int i = info->first_gp_reg_save; i < 32; i++)
26737 if (bitmap_bit_p (components, i))
26738 cfun->machine->gpr_is_wrapped_separately[i] = true;
26739
26740 for (int i = info->first_fp_reg_save; i < 64; i++)
26741 if (bitmap_bit_p (components, i))
26742 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26743
26744 if (bitmap_bit_p (components, 0))
26745 cfun->machine->lr_is_wrapped_separately = true;
26746 }
26747
26748 /* VRSAVE is a bit vector representing which AltiVec registers
26749 are used. The OS uses this to determine which vector
26750 registers to save on a context switch. We need to save
26751 VRSAVE on the stack frame, add whatever AltiVec registers we
26752 used in this function, and do the corresponding magic in the
26753 epilogue. */
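/* A rough sketch of what this expands to (mnemonics and register
   numbers are illustrative, not lifted from the .md patterns):

   mfvrsave rS # copy VRSAVE into a GPR
   stw rS,OFF(rFRAME) # save the caller's value
   oris/ori rS,rS,mask # add the AltiVec regs this function uses
   mtvrsave rS # install the new mask */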
26754 static void
26755 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26756 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26757 {
26758 /* Get VRSAVE into a GPR. */
26759 rtx reg = gen_rtx_REG (SImode, save_regno);
26760 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26761 if (TARGET_MACHO)
26762 emit_insn (gen_get_vrsave_internal (reg));
26763 else
26764 emit_insn (gen_rtx_SET (reg, vrsave));
26765
26766 /* Save VRSAVE. */
26767 int offset = info->vrsave_save_offset + frame_off;
26768 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26769
26770 /* Include the registers in the mask. */
26771 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
26772
26773 emit_insn (generate_set_vrsave (reg, info, 0));
26774 }
26775
26776 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26777 called, it left the arg pointer to the old stack in r29. Otherwise, the
26778 arg pointer is the top of the current frame. */
26779 static void
26780 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26781 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26782 {
26783 cfun->machine->split_stack_argp_used = true;
26784
26785 if (sp_adjust)
26786 {
26787 rtx r12 = gen_rtx_REG (Pmode, 12);
26788 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26789 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26790 emit_insn_before (set_r12, sp_adjust);
26791 }
26792 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26793 {
26794 rtx r12 = gen_rtx_REG (Pmode, 12);
26795 if (frame_off == 0)
26796 emit_move_insn (r12, frame_reg_rtx);
26797 else
26798 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26799 }
26800
26801 if (info->push_p)
26802 {
26803 rtx r12 = gen_rtx_REG (Pmode, 12);
26804 rtx r29 = gen_rtx_REG (Pmode, 29);
26805 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26806 rtx not_more = gen_label_rtx ();
26807 rtx jump;
26808
26809 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26810 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26811 gen_rtx_LABEL_REF (VOIDmode, not_more),
26812 pc_rtx);
26813 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26814 JUMP_LABEL (jump) = not_more;
26815 LABEL_NUSES (not_more) += 1;
26816 emit_move_insn (r12, r29);
26817 emit_label (not_more);
26818 }
26819 }
26820
26821 /* Emit function prologue as insns. */
26822
26823 void
26824 rs6000_emit_prologue (void)
26825 {
26826 rs6000_stack_t *info = rs6000_stack_info ();
26827 machine_mode reg_mode = Pmode;
26828 int reg_size = TARGET_32BIT ? 4 : 8;
26829 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26830 ? DFmode : SFmode;
26831 int fp_reg_size = 8;
26832 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26833 rtx frame_reg_rtx = sp_reg_rtx;
26834 unsigned int cr_save_regno;
26835 rtx cr_save_rtx = NULL_RTX;
26836 rtx_insn *insn;
26837 int strategy;
26838 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26839 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26840 && call_used_regs[STATIC_CHAIN_REGNUM]);
26841 int using_split_stack = (flag_split_stack
26842 && (lookup_attribute ("no_split_stack",
26843 DECL_ATTRIBUTES (cfun->decl))
26844 == NULL));
26845
26846 /* Offset to top of frame for frame_reg and sp respectively. */
26847 HOST_WIDE_INT frame_off = 0;
26848 HOST_WIDE_INT sp_off = 0;
26849 /* sp_adjust is the stack adjusting instruction, tracked so that the
26850 insn setting up the split-stack arg pointer can be emitted just
26851 prior to it, when r12 is not used here for other purposes. */
26852 rtx_insn *sp_adjust = 0;
26853
26854 #if CHECKING_P
26855 /* Track and check usage of r0, r11, r12. */
26856 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26857 #define START_USE(R) do \
26858 { \
26859 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26860 reg_inuse |= 1 << (R); \
26861 } while (0)
26862 #define END_USE(R) do \
26863 { \
26864 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26865 reg_inuse &= ~(1 << (R)); \
26866 } while (0)
26867 #define NOT_INUSE(R) do \
26868 { \
26869 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26870 } while (0)
26871 #else
26872 #define START_USE(R) do {} while (0)
26873 #define END_USE(R) do {} while (0)
26874 #define NOT_INUSE(R) do {} while (0)
26875 #endif
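/* These expand to nothing unless checking is enabled. With checking,
   e.g. START_USE (0) before loading LR into r0 and END_USE (0) after
   storing it mean that an overlapping START_USE (0) trips the assert
   instead of silently clobbering a live temporary. */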
26876
26877 if (DEFAULT_ABI == ABI_ELFv2
26878 && !TARGET_SINGLE_PIC_BASE)
26879 {
26880 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26881
26882 /* With -mminimal-toc we may generate an extra use of r2 below. */
26883 if (TARGET_TOC && TARGET_MINIMAL_TOC
26884 && !constant_pool_empty_p ())
26885 cfun->machine->r2_setup_needed = true;
26886 }
26887
26888
26889 if (flag_stack_usage_info)
26890 current_function_static_stack_size = info->total_size;
26891
26892 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26893 {
26894 HOST_WIDE_INT size = info->total_size;
26895
26896 if (crtl->is_leaf && !cfun->calls_alloca)
26897 {
26898 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
26899 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
26900 size - STACK_CHECK_PROTECT);
26901 }
26902 else if (size > 0)
26903 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
26904 }
26905
26906 if (TARGET_FIX_AND_CONTINUE)
26907 {
26908 /* GDB on Darwin arranges to forward a function from the old
26909 address by modifying the first 5 instructions of the function
26910 to branch to the overriding function. This is necessary to
26911 permit function pointers that point to the old function to
26912 actually forward to the new function. */
26913 emit_insn (gen_nop ());
26914 emit_insn (gen_nop ());
26915 emit_insn (gen_nop ());
26916 emit_insn (gen_nop ());
26917 emit_insn (gen_nop ());
26918 }
26919
26920 /* Handle world saves specially here. */
26921 if (WORLD_SAVE_P (info))
26922 {
26923 int i, j, sz;
26924 rtx treg;
26925 rtvec p;
26926 rtx reg0;
26927
26928 /* save_world expects lr in r0. */
26929 reg0 = gen_rtx_REG (Pmode, 0);
26930 if (info->lr_save_p)
26931 {
26932 insn = emit_move_insn (reg0,
26933 gen_rtx_REG (Pmode, LR_REGNO));
26934 RTX_FRAME_RELATED_P (insn) = 1;
26935 }
26936
26937 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26938 assumptions about the offsets of various bits of the stack
26939 frame. */
26940 gcc_assert (info->gp_save_offset == -220
26941 && info->fp_save_offset == -144
26942 && info->lr_save_offset == 8
26943 && info->cr_save_offset == 4
26944 && info->push_p
26945 && info->lr_save_p
26946 && (!crtl->calls_eh_return
26947 || info->ehrd_offset == -432)
26948 && info->vrsave_save_offset == -224
26949 && info->altivec_save_offset == -416);
26950
26951 treg = gen_rtx_REG (SImode, 11);
26952 emit_move_insn (treg, GEN_INT (-info->total_size));
26953
26954 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26955 in R11. It also clobbers R12, so beware! */
26956
26957 /* Preserve CR2 for save_world prologues. */
26958 sz = 5;
26959 sz += 32 - info->first_gp_reg_save;
26960 sz += 64 - info->first_fp_reg_save;
26961 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26962 p = rtvec_alloc (sz);
26963 j = 0;
26964 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26965 gen_rtx_REG (SImode,
26966 LR_REGNO));
26967 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26968 gen_rtx_SYMBOL_REF (Pmode,
26969 "*save_world"));
26970 /* We do floats first so that the instruction pattern matches
26971 properly. */
26972 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26973 RTVEC_ELT (p, j++)
26974 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26975 ? DFmode : SFmode,
26976 info->first_fp_reg_save + i),
26977 frame_reg_rtx,
26978 info->fp_save_offset + frame_off + 8 * i);
26979 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26980 RTVEC_ELT (p, j++)
26981 = gen_frame_store (gen_rtx_REG (V4SImode,
26982 info->first_altivec_reg_save + i),
26983 frame_reg_rtx,
26984 info->altivec_save_offset + frame_off + 16 * i);
26985 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26986 RTVEC_ELT (p, j++)
26987 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26988 frame_reg_rtx,
26989 info->gp_save_offset + frame_off + reg_size * i);
26990
26991 /* CR register traditionally saved as CR2. */
26992 RTVEC_ELT (p, j++)
26993 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26994 frame_reg_rtx, info->cr_save_offset + frame_off);
26995 /* Explain the use of R0. */
26996 if (info->lr_save_p)
26997 RTVEC_ELT (p, j++)
26998 = gen_frame_store (reg0,
26999 frame_reg_rtx, info->lr_save_offset + frame_off);
27000 /* Explain what happens to the stack pointer. */
27001 {
27002 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
27003 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
27004 }
27005
27006 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27007 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27008 treg, GEN_INT (-info->total_size));
27009 sp_off = frame_off = info->total_size;
27010 }
27011
27012 strategy = info->savres_strategy;
27013
27014 /* For V.4, update stack before we do any saving and set back pointer. */
27015 if (! WORLD_SAVE_P (info)
27016 && info->push_p
27017 && (DEFAULT_ABI == ABI_V4
27018 || crtl->calls_eh_return))
27019 {
27020 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
27021 || !(strategy & SAVE_INLINE_GPRS)
27022 || !(strategy & SAVE_INLINE_VRS));
27023 int ptr_regno = -1;
27024 rtx ptr_reg = NULL_RTX;
27025 int ptr_off = 0;
27026
27027 if (info->total_size < 32767)
27028 frame_off = info->total_size;
27029 else if (need_r11)
27030 ptr_regno = 11;
27031 else if (info->cr_save_p
27032 || info->lr_save_p
27033 || info->first_fp_reg_save < 64
27034 || info->first_gp_reg_save < 32
27035 || info->altivec_size != 0
27036 || info->vrsave_size != 0
27037 || crtl->calls_eh_return)
27038 ptr_regno = 12;
27039 else
27040 {
27041 /* The prologue won't be saving any regs so there is no need
27042 to set up a frame register to access any frame save area.
27043 We also won't be using frame_off anywhere below, but set
27044 the correct value anyway to protect against future
27045 changes to this function. */
27046 frame_off = info->total_size;
27047 }
27048 if (ptr_regno != -1)
27049 {
27050 /* Set up the frame offset to that needed by the first
27051 out-of-line save function. */
27052 START_USE (ptr_regno);
27053 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27054 frame_reg_rtx = ptr_reg;
27055 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
27056 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
27057 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
27058 ptr_off = info->gp_save_offset + info->gp_size;
27059 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
27060 ptr_off = info->altivec_save_offset + info->altivec_size;
27061 frame_off = -ptr_off;
27062 }
27063 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27064 ptr_reg, ptr_off);
27065 if (REGNO (frame_reg_rtx) == 12)
27066 sp_adjust = 0;
27067 sp_off = info->total_size;
27068 if (frame_reg_rtx != sp_reg_rtx)
27069 rs6000_emit_stack_tie (frame_reg_rtx, false);
27070 }
27071
27072 /* If we use the link register, get it into r0. */
27073 if (!WORLD_SAVE_P (info) && info->lr_save_p
27074 && !cfun->machine->lr_is_wrapped_separately)
27075 {
27076 rtx addr, reg, mem;
27077
27078 reg = gen_rtx_REG (Pmode, 0);
27079 START_USE (0);
27080 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27081 RTX_FRAME_RELATED_P (insn) = 1;
27082
27083 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
27084 | SAVE_NOINLINE_FPRS_SAVES_LR)))
27085 {
27086 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27087 GEN_INT (info->lr_save_offset + frame_off));
27088 mem = gen_rtx_MEM (Pmode, addr);
27089 /* This should not use rs6000_sr_alias_set, because of
27090 __builtin_return_address. */
27091
27092 insn = emit_move_insn (mem, reg);
27093 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27094 NULL_RTX, NULL_RTX);
27095 END_USE (0);
27096 }
27097 }
27098
27099 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27100 r12 will be needed by the out-of-line gpr save. */
27101 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27102 && !(strategy & (SAVE_INLINE_GPRS
27103 | SAVE_NOINLINE_GPRS_SAVES_LR))
27104 ? 11 : 12);
27105 if (!WORLD_SAVE_P (info)
27106 && info->cr_save_p
27107 && REGNO (frame_reg_rtx) != cr_save_regno
27108 && !(using_static_chain_p && cr_save_regno == 11)
27109 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
27110 {
27111 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
27112 START_USE (cr_save_regno);
27113 rs6000_emit_move_from_cr (cr_save_rtx);
27114 }
27115
27116 /* Do any required saving of FPRs. If the strategy allows it, store
27117 them inline ourselves; otherwise, call an out-of-line save routine. */
27118 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
27119 {
27120 int offset = info->fp_save_offset + frame_off;
27121 for (int i = info->first_fp_reg_save; i < 64; i++)
27122 {
27123 if (save_reg_p (i)
27124 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
27125 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
27126 sp_off - frame_off);
27127
27128 offset += fp_reg_size;
27129 }
27130 }
27131 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
27132 {
27133 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27134 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27135 unsigned ptr_regno = ptr_regno_for_savres (sel);
27136 rtx ptr_reg = frame_reg_rtx;
27137
27138 if (REGNO (frame_reg_rtx) == ptr_regno)
27139 gcc_checking_assert (frame_off == 0);
27140 else
27141 {
27142 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27143 NOT_INUSE (ptr_regno);
27144 emit_insn (gen_add3_insn (ptr_reg,
27145 frame_reg_rtx, GEN_INT (frame_off)));
27146 }
27147 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27148 info->fp_save_offset,
27149 info->lr_save_offset,
27150 DFmode, sel);
27151 rs6000_frame_related (insn, ptr_reg, sp_off,
27152 NULL_RTX, NULL_RTX);
27153 if (lr)
27154 END_USE (0);
27155 }
27156
27157 /* Save GPRs. This is done as a PARALLEL if we are using
27158 the store-multiple instructions. */
27159 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27160 {
27161 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27162 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27163 unsigned ptr_regno = ptr_regno_for_savres (sel);
27164 rtx ptr_reg = frame_reg_rtx;
27165 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27166 int end_save = info->gp_save_offset + info->gp_size;
27167 int ptr_off;
27168
27169 if (ptr_regno == 12)
27170 sp_adjust = 0;
27171 if (!ptr_set_up)
27172 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27173
27174 /* Need to adjust r11 (r12) if we saved any FPRs. */
27175 if (end_save + frame_off != 0)
27176 {
27177 rtx offset = GEN_INT (end_save + frame_off);
27178
27179 if (ptr_set_up)
27180 frame_off = -end_save;
27181 else
27182 NOT_INUSE (ptr_regno);
27183 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27184 }
27185 else if (!ptr_set_up)
27186 {
27187 NOT_INUSE (ptr_regno);
27188 emit_move_insn (ptr_reg, frame_reg_rtx);
27189 }
27190 ptr_off = -end_save;
27191 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27192 info->gp_save_offset + ptr_off,
27193 info->lr_save_offset + ptr_off,
27194 reg_mode, sel);
27195 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27196 NULL_RTX, NULL_RTX);
27197 if (lr)
27198 END_USE (0);
27199 }
27200 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27201 {
27202 rtvec p;
27203 int i;
27204 p = rtvec_alloc (32 - info->first_gp_reg_save);
27205 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27206 RTVEC_ELT (p, i)
27207 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27208 frame_reg_rtx,
27209 info->gp_save_offset + frame_off + reg_size * i);
27210 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27211 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27212 NULL_RTX, NULL_RTX);
27213 }
27214 else if (!WORLD_SAVE_P (info))
27215 {
27216 int offset = info->gp_save_offset + frame_off;
27217 for (int i = info->first_gp_reg_save; i < 32; i++)
27218 {
27219 if (rs6000_reg_live_or_pic_offset_p (i)
27220 && !cfun->machine->gpr_is_wrapped_separately[i])
27221 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
27222 sp_off - frame_off);
27223
27224 offset += reg_size;
27225 }
27226 }
27227
27228 if (crtl->calls_eh_return)
27229 {
27230 unsigned int i;
27231 rtvec p;
27232
27233 for (i = 0; ; ++i)
27234 {
27235 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27236 if (regno == INVALID_REGNUM)
27237 break;
27238 }
27239
27240 p = rtvec_alloc (i);
27241
27242 for (i = 0; ; ++i)
27243 {
27244 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27245 if (regno == INVALID_REGNUM)
27246 break;
27247
27248 rtx set
27249 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27250 sp_reg_rtx,
27251 info->ehrd_offset + sp_off + reg_size * (int) i);
27252 RTVEC_ELT (p, i) = set;
27253 RTX_FRAME_RELATED_P (set) = 1;
27254 }
27255
27256 insn = emit_insn (gen_blockage ());
27257 RTX_FRAME_RELATED_P (insn) = 1;
27258 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27259 }
27260
27261 /* In AIX ABI we need to make sure r2 is really saved. */
27262 if (TARGET_AIX && crtl->calls_eh_return)
27263 {
27264 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27265 rtx join_insn, note;
27266 rtx_insn *save_insn;
27267 long toc_restore_insn;
27268
27269 tmp_reg = gen_rtx_REG (Pmode, 11);
27270 tmp_reg_si = gen_rtx_REG (SImode, 11);
27271 if (using_static_chain_p)
27272 {
27273 START_USE (0);
27274 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27275 }
27276 else
27277 START_USE (11);
27278 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27279 /* Peek at instruction to which this function returns. If it's
27280 restoring r2, then we know we've already saved r2. We can't
27281 unconditionally save r2 because the value we have will already
27282 be updated if we arrived at this function via a plt call or
27283 toc adjusting stub. */
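/* (For reference: 0x80410000 encodes "lwz r2,0(r1)" and 0xE8410000
   encodes "ld r2,0(r1)", so adding RS6000_TOC_SAVE_SLOT gives the
   exact TOC-restore instruction a toc-adjusting call sequence places
   after the bl.) */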
27284 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27285 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27286 + RS6000_TOC_SAVE_SLOT);
27287 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27288 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27289 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27290 validate_condition_mode (EQ, CCUNSmode);
27291 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27292 emit_insn (gen_rtx_SET (compare_result,
27293 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27294 toc_save_done = gen_label_rtx ();
27295 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27296 gen_rtx_EQ (VOIDmode, compare_result,
27297 const0_rtx),
27298 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27299 pc_rtx);
27300 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27301 JUMP_LABEL (jump) = toc_save_done;
27302 LABEL_NUSES (toc_save_done) += 1;
27303
27304 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27305 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27306 sp_off - frame_off);
27307
27308 emit_label (toc_save_done);
27309
27310 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
27311 have a CFG that has different saves along different paths.
27312 Move the note to a dummy blockage insn, which describes that
27313 R2 is unconditionally saved after the label. */
27314 /* ??? An alternate representation might be a special insn pattern
27315 containing both the branch and the store. That might give the
27316 code that minimizes the number of DW_CFA_advance opcodes more
27317 freedom in placing the annotations. */
27318 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27319 if (note)
27320 remove_note (save_insn, note);
27321 else
27322 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27323 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27324 RTX_FRAME_RELATED_P (save_insn) = 0;
27325
27326 join_insn = emit_insn (gen_blockage ());
27327 REG_NOTES (join_insn) = note;
27328 RTX_FRAME_RELATED_P (join_insn) = 1;
27329
27330 if (using_static_chain_p)
27331 {
27332 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27333 END_USE (0);
27334 }
27335 else
27336 END_USE (11);
27337 }
27338
27339 /* Save CR if we use any that must be preserved. */
27340 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27341 {
27342 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27343 GEN_INT (info->cr_save_offset + frame_off));
27344 rtx mem = gen_frame_mem (SImode, addr);
27345
27346 /* If we didn't copy cr before, do so now using r0. */
27347 if (cr_save_rtx == NULL_RTX)
27348 {
27349 START_USE (0);
27350 cr_save_rtx = gen_rtx_REG (SImode, 0);
27351 rs6000_emit_move_from_cr (cr_save_rtx);
27352 }
27353
27354 /* Saving CR requires a two-instruction sequence: one instruction
27355 to move the CR to a general-purpose register, and a second
27356 instruction that stores the GPR to memory.
27357
27358 We do not emit any DWARF CFI records for the first of these,
27359 because we cannot properly represent the fact that CR is saved in
27360 a register. One reason is that we cannot express that multiple
27361 CR fields are saved; another reason is that on 64-bit, the size
27362 of the CR register in DWARF (4 bytes) differs from the size of
27363 a general-purpose register.
27364
27365 This means if any intervening instruction were to clobber one of
27366 the call-saved CR fields, we'd have incorrect CFI. To prevent
27367 this from happening, we mark the store to memory as a use of
27368 those CR fields, which prevents any such instruction from being
27369 scheduled in between the two instructions. */
27370 rtx crsave_v[9];
27371 int n_crsave = 0;
27372 int i;
27373
27374 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27375 for (i = 0; i < 8; i++)
27376 if (save_reg_p (CR0_REGNO + i))
27377 crsave_v[n_crsave++]
27378 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27379
27380 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27381 gen_rtvec_v (n_crsave, crsave_v)));
27382 END_USE (REGNO (cr_save_rtx));
27383
27384 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27385 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27386 so we need to construct a frame expression manually. */
27387 RTX_FRAME_RELATED_P (insn) = 1;
27388
27389 /* Update address to be stack-pointer relative, like
27390 rs6000_frame_related would do. */
27391 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27392 GEN_INT (info->cr_save_offset + sp_off));
27393 mem = gen_frame_mem (SImode, addr);
27394
27395 if (DEFAULT_ABI == ABI_ELFv2)
27396 {
27397 /* In the ELFv2 ABI we generate separate CFI records for each
27398 CR field that was actually saved. They all point to the
27399 same 32-bit stack slot. */
27400 rtx crframe[8];
27401 int n_crframe = 0;
27402
27403 for (i = 0; i < 8; i++)
27404 if (save_reg_p (CR0_REGNO + i))
27405 {
27406 crframe[n_crframe]
27407 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27408
27409 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27410 n_crframe++;
27411 }
27412
27413 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27414 gen_rtx_PARALLEL (VOIDmode,
27415 gen_rtvec_v (n_crframe, crframe)));
27416 }
27417 else
27418 {
27419 /* In other ABIs, by convention, we use a single CR regnum to
27420 represent the fact that all call-saved CR fields are saved.
27421 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27422 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27423 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27424 }
27425 }
27426
27427 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27428 *separate* slots if the routine calls __builtin_eh_return, so
27429 that they can be independently restored by the unwinder. */
27430 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27431 {
27432 int i, cr_off = info->ehcr_offset;
27433 rtx crsave;
27434
27435 /* ??? We might get better performance by using multiple mfocrf
27436 instructions. */
27437 crsave = gen_rtx_REG (SImode, 0);
27438 emit_insn (gen_movesi_from_cr (crsave));
27439
27440 for (i = 0; i < 8; i++)
27441 if (!call_used_regs[CR0_REGNO + i])
27442 {
27443 rtvec p = rtvec_alloc (2);
27444 RTVEC_ELT (p, 0)
27445 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27446 RTVEC_ELT (p, 1)
27447 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27448
27449 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27450
27451 RTX_FRAME_RELATED_P (insn) = 1;
27452 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27453 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27454 sp_reg_rtx, cr_off + sp_off));
27455
27456 cr_off += reg_size;
27457 }
27458 }
27459
27460 /* Update stack and set back pointer unless this is V.4,
27461 for which it was done previously. */
27462 if (!WORLD_SAVE_P (info) && info->push_p
27463 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27464 {
27465 rtx ptr_reg = NULL;
27466 int ptr_off = 0;
27467
27468 /* If saving altivec regs we need to be able to address all save
27469 locations using a 16-bit offset. */
27470 if ((strategy & SAVE_INLINE_VRS) == 0
27471 || (info->altivec_size != 0
27472 && (info->altivec_save_offset + info->altivec_size - 16
27473 + info->total_size - frame_off) > 32767)
27474 || (info->vrsave_size != 0
27475 && (info->vrsave_save_offset
27476 + info->total_size - frame_off) > 32767))
27477 {
27478 int sel = SAVRES_SAVE | SAVRES_VR;
27479 unsigned ptr_regno = ptr_regno_for_savres (sel);
27480
27481 if (using_static_chain_p
27482 && ptr_regno == STATIC_CHAIN_REGNUM)
27483 ptr_regno = 12;
27484 if (REGNO (frame_reg_rtx) != ptr_regno)
27485 START_USE (ptr_regno);
27486 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27487 frame_reg_rtx = ptr_reg;
27488 ptr_off = info->altivec_save_offset + info->altivec_size;
27489 frame_off = -ptr_off;
27490 }
27491 else if (REGNO (frame_reg_rtx) == 1)
27492 frame_off = info->total_size;
27493 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27494 ptr_reg, ptr_off);
27495 if (REGNO (frame_reg_rtx) == 12)
27496 sp_adjust = 0;
27497 sp_off = info->total_size;
27498 if (frame_reg_rtx != sp_reg_rtx)
27499 rs6000_emit_stack_tie (frame_reg_rtx, false);
27500 }
27501
27502 /* Set frame pointer, if needed. */
27503 if (frame_pointer_needed)
27504 {
27505 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27506 sp_reg_rtx);
27507 RTX_FRAME_RELATED_P (insn) = 1;
27508 }
27509
27510 /* Save AltiVec registers if needed. Save here because the red zone does
27511 not always include AltiVec registers. */
27512 if (!WORLD_SAVE_P (info)
27513 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27514 {
27515 int end_save = info->altivec_save_offset + info->altivec_size;
27516 int ptr_off;
27517 /* Oddly, the vector save/restore functions point r0 at the end
27518 of the save area, then use r11 or r12 to load offsets for
27519 [reg+reg] addressing. */
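 /* For example, if the AltiVec save area ends at frame offset END,
 r0 is pointed at frame + END below and each vector slot is then
 reached at a negative offset from r0; hence ptr_off = -END.
 (A sketch of the convention, not the full out-of-line ABI.) */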
27520 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27521 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27522 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27523
27524 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27525 NOT_INUSE (0);
27526 if (scratch_regno == 12)
27527 sp_adjust = 0;
27528 if (end_save + frame_off != 0)
27529 {
27530 rtx offset = GEN_INT (end_save + frame_off);
27531
27532 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27533 }
27534 else
27535 emit_move_insn (ptr_reg, frame_reg_rtx);
27536
27537 ptr_off = -end_save;
27538 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27539 info->altivec_save_offset + ptr_off,
27540 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27541 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27542 NULL_RTX, NULL_RTX);
27543 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27544 {
27545 /* The oddity mentioned above clobbered our frame reg. */
27546 emit_move_insn (frame_reg_rtx, ptr_reg);
27547 frame_off = ptr_off;
27548 }
27549 }
27550 else if (!WORLD_SAVE_P (info)
27551 && info->altivec_size != 0)
27552 {
27553 int i;
27554
27555 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27556 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27557 {
27558 rtx areg, savereg, mem;
27559 HOST_WIDE_INT offset;
27560
27561 offset = (info->altivec_save_offset + frame_off
27562 + 16 * (i - info->first_altivec_reg_save));
27563
27564 savereg = gen_rtx_REG (V4SImode, i);
27565
27566 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
27567 {
27568 mem = gen_frame_mem (V4SImode,
27569 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27570 GEN_INT (offset)));
27571 insn = emit_insn (gen_rtx_SET (mem, savereg));
27572 areg = NULL_RTX;
27573 }
27574 else
27575 {
27576 NOT_INUSE (0);
27577 areg = gen_rtx_REG (Pmode, 0);
27578 emit_move_insn (areg, GEN_INT (offset));
27579
27580 /* AltiVec addressing mode is [reg+reg]. */
27581 mem = gen_frame_mem (V4SImode,
27582 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27583
27584 /* Rather than emitting a generic move, force use of the stvx
27585 instruction, which we always want on ISA 2.07 (power8) systems.
27586 In particular we don't want xxpermdi/stxvd2x for little
27587 endian. */
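 /* The emitted pair is roughly:
 li 0,OFFSET
 stvx vN,FRAME_REG,0
 (with lis/ori instead of li when OFFSET needs more than 16 bits;
 register numbers illustrative). */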
27588 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27589 }
27590
27591 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27592 areg, GEN_INT (offset));
27593 }
27594 }
27595
27596 /* VRSAVE is a bit vector representing which AltiVec registers
27597 are used. The OS uses this to determine which vector
27598 registers to save on a context switch. We need to save
27599 VRSAVE on the stack frame, add whatever AltiVec registers we
27600 used in this function, and do the corresponding magic in the
27601 epilogue. */
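 /* Conceptually the sequence is: read VRSAVE into a GPR, store the
 old value to the frame, OR in the mask of AltiVec registers this
 function uses, and move the result back to VRSAVE; the details
 live in emit_vrsave_prologue. */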
27602
27603 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27604 {
27605 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27606 be using r12 as frame_reg_rtx and r11 as the static chain
27607 pointer for nested functions. */
27608 int save_regno = 12;
27609 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27610 && !using_static_chain_p)
27611 save_regno = 11;
27612 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27613 {
27614 save_regno = 11;
27615 if (using_static_chain_p)
27616 save_regno = 0;
27617 }
27618 NOT_INUSE (save_regno);
27619
27620 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27621 }
27622
27623 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27624 if (!TARGET_SINGLE_PIC_BASE
27625 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27626 && !constant_pool_empty_p ())
27627 || (DEFAULT_ABI == ABI_V4
27628 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27629 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27630 {
27631 /* If emit_load_toc_table will use the link register, we need to save
27632 it. We use R12 for this purpose because emit_load_toc_table
27633 can use register 0. This allows us to use a plain 'blr' to return
27634 from the procedure more often. */
27635 int save_LR_around_toc_setup = (TARGET_ELF
27636 && DEFAULT_ABI == ABI_V4
27637 && flag_pic
27638 && ! info->lr_save_p
27639 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27640 if (save_LR_around_toc_setup)
27641 {
27642 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27643 rtx tmp = gen_rtx_REG (Pmode, 12);
27644
27645 sp_adjust = 0;
27646 insn = emit_move_insn (tmp, lr);
27647 RTX_FRAME_RELATED_P (insn) = 1;
27648
27649 rs6000_emit_load_toc_table (TRUE);
27650
27651 insn = emit_move_insn (lr, tmp);
27652 add_reg_note (insn, REG_CFA_RESTORE, lr);
27653 RTX_FRAME_RELATED_P (insn) = 1;
27654 }
27655 else
27656 rs6000_emit_load_toc_table (TRUE);
27657 }
27658
27659 #if TARGET_MACHO
27660 if (!TARGET_SINGLE_PIC_BASE
27661 && DEFAULT_ABI == ABI_DARWIN
27662 && flag_pic && crtl->uses_pic_offset_table)
27663 {
27664 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27665 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27666
27667 /* Save and restore LR locally around this call (in R0). */
27668 if (!info->lr_save_p)
27669 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27670
27671 emit_insn (gen_load_macho_picbase (src));
27672
27673 emit_move_insn (gen_rtx_REG (Pmode,
27674 RS6000_PIC_OFFSET_TABLE_REGNUM),
27675 lr);
27676
27677 if (!info->lr_save_p)
27678 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27679 }
27680 #endif
27681
27682 /* If we need to, save the TOC register after doing the stack setup.
27683 Do not emit eh frame info for this save. The unwinder wants info,
27684 conceptually attached to instructions in this function, about
27685 register values in the caller of this function. This R2 may have
27686 already been changed from the value in the caller.
27687 We don't attempt to write accurate DWARF EH frame info for R2
27688 because code emitted by gcc for a (non-pointer) function call
27689 doesn't save and restore R2. Instead, R2 is managed out-of-line
27690 by a linker generated plt call stub when the function resides in
27691 a shared library. This behavior is costly to describe in DWARF,
27692 both in terms of the size of DWARF info and the time taken in the
27693 unwinder to interpret it. R2 changes, apart from the
27694 calls_eh_return case earlier in this function, are handled by
27695 linux-unwind.h frob_update_context. */
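 /* For instance, on ELFv2 a linker plt call stub typically begins
 with "std 2,24(1)", saving the caller's TOC pointer in the slot
 named by RS6000_TOC_SAVE_SLOT before clobbering r2; the store
 emitted below fills the same slot from within the callee. (Stub
 shown for illustration only.) */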
27696 if (rs6000_save_toc_in_prologue_p ())
27697 {
27698 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27699 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
27700 }
27701
27702 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27703 if (using_split_stack && split_stack_arg_pointer_used_p ())
27704 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27705 }
27706
27707 /* Output .extern statements for the save/restore routines we use. */
27708
27709 static void
27710 rs6000_output_savres_externs (FILE *file)
27711 {
27712 rs6000_stack_t *info = rs6000_stack_info ();
27713
27714 if (TARGET_DEBUG_STACK)
27715 debug_stack_info (info);
27716
27717 /* Write .extern for any function we will call to save and restore
27718 fp values. */
27719 if (info->first_fp_reg_save < 64
27720 && !TARGET_MACHO
27721 && !TARGET_ELF)
27722 {
27723 char *name;
27724 int regno = info->first_fp_reg_save - 32;
27725
27726 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27727 {
27728 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27729 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27730 name = rs6000_savres_routine_name (regno, sel);
27731 fprintf (file, "\t.extern %s\n", name);
27732 }
27733 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27734 {
27735 bool lr = (info->savres_strategy
27736 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27737 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27738 name = rs6000_savres_routine_name (regno, sel);
27739 fprintf (file, "\t.extern %s\n", name);
27740 }
27741 }
27742 }
27743
27744 /* Write function prologue. */
27745
27746 static void
27747 rs6000_output_function_prologue (FILE *file,
27748 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27749 {
27750 if (!cfun->is_thunk)
27751 rs6000_output_savres_externs (file);
27752
27753 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27754 immediately after the global entry point label. */
27755 if (rs6000_global_entry_point_needed_p ())
27756 {
27757 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27758
27759 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27760
27761 if (TARGET_CMODEL != CMODEL_LARGE)
27762 {
27763 /* In the small and medium code models, we assume the TOC is less
27764 than 2 GB away from the text section, so it can be computed via the
27765 following two-instruction sequence. */
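 /* With rs6000_pic_labelno == 0, the output is schematically:

 .LCF0:
 0: addis 2,12,.TOC.-.LCF0@ha
 addi 2,2,.TOC.-.LCF0@l

 relying on r12 holding the global entry point address, as the
 ELFv2 ABI guarantees on entry. */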
27766 char buf[256];
27767
27768 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27769 fprintf (file, "0:\taddis 2,12,.TOC.-");
27770 assemble_name (file, buf);
27771 fprintf (file, "@ha\n");
27772 fprintf (file, "\taddi 2,2,.TOC.-");
27773 assemble_name (file, buf);
27774 fprintf (file, "@l\n");
27775 }
27776 else
27777 {
27778 /* In the large code model, we allow arbitrary offsets between the
27779 TOC and the text section, so we have to load the offset from
27780 memory. The data field is emitted directly before the global
27781 entry point in rs6000_elf_declare_function_name. */
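 /* Schematically the output is:

 ld 2,.LCL0-.LCF0(12)
 add 2,2,12

 where .LCL0 is the doubleword holding the TOC offset and r12
 again holds the global entry point address. */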
27782 char buf[256];
27783
27784 #ifdef HAVE_AS_ENTRY_MARKERS
27785 /* If supported by the linker, emit a marker relocation. If the
27786 total code size of the final executable or shared library
27787 happens to fit into 2 GB after all, the linker will replace
27788 this code sequence with the sequence for the small or medium
27789 code model. */
27790 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27791 #endif
27792 fprintf (file, "\tld 2,");
27793 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27794 assemble_name (file, buf);
27795 fprintf (file, "-");
27796 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27797 assemble_name (file, buf);
27798 fprintf (file, "(12)\n");
27799 fprintf (file, "\tadd 2,2,12\n");
27800 }
27801
27802 fputs ("\t.localentry\t", file);
27803 assemble_name (file, name);
27804 fputs (",.-", file);
27805 assemble_name (file, name);
27806 fputs ("\n", file);
27807 }
27808
27809 /* Output -mprofile-kernel code. This needs to be done here instead of
27810 in output_function_profile since it must go after the ELFv2 ABI
27811 local entry point. */
27812 if (TARGET_PROFILE_KERNEL && crtl->profile)
27813 {
27814 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27815 gcc_assert (!TARGET_32BIT);
27816
27817 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27818
27819 /* In the ELFv2 ABI we have no compiler stack word. It must be
27820 the responsibility of _mcount to preserve the static chain
27821 register if required. */
27822 if (DEFAULT_ABI != ABI_ELFv2
27823 && cfun->static_chain_decl != NULL)
27824 {
27825 asm_fprintf (file, "\tstd %s,24(%s)\n",
27826 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27827 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27828 asm_fprintf (file, "\tld %s,24(%s)\n",
27829 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27830 }
27831 else
27832 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27833 }
27834
27835 rs6000_pic_labelno++;
27836 }
27837
27838 /* -mprofile-kernel code calls mcount before the function prolog,
27839 so a profiled leaf function should stay a leaf function. */
27840 static bool
27841 rs6000_keep_leaf_when_profiled ()
27842 {
27843 return TARGET_PROFILE_KERNEL;
27844 }
27845
27846 /* Non-zero if vmx regs are restored before the frame pop, zero if
27847 we restore after the pop when possible. */
27848 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27849
27850 /* Restoring cr is a two step process: loading a reg from the frame
27851 save, then moving the reg to cr. For ABI_V4 we must let the
27852 unwinder know that the stack location is no longer valid at or
27853 before the stack deallocation, but we can't emit a cfa_restore for
27854 cr at the stack deallocation like we do for other registers.
27855 The trouble is that it is possible for the move to cr to be
27856 scheduled after the stack deallocation. So say exactly where cr
27857 is located on each of the two insns. */
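 /* So for ABI_V4 the two insns carry notes roughly like:

 lwz 12,OFF(1) REG_CFA_REGISTER: (set (reg:SI 12) cr2)
 ...
 mtcrf MASK,12 REG_CFA_RESTORE: cr2

 (offsets and register numbers illustrative). */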
27858
27859 static rtx
27860 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27861 {
27862 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27863 rtx reg = gen_rtx_REG (SImode, regno);
27864 rtx_insn *insn = emit_move_insn (reg, mem);
27865
27866 if (!exit_func && DEFAULT_ABI == ABI_V4)
27867 {
27868 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27869 rtx set = gen_rtx_SET (reg, cr);
27870
27871 add_reg_note (insn, REG_CFA_REGISTER, set);
27872 RTX_FRAME_RELATED_P (insn) = 1;
27873 }
27874 return reg;
27875 }
27876
27877 /* Reload CR from REG. */
27878
27879 static void
27880 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27881 {
27882 int count = 0;
27883 int i;
27884
27885 if (using_mfcr_multiple)
27886 {
27887 for (i = 0; i < 8; i++)
27888 if (save_reg_p (CR0_REGNO + i))
27889 count++;
27890 gcc_assert (count);
27891 }
27892
27893 if (using_mfcr_multiple && count > 1)
27894 {
27895 rtx_insn *insn;
27896 rtvec p;
27897 int ndx;
27898
27899 p = rtvec_alloc (count);
27900
27901 ndx = 0;
27902 for (i = 0; i < 8; i++)
27903 if (save_reg_p (CR0_REGNO + i))
27904 {
27905 rtvec r = rtvec_alloc (2);
27906 RTVEC_ELT (r, 0) = reg;
27907 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27908 RTVEC_ELT (p, ndx) =
27909 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27910 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27911 ndx++;
27912 }
27913 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27914 gcc_assert (ndx == count);
27915
27916 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27917 CR field separately. */
27918 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27919 {
27920 for (i = 0; i < 8; i++)
27921 if (save_reg_p (CR0_REGNO + i))
27922 add_reg_note (insn, REG_CFA_RESTORE,
27923 gen_rtx_REG (SImode, CR0_REGNO + i));
27924
27925 RTX_FRAME_RELATED_P (insn) = 1;
27926 }
27927 }
27928 else
27929 for (i = 0; i < 8; i++)
27930 if (save_reg_p (CR0_REGNO + i))
27931 {
27932 rtx insn = emit_insn (gen_movsi_to_cr_one
27933 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27934
27935 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27936 CR field separately, attached to the insn that in fact
27937 restores this particular CR field. */
27938 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27939 {
27940 add_reg_note (insn, REG_CFA_RESTORE,
27941 gen_rtx_REG (SImode, CR0_REGNO + i));
27942
27943 RTX_FRAME_RELATED_P (insn) = 1;
27944 }
27945 }
27946
27947 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27948 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27949 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27950 {
27951 rtx_insn *insn = get_last_insn ();
27952 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27953
27954 add_reg_note (insn, REG_CFA_RESTORE, cr);
27955 RTX_FRAME_RELATED_P (insn) = 1;
27956 }
27957 }
27958
27959 /* Like cr, the move to lr instruction can be scheduled after the
27960 stack deallocation, but unlike cr, its stack frame save is still
27961 valid. So we only need to emit the cfa_restore on the correct
27962 instruction. */
27963
27964 static void
27965 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27966 {
27967 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27968 rtx reg = gen_rtx_REG (Pmode, regno);
27969
27970 emit_move_insn (reg, mem);
27971 }
27972
27973 static void
27974 restore_saved_lr (int regno, bool exit_func)
27975 {
27976 rtx reg = gen_rtx_REG (Pmode, regno);
27977 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27978 rtx_insn *insn = emit_move_insn (lr, reg);
27979
27980 if (!exit_func && flag_shrink_wrap)
27981 {
27982 add_reg_note (insn, REG_CFA_RESTORE, lr);
27983 RTX_FRAME_RELATED_P (insn) = 1;
27984 }
27985 }
27986
27987 static rtx
27988 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27989 {
27990 if (DEFAULT_ABI == ABI_ELFv2)
27991 {
27992 int i;
27993 for (i = 0; i < 8; i++)
27994 if (save_reg_p (CR0_REGNO + i))
27995 {
27996 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27997 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27998 cfa_restores);
27999 }
28000 }
28001 else if (info->cr_save_p)
28002 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28003 gen_rtx_REG (SImode, CR2_REGNO),
28004 cfa_restores);
28005
28006 if (info->lr_save_p)
28007 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28008 gen_rtx_REG (Pmode, LR_REGNO),
28009 cfa_restores);
28010 return cfa_restores;
28011 }
28012
28013 /* Return true if OFFSET from stack pointer can be clobbered by signals.
28014 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288
28015 bytes below the stack pointer that are not clobbered by signals. */
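 /* E.g. for the 64-bit AIX/ELFv2 ABIs, a slot at sp-288 is still
 protected by the red zone while one at sp-289 is not; under V.4
 anything below the stack pointer may be clobbered. */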
28016
28017 static inline bool
28018 offset_below_red_zone_p (HOST_WIDE_INT offset)
28019 {
28020 return offset < (DEFAULT_ABI == ABI_V4
28021 ? 0
28022 : TARGET_32BIT ? -220 : -288);
28023 }
28024
28025 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
28026
28027 static void
28028 emit_cfa_restores (rtx cfa_restores)
28029 {
28030 rtx_insn *insn = get_last_insn ();
28031 rtx *loc = &REG_NOTES (insn);
28032
28033 while (*loc)
28034 loc = &XEXP (*loc, 1);
28035 *loc = cfa_restores;
28036 RTX_FRAME_RELATED_P (insn) = 1;
28037 }
28038
28039 /* Emit function epilogue as insns. */
28040
28041 void
28042 rs6000_emit_epilogue (int sibcall)
28043 {
28044 rs6000_stack_t *info;
28045 int restoring_GPRs_inline;
28046 int restoring_FPRs_inline;
28047 int using_load_multiple;
28048 int using_mtcr_multiple;
28049 int use_backchain_to_restore_sp;
28050 int restore_lr;
28051 int strategy;
28052 HOST_WIDE_INT frame_off = 0;
28053 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
28054 rtx frame_reg_rtx = sp_reg_rtx;
28055 rtx cfa_restores = NULL_RTX;
28056 rtx insn;
28057 rtx cr_save_reg = NULL_RTX;
28058 machine_mode reg_mode = Pmode;
28059 int reg_size = TARGET_32BIT ? 4 : 8;
28060 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
28061 ? DFmode : SFmode;
28062 int fp_reg_size = 8;
28063 int i;
28064 bool exit_func;
28065 unsigned ptr_regno;
28066
28067 info = rs6000_stack_info ();
28068
28069 strategy = info->savres_strategy;
28070 using_load_multiple = strategy & REST_MULTIPLE;
28071 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
28072 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
28073 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
28074 || rs6000_cpu == PROCESSOR_PPC603
28075 || rs6000_cpu == PROCESSOR_PPC750
28076 || optimize_size);
28077 /* Restore via the backchain when we have a large frame, since this
28078 is more efficient than an addis, addi pair. The second condition
28079 here will not trigger at the moment; we don't actually need a
28080 frame pointer for alloca, but the generic parts of the compiler
28081 give us one anyway. */
28082 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
28083 ? info->lr_save_offset
28084 : 0) > 32767
28085 || (cfun->calls_alloca
28086 && !frame_pointer_needed));
28087 restore_lr = (info->lr_save_p
28088 && (restoring_FPRs_inline
28089 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
28090 && (restoring_GPRs_inline
28091 || info->first_fp_reg_save < 64)
28092 && !cfun->machine->lr_is_wrapped_separately);
28093
28094
28095 if (WORLD_SAVE_P (info))
28096 {
28097 int i, j;
28098 char rname[30];
28099 const char *alloc_rname;
28100 rtvec p;
28101
28102 /* eh_rest_world_r10 will return to the location saved in the LR
28103 stack slot (which is not likely to be our caller).
28104 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28105 rest_world is similar, except any R10 parameter is ignored.
28106 The exception-handling stuff that was here in 2.95 is no
28107 longer necessary. */
28108
28109 p = rtvec_alloc (9
28110 + 32 - info->first_gp_reg_save
28111 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
28112 + 63 + 1 - info->first_fp_reg_save);
28113
28114 strcpy (rname, ((crtl->calls_eh_return) ?
28115 "*eh_rest_world_r10" : "*rest_world"));
28116 alloc_rname = ggc_strdup (rname);
28117
28118 j = 0;
28119 RTVEC_ELT (p, j++) = ret_rtx;
28120 RTVEC_ELT (p, j++)
28121 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
28122 /* The instruction pattern requires a clobber here;
28123 it is shared with the restVEC helper. */
28124 RTVEC_ELT (p, j++)
28125 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
28126
28127 {
28128 /* CR register traditionally saved as CR2. */
28129 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
28130 RTVEC_ELT (p, j++)
28131 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
28132 if (flag_shrink_wrap)
28133 {
28134 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28135 gen_rtx_REG (Pmode, LR_REGNO),
28136 cfa_restores);
28137 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28138 }
28139 }
28140
28141 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28142 {
28143 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28144 RTVEC_ELT (p, j++)
28145 = gen_frame_load (reg,
28146 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28147 if (flag_shrink_wrap)
28148 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28149 }
28150 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28151 {
28152 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28153 RTVEC_ELT (p, j++)
28154 = gen_frame_load (reg,
28155 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28156 if (flag_shrink_wrap)
28157 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28158 }
28159 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28160 {
28161 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28162 ? DFmode : SFmode),
28163 info->first_fp_reg_save + i);
28164 RTVEC_ELT (p, j++)
28165 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28166 if (flag_shrink_wrap)
28167 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28168 }
28169 RTVEC_ELT (p, j++)
28170 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
28171 RTVEC_ELT (p, j++)
28172 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
28173 RTVEC_ELT (p, j++)
28174 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
28175 RTVEC_ELT (p, j++)
28176 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
28177 RTVEC_ELT (p, j++)
28178 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28179 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28180
28181 if (flag_shrink_wrap)
28182 {
28183 REG_NOTES (insn) = cfa_restores;
28184 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28185 RTX_FRAME_RELATED_P (insn) = 1;
28186 }
28187 return;
28188 }
28189
28190 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28191 if (info->push_p)
28192 frame_off = info->total_size;
28193
28194 /* Restore AltiVec registers if we must do so before adjusting the
28195 stack. */
28196 if (info->altivec_size != 0
28197 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28198 || (DEFAULT_ABI != ABI_V4
28199 && offset_below_red_zone_p (info->altivec_save_offset))))
28200 {
28201 int i;
28202 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28203
28204 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28205 if (use_backchain_to_restore_sp)
28206 {
28207 int frame_regno = 11;
28208
28209 if ((strategy & REST_INLINE_VRS) == 0)
28210 {
28211 /* Of r11 and r12, select the one not clobbered by an
28212 out-of-line restore function for the frame register. */
28213 frame_regno = 11 + 12 - scratch_regno;
28214 }
28215 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28216 emit_move_insn (frame_reg_rtx,
28217 gen_rtx_MEM (Pmode, sp_reg_rtx));
28218 frame_off = 0;
28219 }
28220 else if (frame_pointer_needed)
28221 frame_reg_rtx = hard_frame_pointer_rtx;
28222
28223 if ((strategy & REST_INLINE_VRS) == 0)
28224 {
28225 int end_save = info->altivec_save_offset + info->altivec_size;
28226 int ptr_off;
28227 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28228 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28229
28230 if (end_save + frame_off != 0)
28231 {
28232 rtx offset = GEN_INT (end_save + frame_off);
28233
28234 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28235 }
28236 else
28237 emit_move_insn (ptr_reg, frame_reg_rtx);
28238
28239 ptr_off = -end_save;
28240 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28241 info->altivec_save_offset + ptr_off,
28242 0, V4SImode, SAVRES_VR);
28243 }
28244 else
28245 {
28246 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28247 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28248 {
28249 rtx addr, areg, mem, insn;
28250 rtx reg = gen_rtx_REG (V4SImode, i);
28251 HOST_WIDE_INT offset
28252 = (info->altivec_save_offset + frame_off
28253 + 16 * (i - info->first_altivec_reg_save));
28254
28255 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28256 {
28257 mem = gen_frame_mem (V4SImode,
28258 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28259 GEN_INT (offset)));
28260 insn = gen_rtx_SET (reg, mem);
28261 }
28262 else
28263 {
28264 areg = gen_rtx_REG (Pmode, 0);
28265 emit_move_insn (areg, GEN_INT (offset));
28266
28267 /* AltiVec addressing mode is [reg+reg]. */
28268 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28269 mem = gen_frame_mem (V4SImode, addr);
28270
28271 /* Rather than emitting a generic move, force use of the
28272 lvx instruction, which we always want. In particular we
28273 don't want lxvd2x/xxpermdi for little endian. */
28274 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28275 }
28276
28277 (void) emit_insn (insn);
28278 }
28279 }
28280
28281 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28282 if (((strategy & REST_INLINE_VRS) == 0
28283 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28284 && (flag_shrink_wrap
28285 || (offset_below_red_zone_p
28286 (info->altivec_save_offset
28287 + 16 * (i - info->first_altivec_reg_save)))))
28288 {
28289 rtx reg = gen_rtx_REG (V4SImode, i);
28290 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28291 }
28292 }
28293
28294 /* Restore VRSAVE if we must do so before adjusting the stack. */
28295 if (info->vrsave_size != 0
28296 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28297 || (DEFAULT_ABI != ABI_V4
28298 && offset_below_red_zone_p (info->vrsave_save_offset))))
28299 {
28300 rtx reg;
28301
28302 if (frame_reg_rtx == sp_reg_rtx)
28303 {
28304 if (use_backchain_to_restore_sp)
28305 {
28306 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28307 emit_move_insn (frame_reg_rtx,
28308 gen_rtx_MEM (Pmode, sp_reg_rtx));
28309 frame_off = 0;
28310 }
28311 else if (frame_pointer_needed)
28312 frame_reg_rtx = hard_frame_pointer_rtx;
28313 }
28314
28315 reg = gen_rtx_REG (SImode, 12);
28316 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28317 info->vrsave_save_offset + frame_off));
28318
28319 emit_insn (generate_set_vrsave (reg, info, 1));
28320 }
28321
28322 insn = NULL_RTX;
28323 /* If we have a large stack frame, restore the old stack pointer
28324 using the backchain. */
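 /* The back chain is the word at 0(r1), so this is schematically
 "ld 11,0(1)" (lwz on 32-bit), recovering the caller's stack
 pointer without any frame-size arithmetic. */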
28325 if (use_backchain_to_restore_sp)
28326 {
28327 if (frame_reg_rtx == sp_reg_rtx)
28328 {
28329 /* Under V.4, don't reset the stack pointer until after we're done
28330 loading the saved registers. */
28331 if (DEFAULT_ABI == ABI_V4)
28332 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28333
28334 insn = emit_move_insn (frame_reg_rtx,
28335 gen_rtx_MEM (Pmode, sp_reg_rtx));
28336 frame_off = 0;
28337 }
28338 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28339 && DEFAULT_ABI == ABI_V4)
28340 /* frame_reg_rtx has been set up by the altivec restore. */
28341 ;
28342 else
28343 {
28344 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28345 frame_reg_rtx = sp_reg_rtx;
28346 }
28347 }
28348 /* If we have a frame pointer, we can restore the old stack pointer
28349 from it. */
28350 else if (frame_pointer_needed)
28351 {
28352 frame_reg_rtx = sp_reg_rtx;
28353 if (DEFAULT_ABI == ABI_V4)
28354 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28355 /* Prevent reordering memory accesses against stack pointer restore. */
28356 else if (cfun->calls_alloca
28357 || offset_below_red_zone_p (-info->total_size))
28358 rs6000_emit_stack_tie (frame_reg_rtx, true);
28359
28360 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28361 GEN_INT (info->total_size)));
28362 frame_off = 0;
28363 }
28364 else if (info->push_p
28365 && DEFAULT_ABI != ABI_V4
28366 && !crtl->calls_eh_return)
28367 {
28368 /* Prevent reordering memory accesses against stack pointer restore. */
28369 if (cfun->calls_alloca
28370 || offset_below_red_zone_p (-info->total_size))
28371 rs6000_emit_stack_tie (frame_reg_rtx, false);
28372 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28373 GEN_INT (info->total_size)));
28374 frame_off = 0;
28375 }
28376 if (insn && frame_reg_rtx == sp_reg_rtx)
28377 {
28378 if (cfa_restores)
28379 {
28380 REG_NOTES (insn) = cfa_restores;
28381 cfa_restores = NULL_RTX;
28382 }
28383 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28384 RTX_FRAME_RELATED_P (insn) = 1;
28385 }
28386
28387 /* Restore AltiVec registers if we have not done so already. */
28388 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28389 && info->altivec_size != 0
28390 && (DEFAULT_ABI == ABI_V4
28391 || !offset_below_red_zone_p (info->altivec_save_offset)))
28392 {
28393 int i;
28394
28395 if ((strategy & REST_INLINE_VRS) == 0)
28396 {
28397 int end_save = info->altivec_save_offset + info->altivec_size;
28398 int ptr_off;
28399 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28400 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28401 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28402
28403 if (end_save + frame_off != 0)
28404 {
28405 rtx offset = GEN_INT (end_save + frame_off);
28406
28407 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28408 }
28409 else
28410 emit_move_insn (ptr_reg, frame_reg_rtx);
28411
28412 ptr_off = -end_save;
28413 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28414 info->altivec_save_offset + ptr_off,
28415 0, V4SImode, SAVRES_VR);
28416 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28417 {
28418 /* Frame reg was clobbered by out-of-line save. Restore it
28419 from ptr_reg, and if we are calling out-of-line gpr or
28420 fpr restore set up the correct pointer and offset. */
28421 unsigned newptr_regno = 1;
28422 if (!restoring_GPRs_inline)
28423 {
28424 bool lr = info->gp_save_offset + info->gp_size == 0;
28425 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28426 newptr_regno = ptr_regno_for_savres (sel);
28427 end_save = info->gp_save_offset + info->gp_size;
28428 }
28429 else if (!restoring_FPRs_inline)
28430 {
28431 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28432 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28433 newptr_regno = ptr_regno_for_savres (sel);
28434 end_save = info->fp_save_offset + info->fp_size;
28435 }
28436
28437 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28438 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28439
28440 if (end_save + ptr_off != 0)
28441 {
28442 rtx offset = GEN_INT (end_save + ptr_off);
28443
28444 frame_off = -end_save;
28445 if (TARGET_32BIT)
28446 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28447 ptr_reg, offset));
28448 else
28449 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28450 ptr_reg, offset));
28451 }
28452 else
28453 {
28454 frame_off = ptr_off;
28455 emit_move_insn (frame_reg_rtx, ptr_reg);
28456 }
28457 }
28458 }
28459 else
28460 {
28461 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28462 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28463 {
28464 rtx addr, areg, mem, insn;
28465 rtx reg = gen_rtx_REG (V4SImode, i);
28466 HOST_WIDE_INT offset
28467 = (info->altivec_save_offset + frame_off
28468 + 16 * (i - info->first_altivec_reg_save));
28469
28470 if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset))
28471 {
28472 mem = gen_frame_mem (V4SImode,
28473 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28474 GEN_INT (offset)));
28475 insn = gen_rtx_SET (reg, mem);
28476 }
28477 else
28478 {
28479 areg = gen_rtx_REG (Pmode, 0);
28480 emit_move_insn (areg, GEN_INT (offset));
28481
28482 /* AltiVec addressing mode is [reg+reg]. */
28483 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28484 mem = gen_frame_mem (V4SImode, addr);
28485
28486 /* Rather than emitting a generic move, force use of the
28487 lvx instruction, which we always want. In particular we
28488 don't want lxvd2x/xxpermdi for little endian. */
28489 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28490 }
28491
28492 (void) emit_insn (insn);
28493 }
28494 }
28495
28496 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28497 if (((strategy & REST_INLINE_VRS) == 0
28498 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28499 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28500 {
28501 rtx reg = gen_rtx_REG (V4SImode, i);
28502 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28503 }
28504 }
28505
28506 /* Restore VRSAVE if we have not done so already. */
28507 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28508 && info->vrsave_size != 0
28509 && (DEFAULT_ABI == ABI_V4
28510 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28511 {
28512 rtx reg;
28513
28514 reg = gen_rtx_REG (SImode, 12);
28515 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28516 info->vrsave_save_offset + frame_off));
28517
28518 emit_insn (generate_set_vrsave (reg, info, 1));
28519 }
28520
28521 /* If we exit by an out-of-line restore function on ABI_V4 then that
28522 function will deallocate the stack, so we don't need to worry
28523 about the unwinder restoring cr from an invalid stack frame
28524 location. */
28525 exit_func = (!restoring_FPRs_inline
28526 || (!restoring_GPRs_inline
28527 && info->first_fp_reg_save == 64));
28528
28529 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28530 *separate* slots if the routine calls __builtin_eh_return, so
28531 that they can be independently restored by the unwinder. */
28532 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28533 {
28534 int i, cr_off = info->ehcr_offset;
28535
28536 for (i = 0; i < 8; i++)
28537 if (!call_used_regs[CR0_REGNO + i])
28538 {
28539 rtx reg = gen_rtx_REG (SImode, 0);
28540 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28541 cr_off + frame_off));
28542
28543 insn = emit_insn (gen_movsi_to_cr_one
28544 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28545
28546 if (!exit_func && flag_shrink_wrap)
28547 {
28548 add_reg_note (insn, REG_CFA_RESTORE,
28549 gen_rtx_REG (SImode, CR0_REGNO + i));
28550
28551 RTX_FRAME_RELATED_P (insn) = 1;
28552 }
28553
28554 cr_off += reg_size;
28555 }
28556 }
28557
28558 /* Get the old lr if we saved it. If we are restoring registers
28559 out-of-line, then the out-of-line routines can do this for us. */
28560 if (restore_lr && restoring_GPRs_inline)
28561 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28562
28563 /* Get the old cr if we saved it. */
28564 if (info->cr_save_p)
28565 {
28566 unsigned cr_save_regno = 12;
28567
28568 if (!restoring_GPRs_inline)
28569 {
28570 /* Ensure we don't use the register used by the out-of-line
28571 gpr register restore below. */
28572 bool lr = info->gp_save_offset + info->gp_size == 0;
28573 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28574 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28575
28576 if (gpr_ptr_regno == 12)
28577 cr_save_regno = 11;
28578 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28579 }
28580 else if (REGNO (frame_reg_rtx) == 12)
28581 cr_save_regno = 11;
28582
28583 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28584 info->cr_save_offset + frame_off,
28585 exit_func);
28586 }
28587
28588 /* Set LR here to try to overlap restores below. */
28589 if (restore_lr && restoring_GPRs_inline)
28590 restore_saved_lr (0, exit_func);
28591
28592 /* Load exception handler data registers, if needed. */
28593 if (crtl->calls_eh_return)
28594 {
28595 unsigned int i, regno;
28596
28597 if (TARGET_AIX)
28598 {
28599 rtx reg = gen_rtx_REG (reg_mode, 2);
28600 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28601 frame_off + RS6000_TOC_SAVE_SLOT));
28602 }
28603
28604 for (i = 0; ; ++i)
28605 {
28606 rtx mem;
28607
28608 regno = EH_RETURN_DATA_REGNO (i);
28609 if (regno == INVALID_REGNUM)
28610 break;
28611
28612 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28613 info->ehrd_offset + frame_off
28614 + reg_size * (int) i);
28615
28616 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28617 }
28618 }
28619
28620 /* Restore GPRs. This is done as a PARALLEL if we are using
28621 the load-multiple instructions. */
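 /* Load-multiple is 32-bit only: e.g. "lmw 29,OFF(11)" reloads
 r29..r31 in one instruction (no 64-bit equivalent exists, so
 individual ld insns are used there). Offsets illustrative. */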
28622 if (!restoring_GPRs_inline)
28623 {
28624 /* We are jumping to an out-of-line function. */
28625 rtx ptr_reg;
28626 int end_save = info->gp_save_offset + info->gp_size;
28627 bool can_use_exit = end_save == 0;
28628 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28629 int ptr_off;
28630
28631 /* Emit stack reset code if we need it. */
28632 ptr_regno = ptr_regno_for_savres (sel);
28633 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28634 if (can_use_exit)
28635 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28636 else if (end_save + frame_off != 0)
28637 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28638 GEN_INT (end_save + frame_off)));
28639 else if (REGNO (frame_reg_rtx) != ptr_regno)
28640 emit_move_insn (ptr_reg, frame_reg_rtx);
28641 if (REGNO (frame_reg_rtx) == ptr_regno)
28642 frame_off = -end_save;
28643
28644 if (can_use_exit && info->cr_save_p)
28645 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28646
28647 ptr_off = -end_save;
28648 rs6000_emit_savres_rtx (info, ptr_reg,
28649 info->gp_save_offset + ptr_off,
28650 info->lr_save_offset + ptr_off,
28651 reg_mode, sel);
28652 }
28653 else if (using_load_multiple)
28654 {
28655 rtvec p;
28656 p = rtvec_alloc (32 - info->first_gp_reg_save);
28657 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28658 RTVEC_ELT (p, i)
28659 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28660 frame_reg_rtx,
28661 info->gp_save_offset + frame_off + reg_size * i);
28662 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28663 }
28664 else
28665 {
28666 int offset = info->gp_save_offset + frame_off;
28667 for (i = info->first_gp_reg_save; i < 32; i++)
28668 {
28669 if (rs6000_reg_live_or_pic_offset_p (i)
28670 && !cfun->machine->gpr_is_wrapped_separately[i])
28671 {
28672 rtx reg = gen_rtx_REG (reg_mode, i);
28673 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28674 }
28675
28676 offset += reg_size;
28677 }
28678 }
28679
28680 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28681 {
28682 /* If the frame pointer was used then we can't delay emitting
28683 a REG_CFA_DEF_CFA note. This must happen on the insn that
28684 restores the frame pointer, r31. We may have already emitted
28685 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28686 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28687 be harmless if emitted. */
28688 if (frame_pointer_needed)
28689 {
28690 insn = get_last_insn ();
28691 add_reg_note (insn, REG_CFA_DEF_CFA,
28692 plus_constant (Pmode, frame_reg_rtx, frame_off));
28693 RTX_FRAME_RELATED_P (insn) = 1;
28694 }
28695
28696 /* Set up cfa_restores. We always need these when
28697 shrink-wrapping. If not shrink-wrapping then we only need
28698 the cfa_restore when the stack location is no longer valid.
28699 The cfa_restores must be emitted on or before the insn that
28700 invalidates the stack, and of course must not be emitted
28701 before the insn that actually does the restore. The latter
28702 is why it is a bad idea to emit the cfa_restores as a group
28703 on the last instruction here that actually does a restore:
28704 That insn may be reordered with respect to others doing
28705 restores. */
28706 if (flag_shrink_wrap
28707 && !restoring_GPRs_inline
28708 && info->first_fp_reg_save == 64)
28709 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28710
28711 for (i = info->first_gp_reg_save; i < 32; i++)
28712 if (!restoring_GPRs_inline
28713 || using_load_multiple
28714 || rs6000_reg_live_or_pic_offset_p (i))
28715 {
28716 if (cfun->machine->gpr_is_wrapped_separately[i])
28717 continue;
28718
28719 rtx reg = gen_rtx_REG (reg_mode, i);
28720 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28721 }
28722 }
28723
28724 if (!restoring_GPRs_inline
28725 && info->first_fp_reg_save == 64)
28726 {
28727 /* We are jumping to an out-of-line function. */
28728 if (cfa_restores)
28729 emit_cfa_restores (cfa_restores);
28730 return;
28731 }
28732
28733 if (restore_lr && !restoring_GPRs_inline)
28734 {
28735 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28736 restore_saved_lr (0, exit_func);
28737 }
28738
28739 /* Restore fpr's if we need to do it without calling a function. */
28740 if (restoring_FPRs_inline)
28741 {
28742 int offset = info->fp_save_offset + frame_off;
28743 for (i = info->first_fp_reg_save; i < 64; i++)
28744 {
28745 if (save_reg_p (i)
28746 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28747 {
28748 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28749 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28750 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28751 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28752 cfa_restores);
28753 }
28754
28755 offset += fp_reg_size;
28756 }
28757 }
28758
28759 /* If we saved cr, restore it here. Just those that were used. */
28760 if (info->cr_save_p)
28761 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28762
28763 /* If this is V.4, unwind the stack pointer after all of the loads
28764 have been done, or set up r11 if we are restoring fp out of line. */
28765 ptr_regno = 1;
28766 if (!restoring_FPRs_inline)
28767 {
28768 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28769 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28770 ptr_regno = ptr_regno_for_savres (sel);
28771 }
28772
28773 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28774 if (REGNO (frame_reg_rtx) == ptr_regno)
28775 frame_off = 0;
28776
28777 if (insn && restoring_FPRs_inline)
28778 {
28779 if (cfa_restores)
28780 {
28781 REG_NOTES (insn) = cfa_restores;
28782 cfa_restores = NULL_RTX;
28783 }
28784 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28785 RTX_FRAME_RELATED_P (insn) = 1;
28786 }
28787
28788 if (crtl->calls_eh_return)
28789 {
28790 rtx sa = EH_RETURN_STACKADJ_RTX;
28791 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28792 }
28793
28794 if (!sibcall && restoring_FPRs_inline)
28795 {
28796 if (cfa_restores)
28797 {
28798 /* We can't hang the cfa_restores off a simple return,
28799 since the shrink-wrap code sometimes uses an existing
28800 return. This means there might be a path from
28801 pre-prologue code to this return, and dwarf2cfi code
28802 wants the eh_frame unwinder state to be the same on
28803 all paths to any point. So we need to emit the
28804 cfa_restores before the return. For -m64 we really
28805 don't need epilogue cfa_restores at all, except for
28806 this irritating dwarf2cfi-with-shrink-wrap
28807 requirement; the stack red-zone means eh_frame info
28808 from the prologue telling the unwinder to restore
28809 from the stack is perfectly good right to the end of
28810 the function. */
28811 emit_insn (gen_blockage ());
28812 emit_cfa_restores (cfa_restores);
28813 cfa_restores = NULL_RTX;
28814 }
28815
28816 emit_jump_insn (targetm.gen_simple_return ());
28817 }
28818
28819 if (!sibcall && !restoring_FPRs_inline)
28820 {
28821 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28822 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
28823 int elt = 0;
28824 RTVEC_ELT (p, elt++) = ret_rtx;
28825 if (lr)
28826 RTVEC_ELT (p, elt++)
28827 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
28828
28829 /* We have to restore more than two FP registers, so branch to the
28830 restore function. It will return to our caller. */
28831 int i;
28832 int reg;
28833 rtx sym;
28834
28835 if (flag_shrink_wrap)
28836 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28837
28838 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
28839 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
28840 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28841 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28842
28843 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28844 {
28845 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28846
28847 RTVEC_ELT (p, elt++)
28848 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28849 if (flag_shrink_wrap)
28850 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28851 }
28852
28853 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28854 }
28855
28856 if (cfa_restores)
28857 {
28858 if (sibcall)
28859 /* Ensure the cfa_restores are hung off an insn that won't
28860 be reordered above other restores. */
28861 emit_insn (gen_blockage ());
28862
28863 emit_cfa_restores (cfa_restores);
28864 }
28865 }
28866
28867 /* Write function epilogue. */
28868
28869 static void
28870 rs6000_output_function_epilogue (FILE *file,
28871 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
28872 {
28873 #if TARGET_MACHO
28874 macho_branch_islands ();
28875
28876 {
28877 rtx_insn *insn = get_last_insn ();
28878 rtx_insn *deleted_debug_label = NULL;
28879
28880 /* Mach-O doesn't support labels at the end of objects, so if
28881 it looks like we might want one, take special action.
28882
28883 First, collect any sequence of deleted debug labels. */
28884 while (insn
28885 && NOTE_P (insn)
28886 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28887 {
28888 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a
28889 nop; instead set their CODE_LABEL_NUMBER to -1, as
28890 otherwise there would be code generation differences
28891 between -g and -g0. */
28892 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28893 deleted_debug_label = insn;
28894 insn = PREV_INSN (insn);
28895 }
28896
28897 /* Second, if we have:
28898 label:
28899 barrier
28900 then this needs to be detected, so skip past the barrier. */
28901
28902 if (insn && BARRIER_P (insn))
28903 insn = PREV_INSN (insn);
28904
28905 /* Up to now we've only seen notes or barriers. */
28906 if (insn)
28907 {
28908 if (LABEL_P (insn)
28909 || (NOTE_P (insn)
28910 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28911 /* Trailing label: <barrier>. */
28912 fputs ("\tnop\n", file);
28913 else
28914 {
28915 /* Lastly, see if we have a completely empty function body. */
28916 while (insn && ! INSN_P (insn))
28917 insn = PREV_INSN (insn);
28918 /* If we don't find any insns, we've got an empty function body,
28919 i.e. completely empty, without a return or branch. This is
28920 taken as the case where a function body has been removed
28921 because it contains an inline __builtin_unreachable(). GCC
28922 states that reaching __builtin_unreachable() means UB so we're
28923 not obliged to do anything special; however, we want
28924 non-zero-sized function bodies. To meet this, and help the
28925 user out, let's trap the case. */
28926 if (insn == NULL)
28927 fputs ("\ttrap\n", file);
28928 }
28929 }
28930 else if (deleted_debug_label)
28931 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28932 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28933 CODE_LABEL_NUMBER (insn) = -1;
28934 }
28935 #endif
28936
28937 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28938 on its format.
28939
28940 We don't output a traceback table if -finhibit-size-directive was
28941 used. The documentation for -finhibit-size-directive reads
28942 ``don't output a @code{.size} assembler directive, or anything
28943 else that would cause trouble if the function is split in the
28944 middle, and the two halves are placed at locations far apart in
28945 memory.'' The traceback table has this property, since it
28946 includes the offset from the start of the function to the
28947 traceback table itself.
28948
28949 System V.4 PowerPC (and the embedded ABI derived from it) uses a
28950 different traceback table. */
28951 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28952 && ! flag_inhibit_size_directive
28953 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28954 {
28955 const char *fname = NULL;
28956 const char *language_string = lang_hooks.name;
28957 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28958 int i;
28959 int optional_tbtab;
28960 rs6000_stack_t *info = rs6000_stack_info ();
28961
28962 if (rs6000_traceback == traceback_full)
28963 optional_tbtab = 1;
28964 else if (rs6000_traceback == traceback_part)
28965 optional_tbtab = 0;
28966 else
28967 optional_tbtab = !optimize_size && !TARGET_ELF;
28968
28969 if (optional_tbtab)
28970 {
28971 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28972 while (*fname == '.') /* V.4 encodes . in the name */
28973 fname++;
28974
28975 /* Need label immediately before tbtab, so we can compute
28976 its offset from the function start. */
28977 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28978 ASM_OUTPUT_LABEL (file, fname);
28979 }
28980
28981 /* The .tbtab pseudo-op can only be used for the first eight
28982 expressions, since it can't handle the possibly variable
28983 length fields that follow. However, if you omit the optional
28984 fields, the assembler outputs zeros for all optional fields
28985 anyway, giving each variable-length field its minimum length
28986 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28987 pseudo-op at all. */
28988
28989 /* An all-zero word flags the start of the tbtab, for debuggers
28990 that have to find it by searching forward from the entry
28991 point or from the current pc. */
28992 fputs ("\t.long 0\n", file);
28993
28994 /* Tbtab format type. Use format type 0. */
28995 fputs ("\t.byte 0,", file);
28996
28997 /* Language type. Unfortunately, there does not seem to be any
28998 official way to discover the language being compiled, so we
28999 use language_string.
29000 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
29001 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
29002 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
29003 either, so for now use 0. */
29004 if (lang_GNU_C ()
29005 || ! strcmp (language_string, "GNU GIMPLE")
29006 || ! strcmp (language_string, "GNU Go")
29007 || ! strcmp (language_string, "libgccjit"))
29008 i = 0;
29009 else if (! strcmp (language_string, "GNU F77")
29010 || lang_GNU_Fortran ())
29011 i = 1;
29012 else if (! strcmp (language_string, "GNU Pascal"))
29013 i = 2;
29014 else if (! strcmp (language_string, "GNU Ada"))
29015 i = 3;
29016 else if (lang_GNU_CXX ()
29017 || ! strcmp (language_string, "GNU Objective-C++"))
29018 i = 9;
29019 else if (! strcmp (language_string, "GNU Java"))
29020 i = 13;
29021 else if (! strcmp (language_string, "GNU Objective-C"))
29022 i = 14;
29023 else
29024 gcc_unreachable ();
29025 fprintf (file, "%d,", i);
29026
29027 /* 8 single bit fields: global linkage (not set for C extern linkage,
29028 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29029 from start of procedure stored in tbtab, internal function, function
29030 has controlled storage, function has no toc, function uses fp,
29031 function logs/aborts fp operations. */
29032 /* Assume that fp operations are used if any fp reg must be saved. */
29033 fprintf (file, "%d,",
29034 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
29035
29036 /* 6 bitfields: function is interrupt handler, name present in
29037 proc table, function calls alloca, on condition directives
29038 (controls stack walks, 3 bits), saves condition reg, saves
29039 link reg. */
29040 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29041 set up as a frame pointer, even when there is no alloca call. */
29042 fprintf (file, "%d,",
29043 ((optional_tbtab << 6)
29044 | ((optional_tbtab & frame_pointer_needed) << 5)
29045 | (info->cr_save_p << 1)
29046 | (info->lr_save_p)));
29047
29048 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29049 (6 bits). */
29050 fprintf (file, "%d,",
29051 (info->push_p << 7) | (64 - info->first_fp_reg_save));
29052
29053 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29054 fprintf (file, "%d,", (32 - first_reg_to_save ()));
29055
29056 if (optional_tbtab)
29057 {
29058 /* Compute the parameter info from the function decl argument
29059 list. */
29060 tree decl;
29061 int next_parm_info_bit = 31;
29062
29063 for (decl = DECL_ARGUMENTS (current_function_decl);
29064 decl; decl = DECL_CHAIN (decl))
29065 {
29066 rtx parameter = DECL_INCOMING_RTL (decl);
29067 machine_mode mode = GET_MODE (parameter);
29068
29069 if (GET_CODE (parameter) == REG)
29070 {
29071 if (SCALAR_FLOAT_MODE_P (mode))
29072 {
29073 int bits;
29074
29075 float_parms++;
29076
29077 switch (mode)
29078 {
29079 case SFmode:
29080 case SDmode:
29081 bits = 0x2;
29082 break;
29083
29084 case DFmode:
29085 case DDmode:
29086 case TFmode:
29087 case TDmode:
29088 case IFmode:
29089 case KFmode:
29090 bits = 0x3;
29091 break;
29092
29093 default:
29094 gcc_unreachable ();
29095 }
29096
29097 /* If only one bit will fit, don't OR in this entry. */
29098 if (next_parm_info_bit > 0)
29099 parm_info |= (bits << (next_parm_info_bit - 1));
29100 next_parm_info_bit -= 2;
29101 }
29102 else
29103 {
29104 fixed_parms += ((GET_MODE_SIZE (mode)
29105 + (UNITS_PER_WORD - 1))
29106 / UNITS_PER_WORD);
29107 next_parm_info_bit -= 1;
29108 }
29109 }
29110 }
29111 }
29112
29113 /* Number of fixed point parameters. */
29114 /* This is actually the number of words of fixed point parameters; thus
29115 an 8-byte struct counts as 2, and thus the maximum value is 8. */
29116 fprintf (file, "%d,", fixed_parms);
29117
29118 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29119 all on stack. */
29120 /* This is actually the number of fp registers that hold parameters,
29121 and thus the maximum value is 13. */
29122 /* Set parameters on stack bit if parameters are not in their original
29123 registers, regardless of whether they are on the stack? Xlc
29124 seems to set the bit when not optimizing. */
29125 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
29126
29127 if (optional_tbtab)
29128 {
29129 /* Optional fields follow. Some are variable length. */
29130
29131 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
29132 float, 11 double float. */
29133 /* There is an entry for each parameter in a register, in the order
29134 that they occur in the parameter list. Any intervening arguments
29135 on the stack are ignored. If the list overflows a long (max
29136 possible length 34 bits) then completely leave off all elements
29137 that don't fit. */
29138 /* Only emit this long if there was at least one parameter. */
29139 if (fixed_parms || float_parms)
29140 fprintf (file, "\t.long %d\n", parm_info);
29141
29142 /* Offset from start of code to tb table. */
29143 fputs ("\t.long ", file);
29144 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29145 RS6000_OUTPUT_BASENAME (file, fname);
29146 putc ('-', file);
29147 rs6000_output_function_entry (file, fname);
29148 putc ('\n', file);
29149
29150 /* Interrupt handler mask. */
29151 /* Omit this long, since we never set the interrupt handler bit
29152 above. */
29153
29154 /* Number of CTL (controlled storage) anchors. */
29155 /* Omit this long, since the has_ctl bit is never set above. */
29156
29157 /* Displacement into stack of each CTL anchor. */
29158 /* Omit this list of longs, because there are no CTL anchors. */
29159
29160 /* Length of function name. */
29161 if (*fname == '*')
29162 ++fname;
29163 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29164
29165 /* Function name. */
29166 assemble_string (fname, strlen (fname));
29167
29168 /* Register for alloca automatic storage; this is always reg 31.
29169 Only emit this if the alloca bit was set above. */
29170 if (frame_pointer_needed)
29171 fputs ("\t.byte 31\n", file);
29172
29173 fputs ("\t.align 2\n", file);
29174 }
29175 }
29176
29177 /* Arrange to define .LCTOC1 label, if not already done. */
29178 if (need_toc_init)
29179 {
29180 need_toc_init = 0;
29181 if (!toc_initialized)
29182 {
29183 switch_to_section (toc_section);
29184 switch_to_section (current_function_section ());
29185 }
29186 }
29187 }
29188
29189 /* -fsplit-stack support. */
29190
29191 /* A SYMBOL_REF for __morestack. */
29192 static GTY(()) rtx morestack_ref;
29193
29194 static rtx
29195 gen_add3_const (rtx rt, rtx ra, long c)
29196 {
29197 if (TARGET_64BIT)
29198 return gen_adddi3 (rt, ra, GEN_INT (c));
29199 else
29200 return gen_addsi3 (rt, ra, GEN_INT (c));
29201 }
29202
29203 /* Emit -fsplit-stack prologue, which goes before the regular function
29204 prologue (at local entry point in the case of ELFv2). */
29205
29206 void
29207 rs6000_expand_split_stack_prologue (void)
29208 {
29209 rs6000_stack_t *info = rs6000_stack_info ();
29210 unsigned HOST_WIDE_INT allocate;
29211 long alloc_hi, alloc_lo;
29212 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29213 rtx_insn *insn;
29214
29215 gcc_assert (flag_split_stack && reload_completed);
29216
29217 if (!info->push_p)
29218 return;
29219
29220 if (global_regs[29])
29221 {
29222 error ("-fsplit-stack uses register r29");
29223 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29224 "conflicts with %qD", global_regs_decl[29]);
29225 }
29226
29227 allocate = info->total_size;
29228 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29229 {
29230 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29231 return;
29232 }
29233 if (morestack_ref == NULL_RTX)
29234 {
29235 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29236 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29237 | SYMBOL_FLAG_FUNCTION);
29238 }
29239
29240 r0 = gen_rtx_REG (Pmode, 0);
29241 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29242 r12 = gen_rtx_REG (Pmode, 12);
29243 emit_insn (gen_load_split_stack_limit (r0));
29244 /* Always emit two insns here to calculate the requested stack,
29245 so that the linker can edit them when adjusting size for calling
29246 non-split-stack code. */
29247 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29248 alloc_lo = -allocate - alloc_hi;
29249 if (alloc_hi != 0)
29250 {
29251 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29252 if (alloc_lo != 0)
29253 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29254 else
29255 emit_insn (gen_nop ());
29256 }
29257 else
29258 {
29259 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29260 emit_insn (gen_nop ());
29261 }
29262
29263 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29264 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29265 ok_label = gen_label_rtx ();
29266 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29267 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29268 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29269 pc_rtx);
29270 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29271 JUMP_LABEL (insn) = ok_label;
29272 /* Mark the jump as very likely to be taken. */
29273 add_int_reg_note (insn, REG_BR_PROB,
29274 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
29275
29276 lr = gen_rtx_REG (Pmode, LR_REGNO);
29277 insn = emit_move_insn (r0, lr);
29278 RTX_FRAME_RELATED_P (insn) = 1;
29279 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29280 RTX_FRAME_RELATED_P (insn) = 1;
29281
29282 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29283 const0_rtx, const0_rtx));
29284 call_fusage = NULL_RTX;
29285 use_reg (&call_fusage, r12);
29286 /* Say the call uses r0, even though it doesn't, to stop regrename
29287 from twiddling with the insns saving lr, trashing args for cfun.
29288 The insns restoring lr are similarly protected by making
29289 split_stack_return use r0. */
29290 use_reg (&call_fusage, r0);
29291 add_function_usage_to (insn, call_fusage);
29292 /* Indicate that this function can't jump to non-local gotos. */
29293 make_reg_eh_region_note_nothrow_nononlocal (insn);
29294 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29295 insn = emit_move_insn (lr, r0);
29296 add_reg_note (insn, REG_CFA_RESTORE, lr);
29297 RTX_FRAME_RELATED_P (insn) = 1;
29298 emit_insn (gen_split_stack_return ());
29299
29300 emit_label (ok_label);
29301 LABEL_NUSES (ok_label) = 1;
29302 }
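/* Illustrative note (not in the original source): the split above is
   the usual addis/addi decomposition.  For allocate = 0x12345 (74565):

     alloc_hi = (-74565 + 0x8000) & ~0xffffL = -65536
     alloc_lo = -74565 - (-65536)            = -9029

   Rounding to the nearest 64K keeps alloc_lo within [-0x8000, 0x7fff],
   so both halves fit the signed 16-bit immediates of addis and addi,
   and exactly two insns are always emitted for the linker to rewrite.  */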
29303
29304 /* Return the internal arg pointer used for function incoming
29305 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29306 to copy it to a pseudo in order for it to be preserved over calls
29307 and suchlike. We'd really like to use a pseudo here for the
29308 internal arg pointer but data-flow analysis is not prepared to
29309 accept pseudos as live at the beginning of a function. */
29310
29311 static rtx
29312 rs6000_internal_arg_pointer (void)
29313 {
29314 if (flag_split_stack
29315 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29316 == NULL))
29317
29318 {
29319 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29320 {
29321 rtx pat;
29322
29323 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29324 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29325
29326 /* Put the pseudo initialization right after the note at the
29327 beginning of the function. */
29328 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29329 gen_rtx_REG (Pmode, 12));
29330 push_topmost_sequence ();
29331 emit_insn_after (pat, get_insns ());
29332 pop_topmost_sequence ();
29333 }
29334 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29335 FIRST_PARM_OFFSET (current_function_decl));
29336 }
29337 return virtual_incoming_args_rtx;
29338 }
29339
29340 /* We may have to tell the dataflow pass that the split stack prologue
29341 is initializing a register. */
29342
29343 static void
29344 rs6000_live_on_entry (bitmap regs)
29345 {
29346 if (flag_split_stack)
29347 bitmap_set_bit (regs, 12);
29348 }
29349
29350 /* Emit -fsplit-stack dynamic stack allocation space check. */
29351
29352 void
29353 rs6000_split_stack_space_check (rtx size, rtx label)
29354 {
29355 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29356 rtx limit = gen_reg_rtx (Pmode);
29357 rtx requested = gen_reg_rtx (Pmode);
29358 rtx cmp = gen_reg_rtx (CCUNSmode);
29359 rtx jump;
29360
29361 emit_insn (gen_load_split_stack_limit (limit));
29362 if (CONST_INT_P (size))
29363 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29364 else
29365 {
29366 size = force_reg (Pmode, size);
29367 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29368 }
29369 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29370 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29371 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29372 gen_rtx_LABEL_REF (VOIDmode, label),
29373 pc_rtx);
29374 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29375 JUMP_LABEL (jump) = label;
29376 }
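/* Illustrative note (not in the original source): as generated above,
   the branch is taken to LABEL when requested >= limit (unsigned),
   i.e. when the dynamic allocation still fits above the split-stack
   guard; otherwise execution falls through to the slow path.  */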
29377 \f
29378 /* A C compound statement that outputs the assembler code for a thunk
29379 function, used to implement C++ virtual function calls with
29380 multiple inheritance. The thunk acts as a wrapper around a virtual
29381 function, adjusting the implicit object parameter before handing
29382 control off to the real function.
29383
29384 First, emit code to add the integer DELTA to the location that
29385 contains the incoming first argument. Assume that this argument
29386 contains a pointer, and is the one used to pass the `this' pointer
29387 in C++. This is the incoming argument *before* the function
29388 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29389 values of all other incoming arguments.
29390
29391 After the addition, emit code to jump to FUNCTION, which is a
29392 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29393 not touch the return address. Hence returning from FUNCTION will
29394 return to whoever called the current `thunk'.
29395
29396 The effect must be as if FUNCTION had been called directly with the
29397 adjusted first argument. This macro is responsible for emitting
29398 all of the code for a thunk function; output_function_prologue()
29399 and output_function_epilogue() are not invoked.
29400
29401 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29402 been extracted from it.) It might possibly be useful on some
29403 targets, but probably not.
29404
29405 If you do not define this macro, the target-independent code in the
29406 C++ frontend will generate a less efficient heavyweight thunk that
29407 calls FUNCTION instead of jumping to it. The generic approach does
29408 not support varargs. */
29409
29410 static void
29411 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29412 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29413 tree function)
29414 {
29415 rtx this_rtx, funexp;
29416 rtx_insn *insn;
29417
29418 reload_completed = 1;
29419 epilogue_completed = 1;
29420
29421 /* Mark the end of the (empty) prologue. */
29422 emit_note (NOTE_INSN_PROLOGUE_END);
29423
29424 /* Find the "this" pointer. If the function returns a structure,
29425 the structure return pointer is in r3. */
29426 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29427 this_rtx = gen_rtx_REG (Pmode, 4);
29428 else
29429 this_rtx = gen_rtx_REG (Pmode, 3);
29430
29431 /* Apply the constant offset, if required. */
29432 if (delta)
29433 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29434
29435 /* Apply the offset from the vtable, if required. */
29436 if (vcall_offset)
29437 {
29438 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29439 rtx tmp = gen_rtx_REG (Pmode, 12);
29440
29441 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
29442 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29443 {
29444 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29445 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29446 }
29447 else
29448 {
29449 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29450
29451 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29452 }
29453 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29454 }
29455
29456 /* Generate a tail call to the target function. */
29457 if (!TREE_USED (function))
29458 {
29459 assemble_external (function);
29460 TREE_USED (function) = 1;
29461 }
29462 funexp = XEXP (DECL_RTL (function), 0);
29463 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29464
29465 #if TARGET_MACHO
29466 if (MACHOPIC_INDIRECT)
29467 funexp = machopic_indirect_call_target (funexp);
29468 #endif
29469
29470 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29471 generate sibcall RTL explicitly. */
29472 insn = emit_call_insn (
29473 gen_rtx_PARALLEL (VOIDmode,
29474 gen_rtvec (3,
29475 gen_rtx_CALL (VOIDmode,
29476 funexp, const0_rtx),
29477 gen_rtx_USE (VOIDmode, const0_rtx),
29478 simple_return_rtx)));
29479 SIBLING_CALL_P (insn) = 1;
29480 emit_barrier ();
29481
29482 /* Run just enough of rest_of_compilation to get the insns emitted.
29483 There's not really enough bulk here to make other passes such as
29484 instruction scheduling worth while. Note that use_thunk calls
29485 assemble_start_function and assemble_end_function. */
29486 insn = get_insns ();
29487 shorten_branches (insn);
29488 final_start_function (insn, file, 1);
29489 final (insn, file, 1);
29490 final_end_function ();
29491
29492 reload_completed = 0;
29493 epilogue_completed = 0;
29494 }
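/* For reference only (not part of the original source): the pointer
   adjustment emitted above computes, in C terms, the following.  The
   helper name is hypothetical, and `long' stands in for a
   pointer-sized integer; `this' arrives in r3 (or r4 when the
   function returns an aggregate in memory).  */
#if 0
static void *
thunk_this_adjust (void *this_p, long delta, long vcall_offset)
{
  /* Constant adjustment.  */
  this_p = (char *) this_p + delta;
  if (vcall_offset != 0)
    {
      /* Load the vtable pointer, then add the adjustment stored at
	 VCALL_OFFSET inside the vtable.  */
      long adj = *(long *) (*(char **) this_p + vcall_offset);
      this_p = (char *) this_p + adj;
    }
  return this_p;
}
#endif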
29495 \f
29496 /* A quick summary of the various types of 'constant-pool tables'
29497 under PowerPC:
29498
29499 Target      Flags                 Name             One table per
29500 AIX         (none)                AIX TOC          object file
29501 AIX         -mfull-toc            AIX TOC          object file
29502 AIX         -mminimal-toc         AIX minimal TOC  translation unit
29503 SVR4/EABI   (none)                SVR4 SDATA       object file
29504 SVR4/EABI   -fpic                 SVR4 pic         object file
29505 SVR4/EABI   -fPIC                 SVR4 PIC         translation unit
29506 SVR4/EABI   -mrelocatable         EABI TOC         function
29507 SVR4/EABI   -maix                 AIX TOC          object file
29508 SVR4/EABI   -maix -mminimal-toc
29509                                   AIX minimal TOC  translation unit
29510 
29511 Name             Reg.  Reg. set  Entries   Entries contain:
29512                        by        made by   addrs?  fp?      sum?
29513 
29514 AIX TOC          2     crt0      as        Y       option   option
29515 AIX minimal TOC  30    prolog    gcc       Y       Y        option
29516 SVR4 SDATA       13    crt0      gcc       N       Y        N
29517 SVR4 pic         30    prolog    ld        Y       not yet  N
29518 SVR4 PIC         30    prolog    gcc       Y       option   option
29519 EABI TOC         30    prolog    gcc       Y       option   option
29520
29521 */
29522
29523 /* Hash functions for the hash table. */
29524
29525 static unsigned
29526 rs6000_hash_constant (rtx k)
29527 {
29528 enum rtx_code code = GET_CODE (k);
29529 machine_mode mode = GET_MODE (k);
29530 unsigned result = (code << 3) ^ mode;
29531 const char *format;
29532 int flen, fidx;
29533
29534 format = GET_RTX_FORMAT (code);
29535 flen = strlen (format);
29536 fidx = 0;
29537
29538 switch (code)
29539 {
29540 case LABEL_REF:
29541 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29542
29543 case CONST_WIDE_INT:
29544 {
29545 int i;
29546 flen = CONST_WIDE_INT_NUNITS (k);
29547 for (i = 0; i < flen; i++)
29548 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29549 return result;
29550 }
29551
29552 case CONST_DOUBLE:
29553 if (mode != VOIDmode)
29554 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29555 flen = 2;
29556 break;
29557
29558 case CODE_LABEL:
29559 fidx = 3;
29560 break;
29561
29562 default:
29563 break;
29564 }
29565
29566 for (; fidx < flen; fidx++)
29567 switch (format[fidx])
29568 {
29569 case 's':
29570 {
29571 unsigned i, len;
29572 const char *str = XSTR (k, fidx);
29573 len = strlen (str);
29574 result = result * 613 + len;
29575 for (i = 0; i < len; i++)
29576 result = result * 613 + (unsigned) str[i];
29577 break;
29578 }
29579 case 'u':
29580 case 'e':
29581 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29582 break;
29583 case 'i':
29584 case 'n':
29585 result = result * 613 + (unsigned) XINT (k, fidx);
29586 break;
29587 case 'w':
29588 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29589 result = result * 613 + (unsigned) XWINT (k, fidx);
29590 else
29591 {
29592 size_t i;
29593 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29594 result = result * 613 + (unsigned) (XWINT (k, fidx)
29595 >> CHAR_BIT * i);
29596 }
29597 break;
29598 case '0':
29599 break;
29600 default:
29601 gcc_unreachable ();
29602 }
29603
29604 return result;
29605 }
29606
29607 hashval_t
29608 toc_hasher::hash (toc_hash_struct *thc)
29609 {
29610 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29611 }
29612
29613 /* Compare H1 and H2 for equivalence. */
29614
29615 bool
29616 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29617 {
29618 rtx r1 = h1->key;
29619 rtx r2 = h2->key;
29620
29621 if (h1->key_mode != h2->key_mode)
29622 return 0;
29623
29624 return rtx_equal_p (r1, r2);
29625 }
29626
29627 /* These are the names given by the C++ front-end to vtables, and
29628 vtable-like objects. Ideally, this logic should not be here;
29629 instead, there should be some programmatic way of inquiring as
29630 to whether or not an object is a vtable. */
29631
29632 #define VTABLE_NAME_P(NAME) \
29633 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29634 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29635 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29636 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29637 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
29638
29639 #ifdef NO_DOLLAR_IN_LABEL
29640 /* Return a GGC-allocated character string translating dollar signs in
29641 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29642
29643 const char *
29644 rs6000_xcoff_strip_dollar (const char *name)
29645 {
29646 char *strip, *p;
29647 const char *q;
29648 size_t len;
29649
29650 q = (const char *) strchr (name, '$');
29651
29652 if (q == 0 || q == name)
29653 return name;
29654
29655 len = strlen (name);
29656 strip = XALLOCAVEC (char, len + 1);
29657 strcpy (strip, name);
29658 p = strip + (q - name);
29659 while (p)
29660 {
29661 *p = '_';
29662 p = strchr (p + 1, '$');
29663 }
29664
29665 return ggc_alloc_string (strip, len);
29666 }
29667 #endif
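/* Usage example (illustrative, not in the original source):
   rs6000_xcoff_strip_dollar ("foo$bar$baz") returns "foo_bar_baz";
   a name beginning with '$', or containing no '$' at all, is
   returned unchanged.  */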
29668
29669 void
29670 rs6000_output_symbol_ref (FILE *file, rtx x)
29671 {
29672 const char *name = XSTR (x, 0);
29673
29674 /* Currently C++ toc references to vtables can be emitted before it
29675 is decided whether the vtable is public or private. If this is
29676 the case, then the linker will eventually complain that there is
29677 a reference to an unknown section. Thus, for vtables only,
29678 we emit the TOC reference to reference the identifier and not the
29679 symbol. */
29680 if (VTABLE_NAME_P (name))
29681 {
29682 RS6000_OUTPUT_BASENAME (file, name);
29683 }
29684 else
29685 assemble_name (file, name);
29686 }
29687
29688 /* Output a TOC entry. We derive the entry name from what is being
29689 written. */
29690
29691 void
29692 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29693 {
29694 char buf[256];
29695 const char *name = buf;
29696 rtx base = x;
29697 HOST_WIDE_INT offset = 0;
29698
29699 gcc_assert (!TARGET_NO_TOC);
29700
29701 /* When the linker won't eliminate them, don't output duplicate
29702 TOC entries (this happens on AIX if there is any kind of TOC,
29703 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29704 CODE_LABELs. */
29705 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29706 {
29707 struct toc_hash_struct *h;
29708
29709 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29710 time because GGC is not initialized at that point. */
29711 if (toc_hash_table == NULL)
29712 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29713
29714 h = ggc_alloc<toc_hash_struct> ();
29715 h->key = x;
29716 h->key_mode = mode;
29717 h->labelno = labelno;
29718
29719 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29720 if (*found == NULL)
29721 *found = h;
29722 else /* This is indeed a duplicate.
29723 Set this label equal to that label. */
29724 {
29725 fputs ("\t.set ", file);
29726 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29727 fprintf (file, "%d,", labelno);
29728 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29729 fprintf (file, "%d\n", ((*found)->labelno));
29730
29731 #ifdef HAVE_AS_TLS
29732 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29733 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29734 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29735 {
29736 fputs ("\t.set ", file);
29737 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29738 fprintf (file, "%d,", labelno);
29739 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29740 fprintf (file, "%d\n", ((*found)->labelno));
29741 }
29742 #endif
29743 return;
29744 }
29745 }
29746
29747 /* If we're going to put a double constant in the TOC, make sure it's
29748 aligned properly when strict alignment is on. */
29749 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29750 && STRICT_ALIGNMENT
29751 && GET_MODE_BITSIZE (mode) >= 64
29752 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
29753 ASM_OUTPUT_ALIGN (file, 3);
29755
29756 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29757
29758 /* Handle FP constants specially. Note that if we have a minimal
29759 TOC, things we put here aren't actually in the TOC, so we can allow
29760 FP constants. */
29761 if (GET_CODE (x) == CONST_DOUBLE
29762 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29763 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29764 {
29765 long k[4];
29766
29767 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29768 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29769 else
29770 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29771
29772 if (TARGET_64BIT)
29773 {
29774 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29775 fputs (DOUBLE_INT_ASM_OP, file);
29776 else
29777 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29778 k[0] & 0xffffffff, k[1] & 0xffffffff,
29779 k[2] & 0xffffffff, k[3] & 0xffffffff);
29780 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29781 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29782 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29783 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29784 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29785 return;
29786 }
29787 else
29788 {
29789 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29790 fputs ("\t.long ", file);
29791 else
29792 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29793 k[0] & 0xffffffff, k[1] & 0xffffffff,
29794 k[2] & 0xffffffff, k[3] & 0xffffffff);
29795 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29796 k[0] & 0xffffffff, k[1] & 0xffffffff,
29797 k[2] & 0xffffffff, k[3] & 0xffffffff);
29798 return;
29799 }
29800 }
29801 else if (GET_CODE (x) == CONST_DOUBLE
29802 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29803 {
29804 long k[2];
29805
29806 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29807 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29808 else
29809 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29810
29811 if (TARGET_64BIT)
29812 {
29813 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29814 fputs (DOUBLE_INT_ASM_OP, file);
29815 else
29816 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29817 k[0] & 0xffffffff, k[1] & 0xffffffff);
29818 fprintf (file, "0x%lx%08lx\n",
29819 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29820 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29821 return;
29822 }
29823 else
29824 {
29825 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29826 fputs ("\t.long ", file);
29827 else
29828 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29829 k[0] & 0xffffffff, k[1] & 0xffffffff);
29830 fprintf (file, "0x%lx,0x%lx\n",
29831 k[0] & 0xffffffff, k[1] & 0xffffffff);
29832 return;
29833 }
29834 }
29835 else if (GET_CODE (x) == CONST_DOUBLE
29836 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29837 {
29838 long l;
29839
29840 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29841 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29842 else
29843 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29844
29845 if (TARGET_64BIT)
29846 {
29847 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29848 fputs (DOUBLE_INT_ASM_OP, file);
29849 else
29850 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29851 if (WORDS_BIG_ENDIAN)
29852 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29853 else
29854 fprintf (file, "0x%lx\n", l & 0xffffffff);
29855 return;
29856 }
29857 else
29858 {
29859 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29860 fputs ("\t.long ", file);
29861 else
29862 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29863 fprintf (file, "0x%lx\n", l & 0xffffffff);
29864 return;
29865 }
29866 }
29867 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29868 {
29869 unsigned HOST_WIDE_INT low;
29870 HOST_WIDE_INT high;
29871
29872 low = INTVAL (x) & 0xffffffff;
29873 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29874
29875 /* TOC entries are always Pmode-sized, so when big-endian
29876 smaller integer constants in the TOC need to be padded.
29877 (This is still a win over putting the constants in
29878 a separate constant pool, because then we'd have
29879 to have both a TOC entry _and_ the actual constant.)
29880
29881 For a 32-bit target, CONST_INT values are loaded and shifted
29882 entirely within `low' and can be stored in one TOC entry. */
29883
29884 /* It would be easy to make this work, but it doesn't now. */
29885 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29886
29887 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29888 {
29889 low |= high << 32;
29890 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29891 high = (HOST_WIDE_INT) low >> 32;
29892 low &= 0xffffffff;
29893 }
29894
29895 if (TARGET_64BIT)
29896 {
29897 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29898 fputs (DOUBLE_INT_ASM_OP, file);
29899 else
29900 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29901 (long) high & 0xffffffff, (long) low & 0xffffffff);
29902 fprintf (file, "0x%lx%08lx\n",
29903 (long) high & 0xffffffff, (long) low & 0xffffffff);
29904 return;
29905 }
29906 else
29907 {
29908 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29909 {
29910 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29911 fputs ("\t.long ", file);
29912 else
29913 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29914 (long) high & 0xffffffff, (long) low & 0xffffffff);
29915 fprintf (file, "0x%lx,0x%lx\n",
29916 (long) high & 0xffffffff, (long) low & 0xffffffff);
29917 }
29918 else
29919 {
29920 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29921 fputs ("\t.long ", file);
29922 else
29923 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29924 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29925 }
29926 return;
29927 }
29928 }
29929
29930 if (GET_CODE (x) == CONST)
29931 {
29932 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29933 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29934
29935 base = XEXP (XEXP (x, 0), 0);
29936 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29937 }
29938
29939 switch (GET_CODE (base))
29940 {
29941 case SYMBOL_REF:
29942 name = XSTR (base, 0);
29943 break;
29944
29945 case LABEL_REF:
29946 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29947 CODE_LABEL_NUMBER (XEXP (base, 0)));
29948 break;
29949
29950 case CODE_LABEL:
29951 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29952 break;
29953
29954 default:
29955 gcc_unreachable ();
29956 }
29957
29958 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29959 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29960 else
29961 {
29962 fputs ("\t.tc ", file);
29963 RS6000_OUTPUT_BASENAME (file, name);
29964
29965 if (offset < 0)
29966 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29967 else if (offset)
29968 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29969
29970 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29971 after other TOC symbols, reducing overflow of small TOC access
29972 to [TC] symbols. */
29973 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29974 ? "[TE]," : "[TC],", file);
29975 }
29976
29977 /* Currently C++ toc references to vtables can be emitted before it
29978 is decided whether the vtable is public or private. If this is
29979 the case, then the linker will eventually complain that there is
29980 a TOC reference to an unknown section. Thus, for vtables only,
29981 we emit the TOC reference to reference the symbol and not the
29982 section. */
29983 if (VTABLE_NAME_P (name))
29984 {
29985 RS6000_OUTPUT_BASENAME (file, name);
29986 if (offset < 0)
29987 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29988 else if (offset > 0)
29989 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29990 }
29991 else
29992 output_addr_const (file, x);
29993
29994 #if HAVE_AS_TLS
29995 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
29996 {
29997 switch (SYMBOL_REF_TLS_MODEL (base))
29998 {
29999 case 0:
30000 break;
30001 case TLS_MODEL_LOCAL_EXEC:
30002 fputs ("@le", file);
30003 break;
30004 case TLS_MODEL_INITIAL_EXEC:
30005 fputs ("@ie", file);
30006 break;
30007 /* Use global-dynamic for local-dynamic. */
30008 case TLS_MODEL_GLOBAL_DYNAMIC:
30009 case TLS_MODEL_LOCAL_DYNAMIC:
30010 putc ('\n', file);
30011 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
30012 fputs ("\t.tc .", file);
30013 RS6000_OUTPUT_BASENAME (file, name);
30014 fputs ("[TC],", file);
30015 output_addr_const (file, x);
30016 fputs ("@m", file);
30017 break;
30018 default:
30019 gcc_unreachable ();
30020 }
30021 }
30022 #endif
30023
30024 putc ('\n', file);
30025 }
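/* Example of the output (illustrative, not in the original source):
   on 64-bit AIX without -mminimal-toc, the double constant 1.0
   (0x3ff0000000000000) produces a TOC entry along the lines of

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while ELF targets and -mminimal-toc emit only the data directive
   after the internal LC label.  */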
30026 \f
30027 /* Output an assembler pseudo-op to write an ASCII string of N characters
30028 starting at P to FILE.
30029
30030 On the RS/6000, we have to do this using the .byte operation and
30031 write out special characters outside the quoted string.
30032 Also, the assembler is broken; very long strings are truncated,
30033 so we must artificially break them up early. */
30034
30035 void
30036 output_ascii (FILE *file, const char *p, int n)
30037 {
30038 char c;
30039 int i, count_string;
30040 const char *for_string = "\t.byte \"";
30041 const char *for_decimal = "\t.byte ";
30042 const char *to_close = NULL;
30043
30044 count_string = 0;
30045 for (i = 0; i < n; i++)
30046 {
30047 c = *p++;
30048 if (c >= ' ' && c < 0177)
30049 {
30050 if (for_string)
30051 fputs (for_string, file);
30052 putc (c, file);
30053
30054 /* Write two quotes to get one. */
30055 if (c == '"')
30056 {
30057 putc (c, file);
30058 ++count_string;
30059 }
30060
30061 for_string = NULL;
30062 for_decimal = "\"\n\t.byte ";
30063 to_close = "\"\n";
30064 ++count_string;
30065
30066 if (count_string >= 512)
30067 {
30068 fputs (to_close, file);
30069
30070 for_string = "\t.byte \"";
30071 for_decimal = "\t.byte ";
30072 to_close = NULL;
30073 count_string = 0;
30074 }
30075 }
30076 else
30077 {
30078 if (for_decimal)
30079 fputs (for_decimal, file);
30080 fprintf (file, "%d", c);
30081
30082 for_string = "\n\t.byte \"";
30083 for_decimal = ", ";
30084 to_close = "\n";
30085 count_string = 0;
30086 }
30087 }
30088
30089 /* Now close the string if we have written one. Then end the line. */
30090 if (to_close)
30091 fputs (to_close, file);
30092 }
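/* Example (illustrative, not in the original source): for the 3-byte
   input "Hi\n" the function emits

	.byte "Hi"
	.byte 10

   printable runs go into one quoted .byte string (with '"' doubled
   when it occurs), and each unprintable byte becomes a decimal .byte
   operand.  */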
30093 \f
30094 /* Generate a unique section name for FILENAME for a section type
30095 represented by SECTION_DESC. Output goes into BUF.
30096
30097 SECTION_DESC can be any string, as long as it is different for each
30098 possible section type.
30099
30100 We name the section in the same manner as xlc. The name begins with an
30101 underscore followed by the filename (after stripping any leading directory
30102 names) with the last period replaced by the string SECTION_DESC. If
30103 FILENAME does not contain a period, SECTION_DESC is appended to the end of
30104 the name. */
30105
30106 void
30107 rs6000_gen_section_name (char **buf, const char *filename,
30108 const char *section_desc)
30109 {
30110 const char *q, *after_last_slash, *last_period = 0;
30111 char *p;
30112 int len;
30113
30114 after_last_slash = filename;
30115 for (q = filename; *q; q++)
30116 {
30117 if (*q == '/')
30118 after_last_slash = q + 1;
30119 else if (*q == '.')
30120 last_period = q;
30121 }
30122
30123 len = strlen (after_last_slash) + strlen (section_desc) + 2;
30124 *buf = (char *) xmalloc (len);
30125
30126 p = *buf;
30127 *p++ = '_';
30128
30129 for (q = after_last_slash; *q; q++)
30130 {
30131 if (q == last_period)
30132 {
30133 strcpy (p, section_desc);
30134 p += strlen (section_desc);
30135 break;
30136 }
30137
30138 else if (ISALNUM (*q))
30139 *p++ = *q;
30140 }
30141
30142 if (last_period == 0)
30143 strcpy (p, section_desc);
30144 else
30145 *p = '\0';
30146 }
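/* Usage example (illustrative, not in the original source): for
   FILENAME "/src/foo.c" and SECTION_DESC "ro_" the result is
   "_fooro_"; directories and non-alphanumeric characters are dropped,
   and SECTION_DESC replaces everything from the last period onward.  */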
30147 \f
30148 /* Emit profile function. */
30149
30150 void
30151 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30152 {
30153 /* Non-standard profiling for kernels, which just saves LR then calls
30154 _mcount without worrying about arg saves. The idea is to change
30155 the function prologue as little as possible as it isn't easy to
30156 account for arg save/restore code added just for _mcount. */
30157 if (TARGET_PROFILE_KERNEL)
30158 return;
30159
30160 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30161 {
30162 #ifndef NO_PROFILE_COUNTERS
30163 # define NO_PROFILE_COUNTERS 0
30164 #endif
30165 if (NO_PROFILE_COUNTERS)
30166 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30167 LCT_NORMAL, VOIDmode, 0);
30168 else
30169 {
30170 char buf[30];
30171 const char *label_name;
30172 rtx fun;
30173
30174 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30175 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30176 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30177
30178 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30179 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
30180 }
30181 }
30182 else if (DEFAULT_ABI == ABI_DARWIN)
30183 {
30184 const char *mcount_name = RS6000_MCOUNT;
30185 int caller_addr_regno = LR_REGNO;
30186
30187 /* Be conservative and always set this, at least for now. */
30188 crtl->uses_pic_offset_table = 1;
30189
30190 #if TARGET_MACHO
30191 /* For PIC code, set up a stub and collect the caller's address
30192 from r0, which is where the prologue puts it. */
30193 if (MACHOPIC_INDIRECT
30194 && crtl->uses_pic_offset_table)
30195 caller_addr_regno = 0;
30196 #endif
30197 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30198 LCT_NORMAL, VOIDmode, 1,
30199 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30200 }
30201 }
30202
30203 /* Write function profiler code. */
30204
30205 void
30206 output_function_profiler (FILE *file, int labelno)
30207 {
30208 char buf[100];
30209
30210 switch (DEFAULT_ABI)
30211 {
30212 default:
30213 gcc_unreachable ();
30214
30215 case ABI_V4:
30216 if (!TARGET_32BIT)
30217 {
30218 warning (0, "no profiling of 64-bit code for this ABI");
30219 return;
30220 }
30221 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30222 fprintf (file, "\tmflr %s\n", reg_names[0]);
30223 if (NO_PROFILE_COUNTERS)
30224 {
30225 asm_fprintf (file, "\tstw %s,4(%s)\n",
30226 reg_names[0], reg_names[1]);
30227 }
30228 else if (TARGET_SECURE_PLT && flag_pic)
30229 {
30230 if (TARGET_LINK_STACK)
30231 {
30232 char name[32];
30233 get_ppc476_thunk_name (name);
30234 asm_fprintf (file, "\tbl %s\n", name);
30235 }
30236 else
30237 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30238 asm_fprintf (file, "\tstw %s,4(%s)\n",
30239 reg_names[0], reg_names[1]);
30240 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30241 asm_fprintf (file, "\taddis %s,%s,",
30242 reg_names[12], reg_names[12]);
30243 assemble_name (file, buf);
30244 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30245 assemble_name (file, buf);
30246 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
30247 }
30248 else if (flag_pic == 1)
30249 {
30250 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30251 asm_fprintf (file, "\tstw %s,4(%s)\n",
30252 reg_names[0], reg_names[1]);
30253 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30254 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30255 assemble_name (file, buf);
30256 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
30257 }
30258 else if (flag_pic > 1)
30259 {
30260 asm_fprintf (file, "\tstw %s,4(%s)\n",
30261 reg_names[0], reg_names[1]);
30262 /* Now, we need to get the address of the label. */
30263 if (TARGET_LINK_STACK)
30264 {
30265 char name[32];
30266 get_ppc476_thunk_name (name);
30267 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30268 assemble_name (file, buf);
30269 fputs ("-.\n1:", file);
30270 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30271 asm_fprintf (file, "\taddi %s,%s,4\n",
30272 reg_names[11], reg_names[11]);
30273 }
30274 else
30275 {
30276 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30277 assemble_name (file, buf);
30278 fputs ("-.\n1:", file);
30279 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30280 }
30281 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30282 reg_names[0], reg_names[11]);
30283 asm_fprintf (file, "\tadd %s,%s,%s\n",
30284 reg_names[0], reg_names[0], reg_names[11]);
30285 }
30286 else
30287 {
30288 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30289 assemble_name (file, buf);
30290 fputs ("@ha\n", file);
30291 asm_fprintf (file, "\tstw %s,4(%s)\n",
30292 reg_names[0], reg_names[1]);
30293 asm_fprintf (file, "\tla %s,", reg_names[0]);
30294 assemble_name (file, buf);
30295 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30296 }
30297
30298 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30299 fprintf (file, "\tbl %s%s\n",
30300 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30301 break;
30302
30303 case ABI_AIX:
30304 case ABI_ELFv2:
30305 case ABI_DARWIN:
30306 /* Don't do anything, done in output_profile_hook (). */
30307 break;
30308 }
30309 }
30310
30311 \f
30312
30313 /* The following variable value is the last issued insn. */
30314
30315 static rtx_insn *last_scheduled_insn;
30316
30317 /* The following variable helps to balance issuing of load and
30318 store instructions. */
30319
30320 static int load_store_pendulum;
30321
30322 /* The following variable helps pair divide insns during scheduling. */
30323 static int divide_cnt;
30324 /* The following variable helps pair and alternate vector and vector load
30325 insns during scheduling. */
30326 static int vec_pairing;
30327
30328
30329 /* Power4 load update and store update instructions are cracked into a
30330 load or store and an integer insn which are executed in the same cycle.
30331 Branches have their own dispatch slot which does not count against the
30332 GCC issue rate, but it changes the program flow so there are no other
30333 instructions to issue in this cycle. */
30334
30335 static int
30336 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30337 {
30338 last_scheduled_insn = insn;
30339 if (GET_CODE (PATTERN (insn)) == USE
30340 || GET_CODE (PATTERN (insn)) == CLOBBER)
30341 {
30342 cached_can_issue_more = more;
30343 return cached_can_issue_more;
30344 }
30345
30346 if (insn_terminates_group_p (insn, current_group))
30347 {
30348 cached_can_issue_more = 0;
30349 return cached_can_issue_more;
30350 }
30351
30352 /* If the insn is not recognized, it has no reservation; return the default. */
30353 if (recog_memoized (insn) < 0)
30354 return more;
30355
30356 if (rs6000_sched_groups)
30357 {
30358 if (is_microcoded_insn (insn))
30359 cached_can_issue_more = 0;
30360 else if (is_cracked_insn (insn))
30361 cached_can_issue_more = more > 2 ? more - 2 : 0;
30362 else
30363 cached_can_issue_more = more - 1;
30364
30365 return cached_can_issue_more;
30366 }
30367
30368 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
30369 return 0;
30370
30371 cached_can_issue_more = more - 1;
30372 return cached_can_issue_more;
30373 }
30374
30375 static int
30376 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30377 {
30378 int r = rs6000_variable_issue_1 (insn, more);
30379 if (verbose)
30380 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30381 return r;
30382 }
30383
30384 /* Adjust the cost of a scheduling dependency. Return the new cost of
30385 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30386
30387 static int
30388 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30389 unsigned int)
30390 {
30391 enum attr_type attr_type;
30392
30393 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30394 return cost;
30395
30396 switch (dep_type)
30397 {
30398 case REG_DEP_TRUE:
30399 {
30400 /* Data dependency; DEP_INSN writes a register that INSN reads
30401 some cycles later. */
30402
30403 /* Separate a load from a narrower, dependent store. */
30404 if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9)
30405 && GET_CODE (PATTERN (insn)) == SET
30406 && GET_CODE (PATTERN (dep_insn)) == SET
30407 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
30408 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
30409 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30410 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30411 return cost + 14;
30412
30413 attr_type = get_attr_type (insn);
30414
30415 switch (attr_type)
30416 {
30417 case TYPE_JMPREG:
30418 /* Tell the first scheduling pass about the latency between
30419 a mtctr and bctr (and mtlr and br/blr). The first
30420 scheduling pass will not know about this latency since
30421 the mtctr instruction, which has the latency associated
30422 to it, will be generated by reload. */
30423 return 4;
30424 case TYPE_BRANCH:
30425 /* Leave some extra cycles between a compare and its
30426 dependent branch, to inhibit expensive mispredicts. */
30427 if ((rs6000_cpu_attr == CPU_PPC603
30428 || rs6000_cpu_attr == CPU_PPC604
30429 || rs6000_cpu_attr == CPU_PPC604E
30430 || rs6000_cpu_attr == CPU_PPC620
30431 || rs6000_cpu_attr == CPU_PPC630
30432 || rs6000_cpu_attr == CPU_PPC750
30433 || rs6000_cpu_attr == CPU_PPC7400
30434 || rs6000_cpu_attr == CPU_PPC7450
30435 || rs6000_cpu_attr == CPU_PPCE5500
30436 || rs6000_cpu_attr == CPU_PPCE6500
30437 || rs6000_cpu_attr == CPU_POWER4
30438 || rs6000_cpu_attr == CPU_POWER5
30439 || rs6000_cpu_attr == CPU_POWER7
30440 || rs6000_cpu_attr == CPU_POWER8
30441 || rs6000_cpu_attr == CPU_POWER9
30442 || rs6000_cpu_attr == CPU_CELL)
30443 && recog_memoized (dep_insn)
30444 && (INSN_CODE (dep_insn) >= 0))
30445
30446 switch (get_attr_type (dep_insn))
30447 {
30448 case TYPE_CMP:
30449 case TYPE_FPCOMPARE:
30450 case TYPE_CR_LOGICAL:
30451 case TYPE_DELAYED_CR:
30452 return cost + 2;
30453 case TYPE_EXTS:
30454 case TYPE_MUL:
30455 if (get_attr_dot (dep_insn) == DOT_YES)
30456 return cost + 2;
30457 else
30458 break;
30459 case TYPE_SHIFT:
30460 if (get_attr_dot (dep_insn) == DOT_YES
30461 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
30462 return cost + 2;
30463 else
30464 break;
30465 default:
30466 break;
30467 }
30468 break;
30469
30470 case TYPE_STORE:
30471 case TYPE_FPSTORE:
30472 if ((rs6000_cpu == PROCESSOR_POWER6)
30473 && recog_memoized (dep_insn)
30474 && (INSN_CODE (dep_insn) >= 0))
30475 {
30476
30477 if (GET_CODE (PATTERN (insn)) != SET)
30478 /* If this happens, we have to extend this to schedule
30479 optimally. Return default for now. */
30480 return cost;
30481
30482 /* Adjust the cost for the case where the value written
30483 by a fixed point operation is used as the address
30484 gen value on a store. */
30485 switch (get_attr_type (dep_insn))
30486 {
30487 case TYPE_LOAD:
30488 case TYPE_CNTLZ:
30489 {
30490 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30491 return get_attr_sign_extend (dep_insn)
30492 == SIGN_EXTEND_YES ? 6 : 4;
30493 break;
30494 }
30495 case TYPE_SHIFT:
30496 {
30497 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30498 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30499 6 : 3;
30500 break;
30501 }
30502 case TYPE_INTEGER:
30503 case TYPE_ADD:
30504 case TYPE_LOGICAL:
30505 case TYPE_EXTS:
30506 case TYPE_INSERT:
30507 {
30508 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30509 return 3;
30510 break;
30511 }
30512 case TYPE_STORE:
30513 case TYPE_FPLOAD:
30514 case TYPE_FPSTORE:
30515 {
30516 if (get_attr_update (dep_insn) == UPDATE_YES
30517 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30518 return 3;
30519 break;
30520 }
30521 case TYPE_MUL:
30522 {
30523 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30524 return 17;
30525 break;
30526 }
30527 case TYPE_DIV:
30528 {
30529 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30530 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30531 break;
30532 }
30533 default:
30534 break;
30535 }
30536 }
30537 break;
30538
30539 case TYPE_LOAD:
30540 if ((rs6000_cpu == PROCESSOR_POWER6)
30541 && recog_memoized (dep_insn)
30542 && (INSN_CODE (dep_insn) >= 0))
30543 {
30544
30545 /* Adjust the cost for the case where the value written
30546 by a fixed point instruction is used within the address
30547 gen portion of a subsequent load(u)(x). */
30548 switch (get_attr_type (dep_insn))
30549 {
30550 case TYPE_LOAD:
30551 case TYPE_CNTLZ:
30552 {
30553 if (set_to_load_agen (dep_insn, insn))
30554 return get_attr_sign_extend (dep_insn)
30555 == SIGN_EXTEND_YES ? 6 : 4;
30556 break;
30557 }
30558 case TYPE_SHIFT:
30559 {
30560 if (set_to_load_agen (dep_insn, insn))
30561 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30562 6 : 3;
30563 break;
30564 }
30565 case TYPE_INTEGER:
30566 case TYPE_ADD:
30567 case TYPE_LOGICAL:
30568 case TYPE_EXTS:
30569 case TYPE_INSERT:
30570 {
30571 if (set_to_load_agen (dep_insn, insn))
30572 return 3;
30573 break;
30574 }
30575 case TYPE_STORE:
30576 case TYPE_FPLOAD:
30577 case TYPE_FPSTORE:
30578 {
30579 if (get_attr_update (dep_insn) == UPDATE_YES
30580 && set_to_load_agen (dep_insn, insn))
30581 return 3;
30582 break;
30583 }
30584 case TYPE_MUL:
30585 {
30586 if (set_to_load_agen (dep_insn, insn))
30587 return 17;
30588 break;
30589 }
30590 case TYPE_DIV:
30591 {
30592 if (set_to_load_agen (dep_insn, insn))
30593 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30594 break;
30595 }
30596 default:
30597 break;
30598 }
30599 }
30600 break;
30601
30602 case TYPE_FPLOAD:
30603 if ((rs6000_cpu == PROCESSOR_POWER6)
30604 && get_attr_update (insn) == UPDATE_NO
30605 && recog_memoized (dep_insn)
30606 && (INSN_CODE (dep_insn) >= 0)
30607 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30608 return 2;
30609
30610 default:
30611 break;
30612 }
30613
30614 /* Fall out to return default cost. */
30615 }
30616 break;
30617
30618 case REG_DEP_OUTPUT:
30619 /* Output dependency; DEP_INSN writes a register that INSN writes some
30620 cycles later. */
30621 if ((rs6000_cpu == PROCESSOR_POWER6)
30622 && recog_memoized (dep_insn)
30623 && (INSN_CODE (dep_insn) >= 0))
30624 {
30625 attr_type = get_attr_type (insn);
30626
30627 switch (attr_type)
30628 {
30629 case TYPE_FP:
30630 case TYPE_FPSIMPLE:
30631 if (get_attr_type (dep_insn) == TYPE_FP
30632 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30633 return 1;
30634 break;
30635 case TYPE_FPLOAD:
30636 if (get_attr_update (insn) == UPDATE_NO
30637 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30638 return 2;
30639 break;
30640 default:
30641 break;
30642 }
30643 }
30644 /* Fall through, no cost for output dependency. */
30645 /* FALLTHRU */
30646
30647 case REG_DEP_ANTI:
30648 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30649 cycles later. */
30650 return 0;
30651
30652 default:
30653 gcc_unreachable ();
30654 }
30655
30656 return cost;
30657 }
30658
30659 /* Debug version of rs6000_adjust_cost. */
30660
30661 static int
30662 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30663 int cost, unsigned int dw)
30664 {
30665 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30666
30667 if (ret != cost)
30668 {
30669 const char *dep;
30670
30671 switch (dep_type)
30672 {
30673 default: dep = "unknown dependency"; break;
30674 case REG_DEP_TRUE: dep = "data dependency"; break;
30675 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30676 case REG_DEP_ANTI: dep = "anti dependency"; break;
30677 }
30678
30679 fprintf (stderr,
30680 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30681 "%s, insn:\n", ret, cost, dep);
30682
30683 debug_rtx (insn);
30684 }
30685
30686 return ret;
30687 }
30688
30689 /* Return true if INSN is microcoded.
30690 Return false otherwise. */
30691
30692 static bool
30693 is_microcoded_insn (rtx_insn *insn)
30694 {
30695 if (!insn || !NONDEBUG_INSN_P (insn)
30696 || GET_CODE (PATTERN (insn)) == USE
30697 || GET_CODE (PATTERN (insn)) == CLOBBER)
30698 return false;
30699
30700 if (rs6000_cpu_attr == CPU_CELL)
30701 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30702
30703 if (rs6000_sched_groups
30704 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30705 {
30706 enum attr_type type = get_attr_type (insn);
30707 if ((type == TYPE_LOAD
30708 && get_attr_update (insn) == UPDATE_YES
30709 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30710 || ((type == TYPE_LOAD || type == TYPE_STORE)
30711 && get_attr_update (insn) == UPDATE_YES
30712 && get_attr_indexed (insn) == INDEXED_YES)
30713 || type == TYPE_MFCR)
30714 return true;
30715 }
30716
30717 return false;
30718 }
30719
30720 /* The function returns true if INSN is cracked into 2 instructions
30721 by the processor (and therefore occupies 2 issue slots). */
30722
30723 static bool
30724 is_cracked_insn (rtx_insn *insn)
30725 {
30726 if (!insn || !NONDEBUG_INSN_P (insn)
30727 || GET_CODE (PATTERN (insn)) == USE
30728 || GET_CODE (PATTERN (insn)) == CLOBBER)
30729 return false;
30730
30731 if (rs6000_sched_groups
30732 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
30733 {
30734 enum attr_type type = get_attr_type (insn);
30735 if ((type == TYPE_LOAD
30736 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30737 && get_attr_update (insn) == UPDATE_NO)
30738 || (type == TYPE_LOAD
30739 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30740 && get_attr_update (insn) == UPDATE_YES
30741 && get_attr_indexed (insn) == INDEXED_NO)
30742 || (type == TYPE_STORE
30743 && get_attr_update (insn) == UPDATE_YES
30744 && get_attr_indexed (insn) == INDEXED_NO)
30745 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30746 && get_attr_update (insn) == UPDATE_YES)
30747 || type == TYPE_DELAYED_CR
30748 || (type == TYPE_EXTS
30749 && get_attr_dot (insn) == DOT_YES)
30750 || (type == TYPE_SHIFT
30751 && get_attr_dot (insn) == DOT_YES
30752 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30753 || (type == TYPE_MUL
30754 && get_attr_dot (insn) == DOT_YES)
30755 || type == TYPE_DIV
30756 || (type == TYPE_INSERT
30757 && get_attr_size (insn) == SIZE_32))
30758 return true;
30759 }
30760
30761 return false;
30762 }
30763
30764 /* The function returns true if INSN can be issued only from
30765 the branch slot. */
30766
30767 static bool
30768 is_branch_slot_insn (rtx_insn *insn)
30769 {
30770 if (!insn || !NONDEBUG_INSN_P (insn)
30771 || GET_CODE (PATTERN (insn)) == USE
30772 || GET_CODE (PATTERN (insn)) == CLOBBER)
30773 return false;
30774
30775 if (rs6000_sched_groups)
30776 {
30777 enum attr_type type = get_attr_type (insn);
30778 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30779 return true;
30780 return false;
30781 }
30782
30783 return false;
30784 }
30785
30786 /* Return true if OUT_INSN sets a value that is
30787 used in the address generation computation of IN_INSN. */
30788 static bool
30789 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30790 {
30791 rtx out_set, in_set;
30792
30793 /* For performance reasons, only handle the simple case where
30794 both loads are a single_set. */
30795 out_set = single_set (out_insn);
30796 if (out_set)
30797 {
30798 in_set = single_set (in_insn);
30799 if (in_set)
30800 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30801 }
30802
30803 return false;
30804 }
30805
30806 /* Try to determine base/offset/size parts of the given MEM.
30807 Return true if successful, false if not all of the values
30808 could be determined.
30809
30810 This function only looks for REG or REG+CONST address forms.
30811 REG+REG address form will return false. */
30812
30813 static bool
30814 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30815 HOST_WIDE_INT *size)
30816 {
30817 rtx addr_rtx;
30818 if (MEM_SIZE_KNOWN_P (mem))
30819 *size = MEM_SIZE (mem);
30820 else
30821 return false;
30822
30823 addr_rtx = (XEXP (mem, 0));
30824 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30825 addr_rtx = XEXP (addr_rtx, 1);
30826
30827 *offset = 0;
30828 while (GET_CODE (addr_rtx) == PLUS
30829 && CONST_INT_P (XEXP (addr_rtx, 1)))
30830 {
30831 *offset += INTVAL (XEXP (addr_rtx, 1));
30832 addr_rtx = XEXP (addr_rtx, 0);
30833 }
30834 if (!REG_P (addr_rtx))
30835 return false;
30836
30837 *base = addr_rtx;
30838 return true;
30839 }
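/* Example (illustrative, not in the original source): for a MEM of
   size 8 whose address is (plus (reg r9) (const_int 16)), this sets
   *base = r9, *offset = 16, *size = 8 and returns true; nested
   constant PLUS terms are accumulated into the offset, while a
   reg+reg (indexed) address makes it return false.  */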
30840
30841 /* Return true if the target storage location of MEM1
30842 is adjacent to the target storage location of MEM2,
30843 i.e. one location ends exactly where the other begins. */
30844
30845 static bool
30846 adjacent_mem_locations (rtx mem1, rtx mem2)
30847 {
30848 rtx reg1, reg2;
30849 HOST_WIDE_INT off1, size1, off2, size2;
30850
30851 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30852 && get_memref_parts (mem2, &reg2, &off2, &size2))
30853 return ((REGNO (reg1) == REGNO (reg2))
30854 && ((off1 + size1 == off2)
30855 || (off2 + size2 == off1)));
30856
30857 return false;
30858 }
30859
30860 /* This function returns true if it can be determined that the two MEM
30861 locations overlap by at least 1 byte based on base reg/offset/size. */
30862
30863 static bool
30864 mem_locations_overlap (rtx mem1, rtx mem2)
30865 {
30866 rtx reg1, reg2;
30867 HOST_WIDE_INT off1, size1, off2, size2;
30868
30869 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30870 && get_memref_parts (mem2, &reg2, &off2, &size2))
30871 return ((REGNO (reg1) == REGNO (reg2))
30872 && (((off1 <= off2) && (off1 + size1 > off2))
30873 || ((off2 <= off1) && (off2 + size2 > off1))));
30874
30875 return false;
30876 }
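/* Example (illustrative, not in the original source): two 4-byte
   accesses at r9+0 and r9+4 are adjacent but do not overlap, while a
   4-byte access at r9+0 and an 8-byte access at r9+2 overlap, since
   the byte intervals [0,4) and [2,10) intersect.  */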
30877
30878 /* A C statement (sans semicolon) to update the integer scheduling
30879 priority INSN_PRIORITY (INSN). Increase the priority to execute the
30880 INSN earlier, reduce the priority to execute INSN later. Do not
30881 define this macro if you do not need to adjust the scheduling
30882 priorities of insns. */
30883
30884 static int
30885 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30886 {
30887 rtx load_mem, str_mem;
30888 /* On machines (like the 750) which have asymmetric integer units,
30889 where one integer unit can do multiply and divides and the other
30890 can't, reduce the priority of multiply/divide so it is scheduled
30891 before other integer operations. */
30892
30893 #if 0
30894 if (! INSN_P (insn))
30895 return priority;
30896
30897 if (GET_CODE (PATTERN (insn)) == USE)
30898 return priority;
30899
30900 switch (rs6000_cpu_attr) {
30901 case CPU_PPC750:
30902 switch (get_attr_type (insn))
30903 {
30904 default:
30905 break;
30906
30907 case TYPE_MUL:
30908 case TYPE_DIV:
30909 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30910 priority, priority);
30911 if (priority >= 0 && priority < 0x01000000)
30912 priority >>= 3;
30913 break;
30914 }
30915 }
30916 #endif
30917
30918 if (insn_must_be_first_in_group (insn)
30919 && reload_completed
30920 && current_sched_info->sched_max_insns_priority
30921 && rs6000_sched_restricted_insns_priority)
30922 {
30923
30924 /* Prioritize insns that can be dispatched only in the first
30925 dispatch slot. */
30926 if (rs6000_sched_restricted_insns_priority == 1)
30927 /* Attach highest priority to insn. This means that in
30928 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30929 precede 'priority' (critical path) considerations. */
30930 return current_sched_info->sched_max_insns_priority;
30931 else if (rs6000_sched_restricted_insns_priority == 2)
30932 /* Increase priority of insn by a minimal amount. This means that in
30933 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30934 considerations precede dispatch-slot restriction considerations. */
30935 return (priority + 1);
30936 }
30937
30938 if (rs6000_cpu == PROCESSOR_POWER6
30939 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30940 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30941 /* Attach highest priority to insn if the scheduler has just issued two
30942 stores and this instruction is a load, or two loads and this instruction
30943 is a store. Power6 wants loads and stores scheduled alternately
30944 when possible */
30945 return current_sched_info->sched_max_insns_priority;
30946
30947 return priority;
30948 }
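
/* Editorial illustration (not part of the original source): how the two
   -msched-restricted-insns-priority modes above remap a priority, using a
   hypothetical sched_max_insns_priority of 100.  Mode 1 jumps a
   must-be-first-in-group insn to the top of the ready order; mode 2 only
   nudges it ahead of insns with equal critical-path priority.  */
#if 0
static int
restricted_insn_priority (int mode, int max_priority, int priority)
{
  if (mode == 1)
    return max_priority;	/* e.g. priority 37 becomes 100.  */
  if (mode == 2)
    return priority + 1;	/* e.g. priority 37 becomes 38.  */
  return priority;		/* Other values leave it untouched.  */
}
#endif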
30949
30950 /* Return true if the instruction is nonpipelined on the Cell. */
30951 static bool
30952 is_nonpipeline_insn (rtx_insn *insn)
30953 {
30954 enum attr_type type;
30955 if (!insn || !NONDEBUG_INSN_P (insn)
30956 || GET_CODE (PATTERN (insn)) == USE
30957 || GET_CODE (PATTERN (insn)) == CLOBBER)
30958 return false;
30959
30960 type = get_attr_type (insn);
30961 if (type == TYPE_MUL
30962 || type == TYPE_DIV
30963 || type == TYPE_SDIV
30964 || type == TYPE_DDIV
30965 || type == TYPE_SSQRT
30966 || type == TYPE_DSQRT
30967 || type == TYPE_MFCR
30968 || type == TYPE_MFCRF
30969 || type == TYPE_MFJMPR)
30970 {
30971 return true;
30972 }
30973 return false;
30974 }
30975
30976
30977 /* Return how many instructions the machine can issue per cycle. */
30978
30979 static int
30980 rs6000_issue_rate (void)
30981 {
30982   /* Unless scheduling for register pressure, use an issue rate of 1
30983      for the first scheduling pass to decrease degradation.  */
30984 if (!reload_completed && !flag_sched_pressure)
30985 return 1;
30986
30987 switch (rs6000_cpu_attr) {
30988 case CPU_RS64A:
30989 case CPU_PPC601: /* ? */
30990 case CPU_PPC7450:
30991 return 3;
30992 case CPU_PPC440:
30993 case CPU_PPC603:
30994 case CPU_PPC750:
30995 case CPU_PPC7400:
30996 case CPU_PPC8540:
30997 case CPU_PPC8548:
30998 case CPU_CELL:
30999 case CPU_PPCE300C2:
31000 case CPU_PPCE300C3:
31001 case CPU_PPCE500MC:
31002 case CPU_PPCE500MC64:
31003 case CPU_PPCE5500:
31004 case CPU_PPCE6500:
31005 case CPU_TITAN:
31006 return 2;
31007 case CPU_PPC476:
31008 case CPU_PPC604:
31009 case CPU_PPC604E:
31010 case CPU_PPC620:
31011 case CPU_PPC630:
31012 return 4;
31013 case CPU_POWER4:
31014 case CPU_POWER5:
31015 case CPU_POWER6:
31016 case CPU_POWER7:
31017 return 5;
31018 case CPU_POWER8:
31019 return 7;
31020 case CPU_POWER9:
31021 return 6;
31022 default:
31023 return 1;
31024 }
31025 }
31026
31027 /* Return how many instructions to look ahead for better insn
31028 scheduling. */
31029
31030 static int
31031 rs6000_use_sched_lookahead (void)
31032 {
31033 switch (rs6000_cpu_attr)
31034 {
31035 case CPU_PPC8540:
31036 case CPU_PPC8548:
31037 return 4;
31038
31039 case CPU_CELL:
31040 return (reload_completed ? 8 : 0);
31041
31042 default:
31043 return 0;
31044 }
31045 }
31046
31047 /* We are choosing insn from the ready queue. Return zero if INSN can be
31048 chosen. */
31049 static int
31050 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
31051 {
31052 if (ready_index == 0)
31053 return 0;
31054
31055 if (rs6000_cpu_attr != CPU_CELL)
31056 return 0;
31057
31058 gcc_assert (insn != NULL_RTX && INSN_P (insn));
31059
31060 if (!reload_completed
31061 || is_nonpipeline_insn (insn)
31062 || is_microcoded_insn (insn))
31063 return 1;
31064
31065 return 0;
31066 }
31067
31068 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31069 and return true. */
31070
31071 static bool
31072 find_mem_ref (rtx pat, rtx *mem_ref)
31073 {
31074 const char * fmt;
31075 int i, j;
31076
31077 /* stack_tie does not produce any real memory traffic. */
31078 if (tie_operand (pat, VOIDmode))
31079 return false;
31080
31081 if (GET_CODE (pat) == MEM)
31082 {
31083 *mem_ref = pat;
31084 return true;
31085 }
31086
31087 /* Recursively process the pattern. */
31088 fmt = GET_RTX_FORMAT (GET_CODE (pat));
31089
31090 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
31091 {
31092 if (fmt[i] == 'e')
31093 {
31094 if (find_mem_ref (XEXP (pat, i), mem_ref))
31095 return true;
31096 }
31097 else if (fmt[i] == 'E')
31098 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
31099 {
31100 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
31101 return true;
31102 }
31103 }
31104
31105 return false;
31106 }
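
/* Editorial illustration (not part of the original source): the generic
   RTX walk skeleton that find_mem_ref instantiates.  GET_RTX_FORMAT
   yields one format character per operand of an rtx code: 'e' marks a
   sub-expression and 'E' a vector of sub-expressions; all other operand
   kinds (integers, strings, ...) carry no rtxes and are skipped.  The
   same traversal, sketched for an arbitrary predicate:  */
#if 0
static bool
rtx_search (rtx pat, bool (*match) (rtx))
{
  if (match (pat))
    return true;

  const char *fmt = GET_RTX_FORMAT (GET_CODE (pat));
  for (int i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if (rtx_search (XEXP (pat, i), match))
	    return true;
	}
      else if (fmt[i] == 'E')
	for (int j = XVECLEN (pat, i) - 1; j >= 0; j--)
	  if (rtx_search (XVECEXP (pat, i, j), match))
	    return true;
    }
  return false;
}
#endif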
31107
31108 /* Determine if PAT is a PATTERN of a load insn. */
31109
31110 static bool
31111 is_load_insn1 (rtx pat, rtx *load_mem)
31112 {
31113   if (!pat)
31114 return false;
31115
31116 if (GET_CODE (pat) == SET)
31117 return find_mem_ref (SET_SRC (pat), load_mem);
31118
31119 if (GET_CODE (pat) == PARALLEL)
31120 {
31121 int i;
31122
31123 for (i = 0; i < XVECLEN (pat, 0); i++)
31124 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
31125 return true;
31126 }
31127
31128 return false;
31129 }
31130
31131 /* Determine if INSN loads from memory. */
31132
31133 static bool
31134 is_load_insn (rtx insn, rtx *load_mem)
31135 {
31136 if (!insn || !INSN_P (insn))
31137 return false;
31138
31139 if (CALL_P (insn))
31140 return false;
31141
31142 return is_load_insn1 (PATTERN (insn), load_mem);
31143 }
31144
31145 /* Determine if PAT is a PATTERN of a store insn. */
31146
31147 static bool
31148 is_store_insn1 (rtx pat, rtx *str_mem)
31149 {
31150   if (!pat)
31151 return false;
31152
31153 if (GET_CODE (pat) == SET)
31154 return find_mem_ref (SET_DEST (pat), str_mem);
31155
31156 if (GET_CODE (pat) == PARALLEL)
31157 {
31158 int i;
31159
31160 for (i = 0; i < XVECLEN (pat, 0); i++)
31161 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31162 return true;
31163 }
31164
31165 return false;
31166 }
31167
31168 /* Determine if INSN stores to memory. */
31169
31170 static bool
31171 is_store_insn (rtx insn, rtx *str_mem)
31172 {
31173 if (!insn || !INSN_P (insn))
31174 return false;
31175
31176 return is_store_insn1 (PATTERN (insn), str_mem);
31177 }
31178
31179 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31180
31181 static bool
31182 is_power9_pairable_vec_type (enum attr_type type)
31183 {
31184 switch (type)
31185 {
31186 case TYPE_VECSIMPLE:
31187 case TYPE_VECCOMPLEX:
31188 case TYPE_VECDIV:
31189 case TYPE_VECCMP:
31190 case TYPE_VECPERM:
31191 case TYPE_VECFLOAT:
31192 case TYPE_VECFDIV:
31193 case TYPE_VECDOUBLE:
31194 return true;
31195 default:
31196 break;
31197 }
31198 return false;
31199 }
31200
31201 /* Returns whether the dependence between INSN and NEXT is considered
31202 costly by the given target. */
31203
31204 static bool
31205 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31206 {
31207 rtx insn;
31208 rtx next;
31209 rtx load_mem, str_mem;
31210
31211   /* If the flag is not enabled, no dependence is considered costly;
31212 allow all dependent insns in the same group.
31213 This is the most aggressive option. */
31214 if (rs6000_sched_costly_dep == no_dep_costly)
31215 return false;
31216
31217   /* If the flag is set to 1, a dependence is always considered costly;
31218 do not allow dependent instructions in the same group.
31219 This is the most conservative option. */
31220 if (rs6000_sched_costly_dep == all_deps_costly)
31221 return true;
31222
31223 insn = DEP_PRO (dep);
31224 next = DEP_CON (dep);
31225
31226 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31227 && is_load_insn (next, &load_mem)
31228 && is_store_insn (insn, &str_mem))
31229 /* Prevent load after store in the same group. */
31230 return true;
31231
31232 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31233 && is_load_insn (next, &load_mem)
31234 && is_store_insn (insn, &str_mem)
31235 && DEP_TYPE (dep) == REG_DEP_TRUE
31236 && mem_locations_overlap(str_mem, load_mem))
31237 /* Prevent load after store in the same group if it is a true
31238 dependence. */
31239 return true;
31240
31241 /* The flag is set to X; dependences with latency >= X are considered costly,
31242 and will not be scheduled in the same group. */
31243 if (rs6000_sched_costly_dep <= max_dep_latency
31244 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31245 return true;
31246
31247 return false;
31248 }
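
/* Editorial illustration (not part of the original source): a numeric
   check of the latency threshold at the end of
   rs6000_is_costly_dependence.  With -msched-costly-dep=3, a dependence
   of cost 4 at distance 1 gives 4 - 1 >= 3 and so is kept out of the
   producer's dispatch group; the same dependence at distance 2 is
   allowed in.  */
#if 0
static int
dep_exceeds_threshold (int threshold, int cost, int distance)
{
  return (cost - distance) >= threshold;
}
#endif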
31249
31250 /* Return the next insn after INSN that is found before TAIL is reached,
31251 skipping any "non-active" insns - insns that will not actually occupy
31252 an issue slot. Return NULL_RTX if such an insn is not found. */
31253
31254 static rtx_insn *
31255 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31256 {
31257 if (insn == NULL_RTX || insn == tail)
31258 return NULL;
31259
31260 while (1)
31261 {
31262 insn = NEXT_INSN (insn);
31263 if (insn == NULL_RTX || insn == tail)
31264 return NULL;
31265
31266 if (CALL_P (insn)
31267 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31268 || (NONJUMP_INSN_P (insn)
31269 && GET_CODE (PATTERN (insn)) != USE
31270 && GET_CODE (PATTERN (insn)) != CLOBBER
31271 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31272 break;
31273 }
31274 return insn;
31275 }
31276
31277 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31278
31279 static int
31280 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31281 {
31282 int pos;
31283 int i;
31284 rtx_insn *tmp;
31285 enum attr_type type, type2;
31286
31287 type = get_attr_type (last_scheduled_insn);
31288
31289 /* Try to issue fixed point divides back-to-back in pairs so they will be
31290 routed to separate execution units and execute in parallel. */
31291 if (type == TYPE_DIV && divide_cnt == 0)
31292 {
31293 /* First divide has been scheduled. */
31294 divide_cnt = 1;
31295
31296       /* Scan the ready list looking for another divide; if found, move
31297 	 it to the end of the list so it is chosen next.  */
31298 pos = lastpos;
31299 while (pos >= 0)
31300 {
31301 if (recog_memoized (ready[pos]) >= 0
31302 && get_attr_type (ready[pos]) == TYPE_DIV)
31303 {
31304 tmp = ready[pos];
31305 for (i = pos; i < lastpos; i++)
31306 ready[i] = ready[i + 1];
31307 ready[lastpos] = tmp;
31308 break;
31309 }
31310 pos--;
31311 }
31312 }
31313 else
31314 {
31315 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31316 divide_cnt = 0;
31317
31318 /* The best dispatch throughput for vector and vector load insns can be
31319 achieved by interleaving a vector and vector load such that they'll
31320 dispatch to the same superslice. If this pairing cannot be achieved
31321 then it is best to pair vector insns together and vector load insns
31322 together.
31323
31324 To aid in this pairing, vec_pairing maintains the current state with
31325 the following values:
31326
31327 0 : Initial state, no vecload/vector pairing has been started.
31328
31329 1 : A vecload or vector insn has been issued and a candidate for
31330 pairing has been found and moved to the end of the ready
31331 list. */
31332 if (type == TYPE_VECLOAD)
31333 {
31334 /* Issued a vecload. */
31335 if (vec_pairing == 0)
31336 {
31337 int vecload_pos = -1;
31338 /* We issued a single vecload, look for a vector insn to pair it
31339 with. If one isn't found, try to pair another vecload. */
31340 pos = lastpos;
31341 while (pos >= 0)
31342 {
31343 if (recog_memoized (ready[pos]) >= 0)
31344 {
31345 type2 = get_attr_type (ready[pos]);
31346 if (is_power9_pairable_vec_type (type2))
31347 {
31348 /* Found a vector insn to pair with, move it to the
31349 end of the ready list so it is scheduled next. */
31350 tmp = ready[pos];
31351 for (i = pos; i < lastpos; i++)
31352 ready[i] = ready[i + 1];
31353 ready[lastpos] = tmp;
31354 vec_pairing = 1;
31355 return cached_can_issue_more;
31356 }
31357 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
31358 /* Remember position of first vecload seen. */
31359 vecload_pos = pos;
31360 }
31361 pos--;
31362 }
31363 if (vecload_pos >= 0)
31364 {
31365 /* Didn't find a vector to pair with but did find a vecload,
31366 move it to the end of the ready list. */
31367 tmp = ready[vecload_pos];
31368 for (i = vecload_pos; i < lastpos; i++)
31369 ready[i] = ready[i + 1];
31370 ready[lastpos] = tmp;
31371 vec_pairing = 1;
31372 return cached_can_issue_more;
31373 }
31374 }
31375 }
31376 else if (is_power9_pairable_vec_type (type))
31377 {
31378 /* Issued a vector operation. */
31379 if (vec_pairing == 0)
31380 {
31381 int vec_pos = -1;
31382 /* We issued a single vector insn, look for a vecload to pair it
31383 with. If one isn't found, try to pair another vector. */
31384 pos = lastpos;
31385 while (pos >= 0)
31386 {
31387 if (recog_memoized (ready[pos]) >= 0)
31388 {
31389 type2 = get_attr_type (ready[pos]);
31390 if (type2 == TYPE_VECLOAD)
31391 {
31392 /* Found a vecload insn to pair with, move it to the
31393 end of the ready list so it is scheduled next. */
31394 tmp = ready[pos];
31395 for (i = pos; i < lastpos; i++)
31396 ready[i] = ready[i + 1];
31397 ready[lastpos] = tmp;
31398 vec_pairing = 1;
31399 return cached_can_issue_more;
31400 }
31401 else if (is_power9_pairable_vec_type (type2)
31402 && vec_pos == -1)
31403 /* Remember position of first vector insn seen. */
31404 vec_pos = pos;
31405 }
31406 pos--;
31407 }
31408 if (vec_pos >= 0)
31409 {
31410 /* Didn't find a vecload to pair with but did find a vector
31411 insn, move it to the end of the ready list. */
31412 tmp = ready[vec_pos];
31413 for (i = vec_pos; i < lastpos; i++)
31414 ready[i] = ready[i + 1];
31415 ready[lastpos] = tmp;
31416 vec_pairing = 1;
31417 return cached_can_issue_more;
31418 }
31419 }
31420 }
31421
31422 /* We've either finished a vec/vecload pair, couldn't find an insn to
31423         continue the current pair, or the last insn had nothing to do
31424         with pairing.  In any case, reset the state.  */
31425 vec_pairing = 0;
31426 }
31427
31428 return cached_can_issue_more;
31429 }
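
/* Editorial illustration (not part of the original source): the
   move-to-end rotation that power9_sched_reorder2 repeats four times,
   factored into a hypothetical helper.  The scheduler issues from the
   end of the ready array, so rotating an insn into ready[lastpos] makes
   it the next one chosen.  */
#if 0
static void
move_ready_insn_to_end (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];	/* Shift the tail down by one.  */
  ready[lastpos] = tmp;		/* Chosen insn now issues next.  */
}
#endif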
31430
31431 /* We are about to begin issuing insns for this clock cycle. */
31432
31433 static int
31434 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31435 rtx_insn **ready ATTRIBUTE_UNUSED,
31436 int *pn_ready ATTRIBUTE_UNUSED,
31437 int clock_var ATTRIBUTE_UNUSED)
31438 {
31439 int n_ready = *pn_ready;
31440
31441 if (sched_verbose)
31442 fprintf (dump, "// rs6000_sched_reorder :\n");
31443
31444   /* Reorder the ready list, if the next insn to be issued
31445      is a nonpipelined insn.  */
31446 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
31447 {
31448 if (is_nonpipeline_insn (ready[n_ready - 1])
31449 && (recog_memoized (ready[n_ready - 2]) > 0))
31450 /* Simply swap first two insns. */
31451 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
31452 }
31453
31454 if (rs6000_cpu == PROCESSOR_POWER6)
31455 load_store_pendulum = 0;
31456
31457 return rs6000_issue_rate ();
31458 }
31459
31460 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31461
31462 static int
31463 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31464 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31465 {
31466 if (sched_verbose)
31467 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31468
31469 /* For Power6, we need to handle some special cases to try and keep the
31470 store queue from overflowing and triggering expensive flushes.
31471
31472 This code monitors how load and store instructions are being issued
31473 and skews the ready list one way or the other to increase the likelihood
31474 that a desired instruction is issued at the proper time.
31475
31476 A couple of things are done. First, we maintain a "load_store_pendulum"
31477 to track the current state of load/store issue.
31478
31479 - If the pendulum is at zero, then no loads or stores have been
31480 issued in the current cycle so we do nothing.
31481
31482 - If the pendulum is 1, then a single load has been issued in this
31483 cycle and we attempt to locate another load in the ready list to
31484 issue with it.
31485
31486 - If the pendulum is -2, then two stores have already been
31487 issued in this cycle, so we increase the priority of the first load
31488      in the ready list to increase its likelihood of being chosen first
31489 in the next cycle.
31490
31491 - If the pendulum is -1, then a single store has been issued in this
31492 cycle and we attempt to locate another store in the ready list to
31493 issue with it, preferring a store to an adjacent memory location to
31494 facilitate store pairing in the store queue.
31495
31496 - If the pendulum is 2, then two loads have already been
31497 issued in this cycle, so we increase the priority of the first store
31498      in the ready list to increase its likelihood of being chosen first
31499 in the next cycle.
31500
31501 - If the pendulum < -2 or > 2, then do nothing.
31502
31503      Note: This code covers the most common scenarios.  There exist
31504      non-load/store instructions which make use of the LSU and which
31505 would need to be accounted for to strictly model the behavior
31506 of the machine. Those instructions are currently unaccounted
31507 for to help minimize compile time overhead of this code.
31508 */
31509 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
31510 {
31511 int pos;
31512 int i;
31513 rtx_insn *tmp;
31514 rtx load_mem, str_mem;
31515
31516 if (is_store_insn (last_scheduled_insn, &str_mem))
31517 /* Issuing a store, swing the load_store_pendulum to the left */
31518 load_store_pendulum--;
31519 else if (is_load_insn (last_scheduled_insn, &load_mem))
31520 /* Issuing a load, swing the load_store_pendulum to the right */
31521 load_store_pendulum++;
31522 else
31523 return cached_can_issue_more;
31524
31525 /* If the pendulum is balanced, or there is only one instruction on
31526 the ready list, then all is well, so return. */
31527 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31528 return cached_can_issue_more;
31529
31530 if (load_store_pendulum == 1)
31531 {
31532 /* A load has been issued in this cycle. Scan the ready list
31533 for another load to issue with it */
31534 pos = *pn_ready-1;
31535
31536 while (pos >= 0)
31537 {
31538 if (is_load_insn (ready[pos], &load_mem))
31539 {
31540 /* Found a load. Move it to the head of the ready list,
31541 		 and adjust its priority so that it is more likely to
31542 		 stay there.  */
31543 tmp = ready[pos];
31544 for (i=pos; i<*pn_ready-1; i++)
31545 ready[i] = ready[i + 1];
31546 ready[*pn_ready-1] = tmp;
31547
31548 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31549 INSN_PRIORITY (tmp)++;
31550 break;
31551 }
31552 pos--;
31553 }
31554 }
31555 else if (load_store_pendulum == -2)
31556 {
31557 /* Two stores have been issued in this cycle. Increase the
31558 priority of the first load in the ready list to favor it for
31559 issuing in the next cycle. */
31560 pos = *pn_ready-1;
31561
31562 while (pos >= 0)
31563 {
31564 if (is_load_insn (ready[pos], &load_mem)
31565 && !sel_sched_p ()
31566 && INSN_PRIORITY_KNOWN (ready[pos]))
31567 {
31568 INSN_PRIORITY (ready[pos])++;
31569
31570 /* Adjust the pendulum to account for the fact that a load
31571 was found and increased in priority. This is to prevent
31572 increasing the priority of multiple loads */
31573 load_store_pendulum--;
31574
31575 break;
31576 }
31577 pos--;
31578 }
31579 }
31580 else if (load_store_pendulum == -1)
31581 {
31582 /* A store has been issued in this cycle. Scan the ready list for
31583 another store to issue with it, preferring a store to an adjacent
31584 memory location */
31585 int first_store_pos = -1;
31586
31587 pos = *pn_ready-1;
31588
31589 while (pos >= 0)
31590 {
31591 if (is_store_insn (ready[pos], &str_mem))
31592 {
31593 rtx str_mem2;
31594 /* Maintain the index of the first store found on the
31595 list */
31596 if (first_store_pos == -1)
31597 first_store_pos = pos;
31598
31599 if (is_store_insn (last_scheduled_insn, &str_mem2)
31600 && adjacent_mem_locations (str_mem, str_mem2))
31601 {
31602 /* Found an adjacent store. Move it to the head of the
31603 		     ready list, and adjust its priority so that it is
31604 more likely to stay there */
31605 tmp = ready[pos];
31606 for (i=pos; i<*pn_ready-1; i++)
31607 ready[i] = ready[i + 1];
31608 ready[*pn_ready-1] = tmp;
31609
31610 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31611 INSN_PRIORITY (tmp)++;
31612
31613 first_store_pos = -1;
31614
31615 break;
31616 		  }
31617 }
31618 pos--;
31619 }
31620
31621 if (first_store_pos >= 0)
31622 {
31623 /* An adjacent store wasn't found, but a non-adjacent store was,
31624 so move the non-adjacent store to the front of the ready
31625 list, and adjust its priority so that it is more likely to
31626 stay there. */
31627 tmp = ready[first_store_pos];
31628 for (i=first_store_pos; i<*pn_ready-1; i++)
31629 ready[i] = ready[i + 1];
31630 ready[*pn_ready-1] = tmp;
31631 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31632 INSN_PRIORITY (tmp)++;
31633 }
31634 }
31635 else if (load_store_pendulum == 2)
31636 {
31637 /* Two loads have been issued in this cycle. Increase the priority
31638 of the first store in the ready list to favor it for issuing in
31639 the next cycle. */
31640 pos = *pn_ready-1;
31641
31642 while (pos >= 0)
31643 {
31644 if (is_store_insn (ready[pos], &str_mem)
31645 && !sel_sched_p ()
31646 && INSN_PRIORITY_KNOWN (ready[pos]))
31647 {
31648 INSN_PRIORITY (ready[pos])++;
31649
31650 /* Adjust the pendulum to account for the fact that a store
31651 was found and increased in priority. This is to prevent
31652 increasing the priority of multiple stores */
31653 load_store_pendulum++;
31654
31655 break;
31656 }
31657 pos--;
31658 }
31659 }
31660 }
31661
31662 /* Do Power9 dependent reordering if necessary. */
31663 if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
31664 && recog_memoized (last_scheduled_insn) >= 0)
31665 return power9_sched_reorder2 (ready, *pn_ready - 1);
31666
31667 return cached_can_issue_more;
31668 }
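
/* Editorial illustration (not part of the original source): the Power6
   load/store pendulum documented in rs6000_sched_reorder2, reduced to
   its state transitions.  Negative values count stores issued in the
   current cycle and positive values count loads; the reordering code
   then steers the ready list toward the under-represented kind.  */
#if 0
static int
swing_pendulum (int pendulum, int issued_store, int issued_load)
{
  if (issued_store)
    return pendulum - 1;   /* -1: pair another store; -2: boost a load.  */
  if (issued_load)
    return pendulum + 1;   /* +1: pair another load; +2: boost a store.  */
  return pendulum;	   /* Neither: leave the pendulum alone.  */
}
#endif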
31669
31670 /* Return whether the presence of INSN causes a dispatch group termination
31671 of group WHICH_GROUP.
31672
31673 If WHICH_GROUP == current_group, this function will return true if INSN
31674    causes the termination of the current group (i.e., the dispatch group to
31675    which INSN belongs).  This means that INSN will be the last insn in the
31676    group it belongs to.
31677
31678    If WHICH_GROUP == previous_group, this function will return true if INSN
31679    causes the termination of the previous group (i.e., the dispatch group that
31680    precedes the group to which INSN belongs).  This means that INSN will be
31681    the first insn in the group it belongs to.  */
31682
31683 static bool
31684 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31685 {
31686 bool first, last;
31687
31688 if (! insn)
31689 return false;
31690
31691 first = insn_must_be_first_in_group (insn);
31692 last = insn_must_be_last_in_group (insn);
31693
31694 if (first && last)
31695 return true;
31696
31697 if (which_group == current_group)
31698 return last;
31699 else if (which_group == previous_group)
31700 return first;
31701
31702 return false;
31703 }
31704
31705
31706 static bool
31707 insn_must_be_first_in_group (rtx_insn *insn)
31708 {
31709 enum attr_type type;
31710
31711 if (!insn
31712 || NOTE_P (insn)
31713 || DEBUG_INSN_P (insn)
31714 || GET_CODE (PATTERN (insn)) == USE
31715 || GET_CODE (PATTERN (insn)) == CLOBBER)
31716 return false;
31717
31718 switch (rs6000_cpu)
31719 {
31720 case PROCESSOR_POWER5:
31721 if (is_cracked_insn (insn))
31722 return true;
31723 /* FALLTHRU */
31724 case PROCESSOR_POWER4:
31725 if (is_microcoded_insn (insn))
31726 return true;
31727
31728 if (!rs6000_sched_groups)
31729 return false;
31730
31731 type = get_attr_type (insn);
31732
31733 switch (type)
31734 {
31735 case TYPE_MFCR:
31736 case TYPE_MFCRF:
31737 case TYPE_MTCR:
31738 case TYPE_DELAYED_CR:
31739 case TYPE_CR_LOGICAL:
31740 case TYPE_MTJMPR:
31741 case TYPE_MFJMPR:
31742 case TYPE_DIV:
31743 case TYPE_LOAD_L:
31744 case TYPE_STORE_C:
31745 case TYPE_ISYNC:
31746 case TYPE_SYNC:
31747 return true;
31748 default:
31749 break;
31750 }
31751 break;
31752 case PROCESSOR_POWER6:
31753 type = get_attr_type (insn);
31754
31755 switch (type)
31756 {
31757 case TYPE_EXTS:
31758 case TYPE_CNTLZ:
31759 case TYPE_TRAP:
31760 case TYPE_MUL:
31761 case TYPE_INSERT:
31762 case TYPE_FPCOMPARE:
31763 case TYPE_MFCR:
31764 case TYPE_MTCR:
31765 case TYPE_MFJMPR:
31766 case TYPE_MTJMPR:
31767 case TYPE_ISYNC:
31768 case TYPE_SYNC:
31769 case TYPE_LOAD_L:
31770 case TYPE_STORE_C:
31771 return true;
31772 case TYPE_SHIFT:
31773 if (get_attr_dot (insn) == DOT_NO
31774 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31775 return true;
31776 else
31777 break;
31778 case TYPE_DIV:
31779 if (get_attr_size (insn) == SIZE_32)
31780 return true;
31781 else
31782 break;
31783 case TYPE_LOAD:
31784 case TYPE_STORE:
31785 case TYPE_FPLOAD:
31786 case TYPE_FPSTORE:
31787 if (get_attr_update (insn) == UPDATE_YES)
31788 return true;
31789 else
31790 break;
31791 default:
31792 break;
31793 }
31794 break;
31795 case PROCESSOR_POWER7:
31796 type = get_attr_type (insn);
31797
31798 switch (type)
31799 {
31800 case TYPE_CR_LOGICAL:
31801 case TYPE_MFCR:
31802 case TYPE_MFCRF:
31803 case TYPE_MTCR:
31804 case TYPE_DIV:
31805 case TYPE_ISYNC:
31806 case TYPE_LOAD_L:
31807 case TYPE_STORE_C:
31808 case TYPE_MFJMPR:
31809 case TYPE_MTJMPR:
31810 return true;
31811 case TYPE_MUL:
31812 case TYPE_SHIFT:
31813 case TYPE_EXTS:
31814 if (get_attr_dot (insn) == DOT_YES)
31815 return true;
31816 else
31817 break;
31818 case TYPE_LOAD:
31819 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31820 || get_attr_update (insn) == UPDATE_YES)
31821 return true;
31822 else
31823 break;
31824 case TYPE_STORE:
31825 case TYPE_FPLOAD:
31826 case TYPE_FPSTORE:
31827 if (get_attr_update (insn) == UPDATE_YES)
31828 return true;
31829 else
31830 break;
31831 default:
31832 break;
31833 }
31834 break;
31835 case PROCESSOR_POWER8:
31836 type = get_attr_type (insn);
31837
31838 switch (type)
31839 {
31840 case TYPE_CR_LOGICAL:
31841 case TYPE_DELAYED_CR:
31842 case TYPE_MFCR:
31843 case TYPE_MFCRF:
31844 case TYPE_MTCR:
31845 case TYPE_SYNC:
31846 case TYPE_ISYNC:
31847 case TYPE_LOAD_L:
31848 case TYPE_STORE_C:
31849 case TYPE_VECSTORE:
31850 case TYPE_MFJMPR:
31851 case TYPE_MTJMPR:
31852 return true;
31853 case TYPE_SHIFT:
31854 case TYPE_EXTS:
31855 case TYPE_MUL:
31856 if (get_attr_dot (insn) == DOT_YES)
31857 return true;
31858 else
31859 break;
31860 case TYPE_LOAD:
31861 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31862 || get_attr_update (insn) == UPDATE_YES)
31863 return true;
31864 else
31865 break;
31866 case TYPE_STORE:
31867 if (get_attr_update (insn) == UPDATE_YES
31868 && get_attr_indexed (insn) == INDEXED_YES)
31869 return true;
31870 else
31871 break;
31872 default:
31873 break;
31874 }
31875 break;
31876 default:
31877 break;
31878 }
31879
31880 return false;
31881 }
31882
31883 static bool
31884 insn_must_be_last_in_group (rtx_insn *insn)
31885 {
31886 enum attr_type type;
31887
31888 if (!insn
31889 || NOTE_P (insn)
31890 || DEBUG_INSN_P (insn)
31891 || GET_CODE (PATTERN (insn)) == USE
31892 || GET_CODE (PATTERN (insn)) == CLOBBER)
31893 return false;
31894
31895 switch (rs6000_cpu) {
31896 case PROCESSOR_POWER4:
31897 case PROCESSOR_POWER5:
31898 if (is_microcoded_insn (insn))
31899 return true;
31900
31901 if (is_branch_slot_insn (insn))
31902 return true;
31903
31904 break;
31905 case PROCESSOR_POWER6:
31906 type = get_attr_type (insn);
31907
31908 switch (type)
31909 {
31910 case TYPE_EXTS:
31911 case TYPE_CNTLZ:
31912 case TYPE_TRAP:
31913 case TYPE_MUL:
31914 case TYPE_FPCOMPARE:
31915 case TYPE_MFCR:
31916 case TYPE_MTCR:
31917 case TYPE_MFJMPR:
31918 case TYPE_MTJMPR:
31919 case TYPE_ISYNC:
31920 case TYPE_SYNC:
31921 case TYPE_LOAD_L:
31922 case TYPE_STORE_C:
31923 return true;
31924 case TYPE_SHIFT:
31925 if (get_attr_dot (insn) == DOT_NO
31926 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31927 return true;
31928 else
31929 break;
31930 case TYPE_DIV:
31931 if (get_attr_size (insn) == SIZE_32)
31932 return true;
31933 else
31934 break;
31935 default:
31936 break;
31937 }
31938 break;
31939 case PROCESSOR_POWER7:
31940 type = get_attr_type (insn);
31941
31942 switch (type)
31943 {
31944 case TYPE_ISYNC:
31945 case TYPE_SYNC:
31946 case TYPE_LOAD_L:
31947 case TYPE_STORE_C:
31948 return true;
31949 case TYPE_LOAD:
31950 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31951 && get_attr_update (insn) == UPDATE_YES)
31952 return true;
31953 else
31954 break;
31955 case TYPE_STORE:
31956 if (get_attr_update (insn) == UPDATE_YES
31957 && get_attr_indexed (insn) == INDEXED_YES)
31958 return true;
31959 else
31960 break;
31961 default:
31962 break;
31963 }
31964 break;
31965 case PROCESSOR_POWER8:
31966 type = get_attr_type (insn);
31967
31968 switch (type)
31969 {
31970 case TYPE_MFCR:
31971 case TYPE_MTCR:
31972 case TYPE_ISYNC:
31973 case TYPE_SYNC:
31974 case TYPE_LOAD_L:
31975 case TYPE_STORE_C:
31976 return true;
31977 case TYPE_LOAD:
31978 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31979 && get_attr_update (insn) == UPDATE_YES)
31980 return true;
31981 else
31982 break;
31983 case TYPE_STORE:
31984 if (get_attr_update (insn) == UPDATE_YES
31985 && get_attr_indexed (insn) == INDEXED_YES)
31986 return true;
31987 else
31988 break;
31989 default:
31990 break;
31991 }
31992 break;
31993 default:
31994 break;
31995 }
31996
31997 return false;
31998 }
31999
32000 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32001 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32002
32003 static bool
32004 is_costly_group (rtx *group_insns, rtx next_insn)
32005 {
32006 int i;
32007 int issue_rate = rs6000_issue_rate ();
32008
32009 for (i = 0; i < issue_rate; i++)
32010 {
32011 sd_iterator_def sd_it;
32012 dep_t dep;
32013 rtx insn = group_insns[i];
32014
32015 if (!insn)
32016 continue;
32017
32018 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
32019 {
32020 rtx next = DEP_CON (dep);
32021
32022 if (next == next_insn
32023 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
32024 return true;
32025 }
32026 }
32027
32028 return false;
32029 }
32030
32031 /* Utility function used by redefine_groups.
32032 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32033 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32034 to keep it "far" (in a separate group) from GROUP_INSNS, following
32035 one of the following schemes, depending on the value of the flag
32036    -minsert-sched-nops = X:
32037 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32038 in order to force NEXT_INSN into a separate group.
32039 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32040 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32041 insertion (has a group just ended, how many vacant issue slots remain in the
32042 last group, and how many dispatch groups were encountered so far). */
32043
32044 static int
32045 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
32046 rtx_insn *next_insn, bool *group_end, int can_issue_more,
32047 int *group_count)
32048 {
32049 rtx nop;
32050 bool force;
32051 int issue_rate = rs6000_issue_rate ();
32052 bool end = *group_end;
32053 int i;
32054
32055 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
32056 return can_issue_more;
32057
32058 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
32059 return can_issue_more;
32060
32061 force = is_costly_group (group_insns, next_insn);
32062 if (!force)
32063 return can_issue_more;
32064
32065 if (sched_verbose > 6)
32066     fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
32067 	     *group_count, can_issue_more);
32068
32069 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
32070 {
32071 if (*group_end)
32072 can_issue_more = 0;
32073
32074 /* Since only a branch can be issued in the last issue_slot, it is
32075 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32076 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32077 in this case the last nop will start a new group and the branch
32078 will be forced to the new group. */
32079 if (can_issue_more && !is_branch_slot_insn (next_insn))
32080 can_issue_more--;
32081
32082 /* Do we have a special group ending nop? */
32083 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
32084 || rs6000_cpu_attr == CPU_POWER8)
32085 {
32086 nop = gen_group_ending_nop ();
32087 emit_insn_before (nop, next_insn);
32088 can_issue_more = 0;
32089 }
32090 else
32091 while (can_issue_more > 0)
32092 {
32093 nop = gen_nop ();
32094 emit_insn_before (nop, next_insn);
32095 can_issue_more--;
32096 }
32097
32098 *group_end = true;
32099 return 0;
32100 }
32101
32102 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
32103 {
32104 int n_nops = rs6000_sched_insert_nops;
32105
32106 /* Nops can't be issued from the branch slot, so the effective
32107 issue_rate for nops is 'issue_rate - 1'. */
32108 if (can_issue_more == 0)
32109 can_issue_more = issue_rate;
32110 can_issue_more--;
32111 if (can_issue_more == 0)
32112 {
32113 can_issue_more = issue_rate - 1;
32114 (*group_count)++;
32115 end = true;
32116 for (i = 0; i < issue_rate; i++)
32117 {
32118 group_insns[i] = 0;
32119 }
32120 }
32121
32122 while (n_nops > 0)
32123 {
32124 nop = gen_nop ();
32125 emit_insn_before (nop, next_insn);
32126 if (can_issue_more == issue_rate - 1) /* new group begins */
32127 end = false;
32128 can_issue_more--;
32129 if (can_issue_more == 0)
32130 {
32131 can_issue_more = issue_rate - 1;
32132 (*group_count)++;
32133 end = true;
32134 for (i = 0; i < issue_rate; i++)
32135 {
32136 group_insns[i] = 0;
32137 }
32138 }
32139 n_nops--;
32140 }
32141
32142 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32143 can_issue_more++;
32144
32145 /* Is next_insn going to start a new group? */
32146 *group_end
32147 = (end
32148 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32149 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32150 || (can_issue_more < issue_rate &&
32151 insn_terminates_group_p (next_insn, previous_group)));
32152 if (*group_end && end)
32153 (*group_count)--;
32154
32155 if (sched_verbose > 6)
32156 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32157 *group_count, can_issue_more);
32158 return can_issue_more;
32159 }
32160
32161 return can_issue_more;
32162 }
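
/* Editorial illustration (not part of the original source): the slot
   accounting behind the sched_finish_regroup_exact scheme in
   force_new_group, assuming a CPU without a special group-ending nop.
   Only a branch can occupy the last issue slot, so a non-branch
   NEXT_INSN needs one fewer nop to be pushed into a new group.  */
#if 0
static int
nops_needed_exact (int can_issue_more, int next_is_branch)
{
  if (can_issue_more && !next_is_branch)
    can_issue_more--;
  return can_issue_more;	/* e.g. 2 vacant slots, non-branch: 1 nop.  */
}
#endif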
32163
32164 /* This function tries to synch the dispatch groups that the compiler "sees"
32165 with the dispatch groups that the processor dispatcher is expected to
32166 form in practice. It tries to achieve this synchronization by forcing the
32167 estimated processor grouping on the compiler (as opposed to the function
32168    'pad_groups' which tries to force the scheduler's grouping on the processor).
32169
32170 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32171 examines the (estimated) dispatch groups that will be formed by the processor
32172 dispatcher. It marks these group boundaries to reflect the estimated
32173 processor grouping, overriding the grouping that the scheduler had marked.
32174 Depending on the value of the flag '-minsert-sched-nops' this function can
32175 force certain insns into separate groups or force a certain distance between
32176 them by inserting nops, for example, if there exists a "costly dependence"
32177 between the insns.
32178
32179 The function estimates the group boundaries that the processor will form as
32180 follows: It keeps track of how many vacant issue slots are available after
32181 each insn. A subsequent insn will start a new group if one of the following
32182 4 cases applies:
32183 - no more vacant issue slots remain in the current dispatch group.
32184 - only the last issue slot, which is the branch slot, is vacant, but the next
32185 insn is not a branch.
32186    - only the last 2 or fewer issue slots, including the branch slot, are vacant,
32187 which means that a cracked insn (which occupies two issue slots) can't be
32188 issued in this group.
32189    - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
32190 start a new group. */
32191
32192 static int
32193 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32194 rtx_insn *tail)
32195 {
32196 rtx_insn *insn, *next_insn;
32197 int issue_rate;
32198 int can_issue_more;
32199 int slot, i;
32200 bool group_end;
32201 int group_count = 0;
32202 rtx *group_insns;
32203
32204 /* Initialize. */
32205 issue_rate = rs6000_issue_rate ();
32206 group_insns = XALLOCAVEC (rtx, issue_rate);
32207 for (i = 0; i < issue_rate; i++)
32208 {
32209 group_insns[i] = 0;
32210 }
32211 can_issue_more = issue_rate;
32212 slot = 0;
32213 insn = get_next_active_insn (prev_head_insn, tail);
32214 group_end = false;
32215
32216 while (insn != NULL_RTX)
32217 {
32218 slot = (issue_rate - can_issue_more);
32219 group_insns[slot] = insn;
32220 can_issue_more =
32221 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32222 if (insn_terminates_group_p (insn, current_group))
32223 can_issue_more = 0;
32224
32225 next_insn = get_next_active_insn (insn, tail);
32226 if (next_insn == NULL_RTX)
32227 return group_count + 1;
32228
32229 /* Is next_insn going to start a new group? */
32230 group_end
32231 = (can_issue_more == 0
32232 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32233 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32234 || (can_issue_more < issue_rate &&
32235 insn_terminates_group_p (next_insn, previous_group)));
32236
32237 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32238 next_insn, &group_end, can_issue_more,
32239 &group_count);
32240
32241 if (group_end)
32242 {
32243 group_count++;
32244 can_issue_more = 0;
32245 for (i = 0; i < issue_rate; i++)
32246 {
32247 group_insns[i] = 0;
32248 }
32249 }
32250
32251 if (GET_MODE (next_insn) == TImode && can_issue_more)
32252 PUT_MODE (next_insn, VOIDmode);
32253 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32254 PUT_MODE (next_insn, TImode);
32255
32256 insn = next_insn;
32257 if (can_issue_more == 0)
32258 can_issue_more = issue_rate;
32259 } /* while */
32260
32261 return group_count;
32262 }
32263
32264 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32265 dispatch group boundaries that the scheduler had marked. Pad with nops
32266 any dispatch groups which have vacant issue slots, in order to force the
32267 scheduler's grouping on the processor dispatcher. The function
32268 returns the number of dispatch groups found. */
32269
32270 static int
32271 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32272 rtx_insn *tail)
32273 {
32274 rtx_insn *insn, *next_insn;
32275 rtx nop;
32276 int issue_rate;
32277 int can_issue_more;
32278 int group_end;
32279 int group_count = 0;
32280
32281 /* Initialize issue_rate. */
32282 issue_rate = rs6000_issue_rate ();
32283 can_issue_more = issue_rate;
32284
32285 insn = get_next_active_insn (prev_head_insn, tail);
32286 next_insn = get_next_active_insn (insn, tail);
32287
32288 while (insn != NULL_RTX)
32289 {
32290 can_issue_more =
32291 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32292
32293 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32294
32295 if (next_insn == NULL_RTX)
32296 break;
32297
32298 if (group_end)
32299 {
32300 /* If the scheduler had marked group termination at this location
32301 (between insn and next_insn), and neither insn nor next_insn will
32302 force group termination, pad the group with nops to force group
32303 termination. */
32304 if (can_issue_more
32305 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32306 && !insn_terminates_group_p (insn, current_group)
32307 && !insn_terminates_group_p (next_insn, previous_group))
32308 {
32309 if (!is_branch_slot_insn (next_insn))
32310 can_issue_more--;
32311
32312 while (can_issue_more)
32313 {
32314 nop = gen_nop ();
32315 emit_insn_before (nop, next_insn);
32316 can_issue_more--;
32317 }
32318 }
32319
32320 can_issue_more = issue_rate;
32321 group_count++;
32322 }
32323
32324 insn = next_insn;
32325 next_insn = get_next_active_insn (insn, tail);
32326 }
32327
32328 return group_count;
32329 }
32330
32331 /* We're beginning a new block. Initialize data structures as necessary. */
32332
32333 static void
32334 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32335 int sched_verbose ATTRIBUTE_UNUSED,
32336 int max_ready ATTRIBUTE_UNUSED)
32337 {
32338 last_scheduled_insn = NULL;
32339 load_store_pendulum = 0;
32340 divide_cnt = 0;
32341 vec_pairing = 0;
32342 }
32343
32344 /* The following function is called at the end of scheduling BB.
32345    After reload, it inserts nops to enforce the insn group bundling.  */
32346
32347 static void
32348 rs6000_sched_finish (FILE *dump, int sched_verbose)
32349 {
32350 int n_groups;
32351
32352 if (sched_verbose)
32353 fprintf (dump, "=== Finishing schedule.\n");
32354
32355 if (reload_completed && rs6000_sched_groups)
32356 {
32357       /* Do not run the sched_finish hook when selective scheduling is enabled.  */
32358 if (sel_sched_p ())
32359 return;
32360
32361 if (rs6000_sched_insert_nops == sched_finish_none)
32362 return;
32363
32364 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32365 n_groups = pad_groups (dump, sched_verbose,
32366 current_sched_info->prev_head,
32367 current_sched_info->next_tail);
32368 else
32369 n_groups = redefine_groups (dump, sched_verbose,
32370 current_sched_info->prev_head,
32371 current_sched_info->next_tail);
32372
32373 if (sched_verbose >= 6)
32374 {
32375 fprintf (dump, "ngroups = %d\n", n_groups);
32376 print_rtl (dump, current_sched_info->prev_head);
32377 fprintf (dump, "Done finish_sched\n");
32378 }
32379 }
32380 }
32381
32382 struct rs6000_sched_context
32383 {
32384 short cached_can_issue_more;
32385 rtx_insn *last_scheduled_insn;
32386 int load_store_pendulum;
32387 int divide_cnt;
32388 int vec_pairing;
32389 };
32390
32391 typedef struct rs6000_sched_context rs6000_sched_context_def;
32392 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32393
32394 /* Allocate storage for a new scheduling context.  */
32395 static void *
32396 rs6000_alloc_sched_context (void)
32397 {
32398 return xmalloc (sizeof (rs6000_sched_context_def));
32399 }
32400
32401 /* If CLEAN_P is true, initialize _SC with clean data;
32402    otherwise initialize it from the global context.  */
32403 static void
32404 rs6000_init_sched_context (void *_sc, bool clean_p)
32405 {
32406 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32407
32408 if (clean_p)
32409 {
32410 sc->cached_can_issue_more = 0;
32411 sc->last_scheduled_insn = NULL;
32412 sc->load_store_pendulum = 0;
32413 sc->divide_cnt = 0;
32414 sc->vec_pairing = 0;
32415 }
32416 else
32417 {
32418 sc->cached_can_issue_more = cached_can_issue_more;
32419 sc->last_scheduled_insn = last_scheduled_insn;
32420 sc->load_store_pendulum = load_store_pendulum;
32421 sc->divide_cnt = divide_cnt;
32422 sc->vec_pairing = vec_pairing;
32423 }
32424 }
32425
32426 /* Sets the global scheduling context to the one pointed to by _SC. */
32427 static void
32428 rs6000_set_sched_context (void *_sc)
32429 {
32430 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32431
32432 gcc_assert (sc != NULL);
32433
32434 cached_can_issue_more = sc->cached_can_issue_more;
32435 last_scheduled_insn = sc->last_scheduled_insn;
32436 load_store_pendulum = sc->load_store_pendulum;
32437 divide_cnt = sc->divide_cnt;
32438 vec_pairing = sc->vec_pairing;
32439 }
32440
32441 /* Free _SC. */
32442 static void
32443 rs6000_free_sched_context (void *_sc)
32444 {
32445 gcc_assert (_sc != NULL);
32446
32447 free (_sc);
32448 }
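
/* Editorial illustration (not part of the original source): the
   lifecycle the selective scheduler expects from the four context hooks
   above, written out as a hypothetical caller.  */
#if 0
static void
sched_context_example (void)
{
  void *sc = rs6000_alloc_sched_context ();
  rs6000_init_sched_context (sc, true);	 /* Start from a clean state.  */
  rs6000_set_sched_context (sc);	 /* Install as the live global state.  */
  rs6000_free_sched_context (sc);
}
#endif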
32449
32450 static bool
32451 rs6000_sched_can_speculate_insn (rtx_insn *insn)
32452 {
32453 switch (get_attr_type (insn))
32454 {
32455 case TYPE_DIV:
32456 case TYPE_SDIV:
32457 case TYPE_DDIV:
32458 case TYPE_VECDIV:
32459 case TYPE_SSQRT:
32460 case TYPE_DSQRT:
32461 return false;
32462
32463 default:
32464 return true;
32465 }
32466 }
32467 \f
32468 /* Length in units of the trampoline for entering a nested function. */
32469
32470 int
32471 rs6000_trampoline_size (void)
32472 {
32473 int ret = 0;
32474
32475 switch (DEFAULT_ABI)
32476 {
32477 default:
32478 gcc_unreachable ();
32479
32480 case ABI_AIX:
32481 ret = (TARGET_32BIT) ? 12 : 24;
32482 break;
32483
32484 case ABI_ELFv2:
32485 gcc_assert (!TARGET_32BIT);
32486 ret = 32;
32487 break;
32488
32489 case ABI_DARWIN:
32490 case ABI_V4:
32491 ret = (TARGET_32BIT) ? 40 : 48;
32492 break;
32493 }
32494
32495 return ret;
32496 }
32497
32498 /* Emit RTL insns to initialize the variable parts of a trampoline.
32499 FNADDR is an RTX for the address of the function's pure code.
32500 CXT is an RTX for the static chain value for the function. */
32501
32502 static void
32503 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32504 {
32505 int regsize = (TARGET_32BIT) ? 4 : 8;
32506 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32507 rtx ctx_reg = force_reg (Pmode, cxt);
32508 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32509
32510 switch (DEFAULT_ABI)
32511 {
32512 default:
32513 gcc_unreachable ();
32514
32515     /* Under AIX, just build the 3-word function descriptor.  */
32516 case ABI_AIX:
32517 {
32518 rtx fnmem, fn_reg, toc_reg;
32519
32520 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32521 	  error ("you cannot take the address of a nested function if you use "
32522 		 "the -mno-pointers-to-nested-functions option");
32523
32524 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32525 fn_reg = gen_reg_rtx (Pmode);
32526 toc_reg = gen_reg_rtx (Pmode);
32527
32528 /* Macro to shorten the code expansions below. */
32529 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32530
32531 m_tramp = replace_equiv_address (m_tramp, addr);
32532
32533 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32534 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32535 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32536 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32537 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32538
32539 # undef MEM_PLUS
32540 }
32541 break;
32542
32543 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32544 case ABI_ELFv2:
32545 case ABI_DARWIN:
32546 case ABI_V4:
32547 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32548 LCT_NORMAL, VOIDmode, 4,
32549 addr, Pmode,
32550 GEN_INT (rs6000_trampoline_size ()), SImode,
32551 fnaddr, Pmode,
32552 ctx_reg, Pmode);
32553 break;
32554 }
32555 }
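
/* Editorial illustration (not part of the original source): the 3-word
   AIX function descriptor that rs6000_trampoline_init builds, written as
   a struct.  Each field is one register-sized word (4 bytes for -m32,
   8 for -m64), which is why rs6000_trampoline_size returns 12 or 24
   bytes for ABI_AIX.  */
#if 0
struct aix_func_desc
{
  void *entry_point;	/* Copied from the callee's own descriptor.  */
  void *toc_pointer;	/* Copied from the callee's own descriptor.  */
  void *static_chain;	/* The trampoline's CXT value.  */
};
#endif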
32556
32557 \f
32558 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32559 identifier as an argument, so the front end shouldn't look it up. */
32560
32561 static bool
32562 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32563 {
32564 return is_attribute_p ("altivec", attr_id);
32565 }
32566
32567 /* Handle the "altivec" attribute. The attribute may have
32568 arguments as follows:
32569
32570 __attribute__((altivec(vector__)))
32571 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32572 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32573
32574 and may appear more than once (e.g., 'vector bool char') in a
32575 given declaration. */
32576
32577 static tree
32578 rs6000_handle_altivec_attribute (tree *node,
32579 tree name ATTRIBUTE_UNUSED,
32580 tree args,
32581 int flags ATTRIBUTE_UNUSED,
32582 bool *no_add_attrs)
32583 {
32584 tree type = *node, result = NULL_TREE;
32585 machine_mode mode;
32586 int unsigned_p;
32587 char altivec_type
32588 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32589 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32590 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32591 : '?');
32592
32593 while (POINTER_TYPE_P (type)
32594 || TREE_CODE (type) == FUNCTION_TYPE
32595 || TREE_CODE (type) == METHOD_TYPE
32596 || TREE_CODE (type) == ARRAY_TYPE)
32597 type = TREE_TYPE (type);
32598
32599 mode = TYPE_MODE (type);
32600
32601 /* Check for invalid AltiVec type qualifiers. */
32602 if (type == long_double_type_node)
32603 error ("use of %<long double%> in AltiVec types is invalid");
32604 else if (type == boolean_type_node)
32605 error ("use of boolean types in AltiVec types is invalid");
32606 else if (TREE_CODE (type) == COMPLEX_TYPE)
32607 error ("use of %<complex%> in AltiVec types is invalid");
32608 else if (DECIMAL_FLOAT_MODE_P (mode))
32609 error ("use of decimal floating point types in AltiVec types is invalid");
32610 else if (!TARGET_VSX)
32611 {
32612 if (type == long_unsigned_type_node || type == long_integer_type_node)
32613 {
32614 if (TARGET_64BIT)
32615 error ("use of %<long%> in AltiVec types is invalid for "
32616 "64-bit code without -mvsx");
32617 else if (rs6000_warn_altivec_long)
32618 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32619 "use %<int%>");
32620 }
32621 else if (type == long_long_unsigned_type_node
32622 || type == long_long_integer_type_node)
32623 error ("use of %<long long%> in AltiVec types is invalid without "
32624 "-mvsx");
32625 else if (type == double_type_node)
32626 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
32627 }
32628
32629 switch (altivec_type)
32630 {
32631 case 'v':
32632 unsigned_p = TYPE_UNSIGNED (type);
32633 switch (mode)
32634 {
32635 case TImode:
32636 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32637 break;
32638 case DImode:
32639 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32640 break;
32641 case SImode:
32642 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32643 break;
32644 case HImode:
32645 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32646 break;
32647 case QImode:
32648 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32649 break;
32650 case SFmode: result = V4SF_type_node; break;
32651 case DFmode: result = V2DF_type_node; break;
32652 /* If the user says 'vector int bool', we may be handed the 'bool'
32653 attribute _before_ the 'vector' attribute, and so select the
32654 proper type in the 'b' case below. */
32655 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
32656 case V2DImode: case V2DFmode:
32657 result = type;
32658 default: break;
32659 }
32660 break;
32661 case 'b':
32662 switch (mode)
32663 {
32664 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
32665 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
32666 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
32667 case QImode: case V16QImode: result = bool_V16QI_type_node;
32668 default: break;
32669 }
32670 break;
32671 case 'p':
32672 switch (mode)
32673 {
32674 case V8HImode: result = pixel_V8HI_type_node;
32675 default: break;
32676 }
32677 default: break;
32678 }
32679
32680 /* Propagate qualifiers attached to the element type
32681 onto the vector type. */
32682 if (result && result != type && TYPE_QUALS (type))
32683 result = build_qualified_type (result, TYPE_QUALS (type));
32684
32685 *no_add_attrs = true; /* No need to hang on to the attribute. */
32686
32687 if (result)
32688 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32689
32690 return NULL_TREE;
32691 }
32692
32693 /* AltiVec defines four built-in scalar types that serve as vector
32694 elements; we must teach the compiler how to mangle them. */
32695
32696 static const char *
32697 rs6000_mangle_type (const_tree type)
32698 {
32699 type = TYPE_MAIN_VARIANT (type);
32700
32701 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32702 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32703 return NULL;
32704
32705 if (type == bool_char_type_node) return "U6__boolc";
32706 if (type == bool_short_type_node) return "U6__bools";
32707 if (type == pixel_type_node) return "u7__pixel";
32708 if (type == bool_int_type_node) return "U6__booli";
32709 if (type == bool_long_type_node) return "U6__booll";
32710
32711 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32712 "g" for IBM extended double, no matter whether it is long double (using
32713 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32714 if (TARGET_FLOAT128_TYPE)
32715 {
32716 if (type == ieee128_float_type_node)
32717 return "U10__float128";
32718
32719 if (type == ibm128_float_type_node)
32720 return "g";
32721
32722 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
32723 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
32724 }
32725
32726 /* Mangle IBM extended float long double as `g' (__float128) on
32727 powerpc*-linux where long-double-64 previously was the default. */
32728 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
32729 && TARGET_ELF
32730 && TARGET_LONG_DOUBLE_128
32731 && !TARGET_IEEEQUAD)
32732 return "g";
32733
32734 /* For all other types, use normal C++ mangling. */
32735 return NULL;
32736 }
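
/* Editorial illustration (not part of the original source): what the
   vendor-extended mangled names above look like inside a full
   Itanium-ABI symbol.  Presumably a C++ function `void f (__pixel)'
   would mangle as _Z1fu7__pixel, and one taking a `vector bool int'
   would embed U6__booli as the element type of its vector parameter.  */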
32737
32738 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32739 struct attribute_spec.handler. */
32740
32741 static tree
32742 rs6000_handle_longcall_attribute (tree *node, tree name,
32743 tree args ATTRIBUTE_UNUSED,
32744 int flags ATTRIBUTE_UNUSED,
32745 bool *no_add_attrs)
32746 {
32747 if (TREE_CODE (*node) != FUNCTION_TYPE
32748 && TREE_CODE (*node) != FIELD_DECL
32749 && TREE_CODE (*node) != TYPE_DECL)
32750 {
32751 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32752 name);
32753 *no_add_attrs = true;
32754 }
32755
32756 return NULL_TREE;
32757 }
32758
32759 /* Set longcall attributes on all functions declared when
32760 rs6000_default_long_calls is true. */
32761 static void
32762 rs6000_set_default_type_attributes (tree type)
32763 {
32764 if (rs6000_default_long_calls
32765 && (TREE_CODE (type) == FUNCTION_TYPE
32766 || TREE_CODE (type) == METHOD_TYPE))
32767 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32768 NULL_TREE,
32769 TYPE_ATTRIBUTES (type));
32770
32771 #if TARGET_MACHO
32772 darwin_set_default_type_attributes (type);
32773 #endif
32774 }
32775
32776 /* Return a reference suitable for calling a function with the
32777 longcall attribute. */
32778
32779 rtx
32780 rs6000_longcall_ref (rtx call_ref)
32781 {
32782 const char *call_name;
32783 tree node;
32784
32785 if (GET_CODE (call_ref) != SYMBOL_REF)
32786 return call_ref;
32787
32788   /* System V adds '.' to the internal name, so strip any leading dots.  */
32789 call_name = XSTR (call_ref, 0);
32790 if (*call_name == '.')
32791 {
32792 while (*call_name == '.')
32793 call_name++;
32794
32795 node = get_identifier (call_name);
32796 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32797 }
32798
32799 return force_reg (Pmode, call_ref);
32800 }
32801 \f
32802 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32803 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32804 #endif
32805
32806 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32807 struct attribute_spec.handler. */
32808 static tree
32809 rs6000_handle_struct_attribute (tree *node, tree name,
32810 tree args ATTRIBUTE_UNUSED,
32811 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32812 {
32813 tree *type = NULL;
32814 if (DECL_P (*node))
32815 {
32816 if (TREE_CODE (*node) == TYPE_DECL)
32817 type = &TREE_TYPE (*node);
32818 }
32819 else
32820 type = node;
32821
32822 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32823 || TREE_CODE (*type) == UNION_TYPE)))
32824 {
32825 warning (OPT_Wattributes, "%qE attribute ignored", name);
32826 *no_add_attrs = true;
32827 }
32828
32829 else if ((is_attribute_p ("ms_struct", name)
32830 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32831 || ((is_attribute_p ("gcc_struct", name)
32832 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32833 {
32834 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32835 name);
32836 *no_add_attrs = true;
32837 }
32838
32839 return NULL_TREE;
32840 }
32841
32842 static bool
32843 rs6000_ms_bitfield_layout_p (const_tree record_type)
32844 {
32845   return ((TARGET_USE_MS_BITFIELD_LAYOUT
32846 	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32847 	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32848 }
32849 \f
32850 #ifdef USING_ELFOS_H
32851
32852 /* A get_unnamed_section callback, used for switching to toc_section. */
32853
32854 static void
32855 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32856 {
32857 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32858 && TARGET_MINIMAL_TOC)
32859 {
32860 if (!toc_initialized)
32861 {
32862 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32863 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32864 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32865 fprintf (asm_out_file, "\t.tc ");
32866 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32867 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32868 fprintf (asm_out_file, "\n");
32869
32870 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32871 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32872 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32873 fprintf (asm_out_file, " = .+32768\n");
32874 toc_initialized = 1;
32875 }
32876 else
32877 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32878 }
32879 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32880 {
32881 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32882 if (!toc_initialized)
32883 {
32884 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32885 toc_initialized = 1;
32886 }
32887 }
32888 else
32889 {
32890 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32891 if (!toc_initialized)
32892 {
32893 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32894 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32895 fprintf (asm_out_file, " = .+32768\n");
32896 toc_initialized = 1;
32897 }
32898 }
32899 }
32900
32901 /* Implement TARGET_ASM_INIT_SECTIONS. */
32902
32903 static void
32904 rs6000_elf_asm_init_sections (void)
32905 {
32906 toc_section
32907 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32908
32909 sdata2_section
32910 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32911 SDATA2_SECTION_ASM_OP);
32912 }
32913
32914 /* Implement TARGET_SELECT_RTX_SECTION. */
32915
32916 static section *
32917 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32918 unsigned HOST_WIDE_INT align)
32919 {
32920 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32921 return toc_section;
32922 else
32923 return default_elf_select_rtx_section (mode, x, align);
32924 }
32925 \f
32926 /* For a SYMBOL_REF, set generic flags and then perform some
32927 target-specific processing.
32928
32929 When the AIX ABI is requested on a non-AIX system, replace the
32930 function name with the real name (with a leading .) rather than the
32931 function descriptor name. This saves a lot of overriding code to
32932 read the prefixes. */
32933
32934 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32935 static void
32936 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32937 {
32938 default_encode_section_info (decl, rtl, first);
32939
32940 if (first
32941 && TREE_CODE (decl) == FUNCTION_DECL
32942 && !TARGET_AIX
32943 && DEFAULT_ABI == ABI_AIX)
32944 {
32945 rtx sym_ref = XEXP (rtl, 0);
32946 size_t len = strlen (XSTR (sym_ref, 0));
32947 char *str = XALLOCAVEC (char, len + 2);
32948 str[0] = '.';
32949 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32950 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32951 }
32952 }
32953
32954 static inline bool
32955 compare_section_name (const char *section, const char *templ)
32956 {
32957 int len;
32958
32959 len = strlen (templ);
32960 return (strncmp (section, templ, len) == 0
32961 && (section[len] == 0 || section[len] == '.'));
32962 }
32963
32964 bool
32965 rs6000_elf_in_small_data_p (const_tree decl)
32966 {
32967 if (rs6000_sdata == SDATA_NONE)
32968 return false;
32969
32970 /* We want to merge strings, so we never consider them small data. */
32971 if (TREE_CODE (decl) == STRING_CST)
32972 return false;
32973
32974 /* Functions are never in the small data area. */
32975 if (TREE_CODE (decl) == FUNCTION_DECL)
32976 return false;
32977
32978 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32979 {
32980 const char *section = DECL_SECTION_NAME (decl);
32981 if (compare_section_name (section, ".sdata")
32982 || compare_section_name (section, ".sdata2")
32983 || compare_section_name (section, ".gnu.linkonce.s")
32984 || compare_section_name (section, ".sbss")
32985 || compare_section_name (section, ".sbss2")
32986 || compare_section_name (section, ".gnu.linkonce.sb")
32987 || strcmp (section, ".PPC.EMB.sdata0") == 0
32988 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32989 return true;
32990 }
32991 else
32992 {
32993 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32994
32995 if (size > 0
32996 && size <= g_switch_value
32997 /* If it's not public, and we're not going to reference it there,
32998 there's no need to put it in the small data section. */
32999 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
33000 return true;
33001 }
33002
33003 return false;
33004 }
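/* Worked example (hypothetical values): with -msdata=default and -G 8
   (g_switch_value == 8), a file-scope "int pair[2]" (8 bytes) is
   placed in small data, while a 16-byte object, a string constant, or
   a function never is.  */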
33005
33006 #endif /* USING_ELFOS_H */
33007 \f
33008 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33009
33010 static bool
33011 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
33012 {
33013 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
33014 }
33015
33016 /* Do not place thread-local symbols refs in the object blocks. */
33017
33018 static bool
33019 rs6000_use_blocks_for_decl_p (const_tree decl)
33020 {
33021 return !DECL_THREAD_LOCAL_P (decl);
33022 }
33023 \f
33024 /* Return a REG that occurs in ADDR with coefficient 1.
33025 ADDR can be effectively incremented by incrementing REG.
33026
33027 r0 is special and we must not select it as an address
33028 register by this routine since our caller will try to
33029 increment the returned register via an "la" instruction. */
33030
33031 rtx
33032 find_addr_reg (rtx addr)
33033 {
33034 while (GET_CODE (addr) == PLUS)
33035 {
33036 if (GET_CODE (XEXP (addr, 0)) == REG
33037 && REGNO (XEXP (addr, 0)) != 0)
33038 addr = XEXP (addr, 0);
33039 else if (GET_CODE (XEXP (addr, 1)) == REG
33040 && REGNO (XEXP (addr, 1)) != 0)
33041 addr = XEXP (addr, 1);
33042 else if (CONSTANT_P (XEXP (addr, 0)))
33043 addr = XEXP (addr, 1);
33044 else if (CONSTANT_P (XEXP (addr, 1)))
33045 addr = XEXP (addr, 0);
33046 else
33047 gcc_unreachable ();
33048 }
33049 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
33050 return addr;
33051 }
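/* Example (illustrative RTL): for ADDR = (plus (reg 9) (const_int 8))
   find_addr_reg returns (reg 9); for a nested address such as
   (plus (plus (reg 9) (reg 10)) (const_int 4)) it walks into the
   inner PLUS and returns (reg 9), the first non-r0 register found.  */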
33052
33053 void
33054 rs6000_fatal_bad_address (rtx op)
33055 {
33056 fatal_insn ("bad address", op);
33057 }
33058
33059 #if TARGET_MACHO
33060
33061 typedef struct branch_island_d {
33062 tree function_name;
33063 tree label_name;
33064 int line_number;
33065 } branch_island;
33066
33067
33068 static vec<branch_island, va_gc> *branch_islands;
33069
33070 /* Remember to generate a branch island for far calls to the given
33071 function. */
33072
33073 static void
33074 add_compiler_branch_island (tree label_name, tree function_name,
33075 int line_number)
33076 {
33077 branch_island bi = {function_name, label_name, line_number};
33078 vec_safe_push (branch_islands, bi);
33079 }
33080
33081 /* Generate far-jump branch islands for everything recorded in
33082 branch_islands. Invoked immediately after the last instruction of
33083 the epilogue has been emitted; the branch islands must be appended
33084 to, and contiguous with, the function body. Mach-O stubs are
33085 generated in machopic_output_stub(). */
33086
33087 static void
33088 macho_branch_islands (void)
33089 {
33090 char tmp_buf[512];
33091
33092 while (!vec_safe_is_empty (branch_islands))
33093 {
33094 branch_island *bi = &branch_islands->last ();
33095 const char *label = IDENTIFIER_POINTER (bi->label_name);
33096 const char *name = IDENTIFIER_POINTER (bi->function_name);
33097 char name_buf[512];
33098 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
33099 if (name[0] == '*' || name[0] == '&')
33100 strcpy (name_buf, name+1);
33101 else
33102 {
33103 name_buf[0] = '_';
33104 strcpy (name_buf+1, name);
33105 }
33106 strcpy (tmp_buf, "\n");
33107 strcat (tmp_buf, label);
33108 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33109 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33110 dbxout_stabd (N_SLINE, bi->line_number);
33111 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33112 if (flag_pic)
33113 {
33114 if (TARGET_LINK_STACK)
33115 {
33116 char name[32];
33117 get_ppc476_thunk_name (name);
33118 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
33119 strcat (tmp_buf, name);
33120 strcat (tmp_buf, "\n");
33121 strcat (tmp_buf, label);
33122 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33123 }
33124 else
33125 {
33126 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
33127 strcat (tmp_buf, label);
33128 strcat (tmp_buf, "_pic\n");
33129 strcat (tmp_buf, label);
33130 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33131 }
33132
33133 strcat (tmp_buf, "\taddis r11,r11,ha16(");
33134 strcat (tmp_buf, name_buf);
33135 strcat (tmp_buf, " - ");
33136 strcat (tmp_buf, label);
33137 strcat (tmp_buf, "_pic)\n");
33138
33139 strcat (tmp_buf, "\tmtlr r0\n");
33140
33141 strcat (tmp_buf, "\taddi r12,r11,lo16(");
33142 strcat (tmp_buf, name_buf);
33143 strcat (tmp_buf, " - ");
33144 strcat (tmp_buf, label);
33145 strcat (tmp_buf, "_pic)\n");
33146
33147 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
33148 }
33149 else
33150 {
33151 strcat (tmp_buf, ":\nlis r12,hi16(");
33152 strcat (tmp_buf, name_buf);
33153 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
33154 strcat (tmp_buf, name_buf);
33155 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
33156 }
33157 output_asm_insn (tmp_buf, 0);
33158 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33159 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33160 dbxout_stabd (N_SLINE, bi->line_number);
33161 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33162 branch_islands->pop ();
33163 }
33164 }
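/* For illustration, a single non-PIC island for a function "bar"
   recorded under label "L42" expands to roughly:

       L42:
           lis r12,hi16(_bar)
           ori r12,r12,lo16(_bar)
           mtctr r12
           bctr

   (the leading '_' comes from the Darwin label-prefix handling
   above).  */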
33165
33166 /* NO_PREVIOUS_DEF checks whether the function name is already in the
33167 list of recorded branch islands. */
33168
33169 static int
33170 no_previous_def (tree function_name)
33171 {
33172 branch_island *bi;
33173 unsigned ix;
33174
33175 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33176 if (function_name == bi->function_name)
33177 return 0;
33178 return 1;
33179 }
33180
33181 /* GET_PREV_LABEL gets the label name from the previous definition of
33182 the function. */
33183
33184 static tree
33185 get_prev_label (tree function_name)
33186 {
33187 branch_island *bi;
33188 unsigned ix;
33189
33190 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33191 if (function_name == bi->function_name)
33192 return bi->label_name;
33193 return NULL_TREE;
33194 }
33195
33196 /* INSN is a function call. OPERANDS[DEST_OPERAND_NUMBER] is the
33197 routine we are calling, and OPERANDS[COOKIE_OPERAND_NUMBER] holds
33198 the call cookie flags (e.g. CALL_LONG). Returns the assembler
33199 template for the call. */
33200
33201 char *
33202 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
33203 int cookie_operand_number)
33204 {
33205 static char buf[256];
33206 if (darwin_emit_branch_islands
33207 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
33208 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
33209 {
33210 tree labelname;
33211 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
33212
33213 if (no_previous_def (funname))
33214 {
33215 rtx label_rtx = gen_label_rtx ();
33216 char *label_buf, temp_buf[256];
33217 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
33218 CODE_LABEL_NUMBER (label_rtx));
33219 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
33220 labelname = get_identifier (label_buf);
33221 add_compiler_branch_island (labelname, funname, insn_line (insn));
33222 }
33223 else
33224 labelname = get_prev_label (funname);
33225
33226 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33227 instruction will reach 'foo', otherwise link as 'bl L42'".
33228 "L42" should be a 'branch island', that will do a far jump to
33229 'foo'. Branch islands are generated in
33230 macho_branch_islands(). */
33231 sprintf (buf, "jbsr %%z%d,%.246s",
33232 dest_operand_number, IDENTIFIER_POINTER (labelname));
33233 }
33234 else
33235 sprintf (buf, "bl %%z%d", dest_operand_number);
33236 return buf;
33237 }
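/* Illustrative result: a long call to "foo" is emitted as something
   like "jbsr _foo,L42" (via the %z punctuation), while an ordinary
   call is just "bl _foo"; "L42" names the branch island recorded
   above.  */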
33238
33239 /* Generate PIC and indirect symbol stubs. */
33240
33241 void
33242 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33243 {
33244 unsigned int length;
33245 char *symbol_name, *lazy_ptr_name;
33246 char *local_label_0;
33247 static int label = 0;
33248
33249 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33250 symb = (*targetm.strip_name_encoding) (symb);
33251
33252
33253 length = strlen (symb);
33254 symbol_name = XALLOCAVEC (char, length + 32);
33255 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33256
33257 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33258 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
33259
33260 if (flag_pic == 2)
33261 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33262 else
33263 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33264
33265 if (flag_pic == 2)
33266 {
33267 fprintf (file, "\t.align 5\n");
33268
33269 fprintf (file, "%s:\n", stub);
33270 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33271
33272 label++;
33273 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33274 sprintf (local_label_0, "\"L%011d$spb\"", label);
33275
33276 fprintf (file, "\tmflr r0\n");
33277 if (TARGET_LINK_STACK)
33278 {
33279 char name[32];
33280 get_ppc476_thunk_name (name);
33281 fprintf (file, "\tbl %s\n", name);
33282 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33283 }
33284 else
33285 {
33286 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33287 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33288 }
33289 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33290 lazy_ptr_name, local_label_0);
33291 fprintf (file, "\tmtlr r0\n");
33292 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33293 (TARGET_64BIT ? "ldu" : "lwzu"),
33294 lazy_ptr_name, local_label_0);
33295 fprintf (file, "\tmtctr r12\n");
33296 fprintf (file, "\tbctr\n");
33297 }
33298 else
33299 {
33300 fprintf (file, "\t.align 4\n");
33301
33302 fprintf (file, "%s:\n", stub);
33303 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33304
33305 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33306 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33307 (TARGET_64BIT ? "ldu" : "lwzu"),
33308 lazy_ptr_name);
33309 fprintf (file, "\tmtctr r12\n");
33310 fprintf (file, "\tbctr\n");
33311 }
33312
33313 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
33314 fprintf (file, "%s:\n", lazy_ptr_name);
33315 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33316 fprintf (file, "%sdyld_stub_binding_helper\n",
33317 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
33318 }
33319
33320 /* Legitimize PIC addresses. If the address is already
33321 position-independent, we return ORIG. Newly generated
33322 position-independent addresses go into a register. This is REG
33323 if nonzero; otherwise we allocate registers as necessary. */
33324
33325 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
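/* Worked check of the macro: adding 0x8000 maps the signed 16-bit
   range -32768 ... 32767 onto 0 ... 0xffff, so the unsigned comparison
   against 0x10000 accepts exactly the offsets that fit in a 16-bit
   signed displacement.  */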
33326
33327 rtx
33328 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33329 rtx reg)
33330 {
33331 rtx base, offset;
33332
33333 if (reg == NULL && ! reload_in_progress && ! reload_completed)
33334 reg = gen_reg_rtx (Pmode);
33335
33336 if (GET_CODE (orig) == CONST)
33337 {
33338 rtx reg_temp;
33339
33340 if (GET_CODE (XEXP (orig, 0)) == PLUS
33341 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33342 return orig;
33343
33344 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33345
33346 /* Use a different reg for the intermediate value, as
33347 it will be marked UNCHANGING. */
33348 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33349 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33350 Pmode, reg_temp);
33351 offset =
33352 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33353 Pmode, reg);
33354
33355 if (GET_CODE (offset) == CONST_INT)
33356 {
33357 if (SMALL_INT (offset))
33358 return plus_constant (Pmode, base, INTVAL (offset));
33359 else if (! reload_in_progress && ! reload_completed)
33360 offset = force_reg (Pmode, offset);
33361 else
33362 {
33363 rtx mem = force_const_mem (Pmode, orig);
33364 return machopic_legitimize_pic_address (mem, Pmode, reg);
33365 }
33366 }
33367 return gen_rtx_PLUS (Pmode, base, offset);
33368 }
33369
33370 /* Fall back on generic machopic code. */
33371 return machopic_legitimize_pic_address (orig, mode, reg);
33372 }
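/* Illustrative walk-through: for ORIG = (const (plus (symbol_ref "x")
   (const_int 4))), the SYMBOL_REF part is legitimized into a register,
   and since 4 passes SMALL_INT the result is simply
   plus_constant (Pmode, base, 4) instead of materializing the offset
   in a second register.  */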
33373
33374 /* Output a .machine directive for the Darwin assembler, and call
33375 the generic start_file routine. */
33376
33377 static void
33378 rs6000_darwin_file_start (void)
33379 {
33380 static const struct
33381 {
33382 const char *arg;
33383 const char *name;
33384 HOST_WIDE_INT if_set;
33385 } mapping[] = {
33386 { "ppc64", "ppc64", MASK_64BIT },
33387 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33388 { "power4", "ppc970", 0 },
33389 { "G5", "ppc970", 0 },
33390 { "7450", "ppc7450", 0 },
33391 { "7400", "ppc7400", MASK_ALTIVEC },
33392 { "G4", "ppc7400", 0 },
33393 { "750", "ppc750", 0 },
33394 { "740", "ppc750", 0 },
33395 { "G3", "ppc750", 0 },
33396 { "604e", "ppc604e", 0 },
33397 { "604", "ppc604", 0 },
33398 { "603e", "ppc603", 0 },
33399 { "603", "ppc603", 0 },
33400 { "601", "ppc601", 0 },
33401 { NULL, "ppc", 0 } };
33402 const char *cpu_id = "";
33403 size_t i;
33404
33405 rs6000_file_start ();
33406 darwin_file_start ();
33407
33408 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33409
33410 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33411 cpu_id = rs6000_default_cpu;
33412
33413 if (global_options_set.x_rs6000_cpu_index)
33414 cpu_id = processor_target_table[rs6000_cpu_index].name;
33415
33416 /* Look through the mapping array. Pick the first name that either
33417 matches the argument, has a bit set in IF_SET that is also set
33418 in the target flags, or has a NULL name. */
33419
33420 i = 0;
33421 while (mapping[i].arg != NULL
33422 && strcmp (mapping[i].arg, cpu_id) != 0
33423 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33424 i++;
33425
33426 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
33427 }
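/* Example: compiling for a 970/G5 ends up emitting ".machine ppc970"
   (either via the "970" entry's if_set bits or the "G5" name match);
   with no -mcpu given but AltiVec enabled, the scan stops at the
   "7400" entry via its if_set bit and emits ".machine ppc7400".  */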
33428
33429 #endif /* TARGET_MACHO */
33430
33431 #if TARGET_ELF
33432 static int
33433 rs6000_elf_reloc_rw_mask (void)
33434 {
33435 if (flag_pic)
33436 return 3;
33437 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33438 return 2;
33439 else
33440 return 0;
33441 }
33442
33443 /* Record an element in the table of global constructors. SYMBOL is
33444 a SYMBOL_REF of the function to be called; PRIORITY is a number
33445 between 0 and MAX_INIT_PRIORITY.
33446
33447 This differs from default_named_section_asm_out_constructor in
33448 that we have special handling for -mrelocatable. */
33449
33450 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33451 static void
33452 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33453 {
33454 const char *section = ".ctors";
33455 char buf[18];
33456
33457 if (priority != DEFAULT_INIT_PRIORITY)
33458 {
33459 sprintf (buf, ".ctors.%.5u",
33460 /* Invert the numbering so the linker puts us in the proper
33461 order; constructors are run from right to left, and the
33462 linker sorts in increasing order. */
33463 MAX_INIT_PRIORITY - priority);
33464 section = buf;
33465 }
33466
33467 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33468 assemble_align (POINTER_SIZE);
33469
33470 if (DEFAULT_ABI == ABI_V4
33471 && (TARGET_RELOCATABLE || flag_pic > 1))
33472 {
33473 fputs ("\t.long (", asm_out_file);
33474 output_addr_const (asm_out_file, symbol);
33475 fputs (")@fixup\n", asm_out_file);
33476 }
33477 else
33478 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33479 }
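/* Worked example (assuming MAX_INIT_PRIORITY == 65535): a constructor
   with priority 100 lands in section ".ctors.65435". The inverted
   suffix makes the linker sort it towards the end of .ctors, and since
   constructors run right to left, the low-numbered (early) priority
   indeed runs first.  */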
33480
33481 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33482 static void
33483 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33484 {
33485 const char *section = ".dtors";
33486 char buf[18];
33487
33488 if (priority != DEFAULT_INIT_PRIORITY)
33489 {
33490 sprintf (buf, ".dtors.%.5u",
33491 /* Invert the numbering so the linker puts us in the proper
33492 order; constructors are run from right to left, and the
33493 linker sorts in increasing order. */
33494 MAX_INIT_PRIORITY - priority);
33495 section = buf;
33496 }
33497
33498 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33499 assemble_align (POINTER_SIZE);
33500
33501 if (DEFAULT_ABI == ABI_V4
33502 && (TARGET_RELOCATABLE || flag_pic > 1))
33503 {
33504 fputs ("\t.long (", asm_out_file);
33505 output_addr_const (asm_out_file, symbol);
33506 fputs (")@fixup\n", asm_out_file);
33507 }
33508 else
33509 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33510 }
33511
33512 void
33513 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33514 {
33515 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
33516 {
33517 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33518 ASM_OUTPUT_LABEL (file, name);
33519 fputs (DOUBLE_INT_ASM_OP, file);
33520 rs6000_output_function_entry (file, name);
33521 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33522 if (DOT_SYMBOLS)
33523 {
33524 fputs ("\t.size\t", file);
33525 assemble_name (file, name);
33526 fputs (",24\n\t.type\t.", file);
33527 assemble_name (file, name);
33528 fputs (",@function\n", file);
33529 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33530 {
33531 fputs ("\t.globl\t.", file);
33532 assemble_name (file, name);
33533 putc ('\n', file);
33534 }
33535 }
33536 else
33537 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33538 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33539 rs6000_output_function_entry (file, name);
33540 fputs (":\n", file);
33541 return;
33542 }
33543
33544 if (DEFAULT_ABI == ABI_V4
33545 && (TARGET_RELOCATABLE || flag_pic > 1)
33546 && !TARGET_SECURE_PLT
33547 && (!constant_pool_empty_p () || crtl->profile)
33548 && uses_TOC ())
33549 {
33550 char buf[256];
33551
33552 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33553
33554 fprintf (file, "\t.long ");
33555 assemble_name (file, toc_label_name);
33556 need_toc_init = 1;
33557 putc ('-', file);
33558 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33559 assemble_name (file, buf);
33560 putc ('\n', file);
33561 }
33562
33563 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33564 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33565
33566 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33567 {
33568 char buf[256];
33569
33570 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33571
33572 fprintf (file, "\t.quad .TOC.-");
33573 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33574 assemble_name (file, buf);
33575 putc ('\n', file);
33576 }
33577
33578 if (DEFAULT_ABI == ABI_AIX)
33579 {
33580 const char *desc_name, *orig_name;
33581
33582 orig_name = (*targetm.strip_name_encoding) (name);
33583 desc_name = orig_name;
33584 while (*desc_name == '.')
33585 desc_name++;
33586
33587 if (TREE_PUBLIC (decl))
33588 fprintf (file, "\t.globl %s\n", desc_name);
33589
33590 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33591 fprintf (file, "%s:\n", desc_name);
33592 fprintf (file, "\t.long %s\n", orig_name);
33593 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33594 fputs ("\t.long 0\n", file);
33595 fprintf (file, "\t.previous\n");
33596 }
33597 ASM_OUTPUT_LABEL (file, name);
33598 }
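/* Illustrative 64-bit (pre-ELFv2) output for a public, non-weak
   function "fn" with dot-symbols (a sketch, not verbatim):

       .section ".opd","aw"
       .align 3
       fn:     .quad .fn,.TOC.@tocbase,0
       .previous
       .size fn,24
       .type .fn,@function
       .globl .fn
       .fn:
*/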
33599
33600 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33601 static void
33602 rs6000_elf_file_end (void)
33603 {
33604 #ifdef HAVE_AS_GNU_ATTRIBUTE
33605 /* ??? The value emitted depends on options active at file end.
33606 Assume anyone using #pragma or attributes that might change
33607 options knows what they are doing. */
33608 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33609 && rs6000_passes_float)
33610 {
33611 int fp;
33612
33613 if (TARGET_DF_FPR)
33614 fp = 1;
33615 else if (TARGET_SF_FPR)
33616 fp = 3;
33617 else
33618 fp = 2;
33619 if (rs6000_passes_long_double)
33620 {
33621 if (!TARGET_LONG_DOUBLE_128)
33622 fp |= 2 * 4;
33623 else if (TARGET_IEEEQUAD)
33624 fp |= 3 * 4;
33625 else
33626 fp |= 1 * 4;
33627 }
33628 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
33629 }
33630 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33631 {
33632 if (rs6000_passes_vector)
33633 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33634 (TARGET_ALTIVEC_ABI ? 2 : 1));
33635 if (rs6000_returns_struct)
33636 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33637 aix_struct_return ? 2 : 1);
33638 }
33639 #endif
33640 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33641 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33642 file_end_indicate_exec_stack ();
33643 #endif
33644
33645 if (flag_split_stack)
33646 file_end_indicate_split_stack ();
33647
33648 if (cpu_builtin_p)
33649 {
33650 /* We have expanded a CPU builtin, so we need to emit a reference to
33651 the special symbol that LIBC uses to declare that it supports the
33652 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
33653 switch_to_section (data_section);
33654 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33655 fprintf (asm_out_file, "\t%s %s\n",
33656 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33657 }
33658 }
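/* Worked example for the FP attribute above: a build using
   double-precision hardware floats (TARGET_DF_FPR, fp = 1) that also
   passes a 128-bit non-IEEE long double gets fp |= 1 * 4, so the file
   ends with ".gnu_attribute 4, 5".  */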
33659 #endif
33660
33661 #if TARGET_XCOFF
33662
33663 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33664 #define HAVE_XCOFF_DWARF_EXTRAS 0
33665 #endif
33666
33667 static enum unwind_info_type
33668 rs6000_xcoff_debug_unwind_info (void)
33669 {
33670 return UI_NONE;
33671 }
33672
33673 static void
33674 rs6000_xcoff_asm_output_anchor (rtx symbol)
33675 {
33676 char buffer[100];
33677
33678 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33679 SYMBOL_REF_BLOCK_OFFSET (symbol));
33680 fprintf (asm_out_file, "%s", SET_ASM_OP);
33681 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33682 fprintf (asm_out_file, ",");
33683 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33684 fprintf (asm_out_file, "\n");
33685 }
33686
33687 static void
33688 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33689 {
33690 fputs (GLOBAL_ASM_OP, stream);
33691 RS6000_OUTPUT_BASENAME (stream, name);
33692 putc ('\n', stream);
33693 }
33694
33695 /* A get_unnamed_section callback, used for read-only sections.
33696 DIRECTIVE points to the section string variable. */
33697
33698 static void
33699 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33700 {
33701 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33702 *(const char *const *) directive,
33703 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33704 }
33705
33706 /* Likewise for read-write sections. */
33707
33708 static void
33709 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33710 {
33711 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33712 *(const char *const *) directive,
33713 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33714 }
33715
33716 static void
33717 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33718 {
33719 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33720 *(const char *const *) directive,
33721 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33722 }
33723
33724 /* A get_unnamed_section callback, used for switching to toc_section. */
33725
33726 static void
33727 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33728 {
33729 if (TARGET_MINIMAL_TOC)
33730 {
33731 /* toc_section is always selected at least once from
33732 rs6000_xcoff_file_start, so the TOC anchor label is guaranteed
33733 to be defined exactly once in each file. */
33734 if (!toc_initialized)
33735 {
33736 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33737 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33738 toc_initialized = 1;
33739 }
33740 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33741 (TARGET_32BIT ? "" : ",3"));
33742 }
33743 else
33744 fputs ("\t.toc\n", asm_out_file);
33745 }
33746
33747 /* Implement TARGET_ASM_INIT_SECTIONS. */
33748
33749 static void
33750 rs6000_xcoff_asm_init_sections (void)
33751 {
33752 read_only_data_section
33753 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33754 &xcoff_read_only_section_name);
33755
33756 private_data_section
33757 = get_unnamed_section (SECTION_WRITE,
33758 rs6000_xcoff_output_readwrite_section_asm_op,
33759 &xcoff_private_data_section_name);
33760
33761 tls_data_section
33762 = get_unnamed_section (SECTION_TLS,
33763 rs6000_xcoff_output_tls_section_asm_op,
33764 &xcoff_tls_data_section_name);
33765
33766 tls_private_data_section
33767 = get_unnamed_section (SECTION_TLS,
33768 rs6000_xcoff_output_tls_section_asm_op,
33769 &xcoff_private_data_section_name);
33770
33771 read_only_private_data_section
33772 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33773 &xcoff_private_data_section_name);
33774
33775 toc_section
33776 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33777
33778 readonly_data_section = read_only_data_section;
33779 }
33780
33781 static int
33782 rs6000_xcoff_reloc_rw_mask (void)
33783 {
33784 return 3;
33785 }
33786
33787 static void
33788 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33789 tree decl ATTRIBUTE_UNUSED)
33790 {
33791 int smclass;
33792 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33793
33794 if (flags & SECTION_EXCLUDE)
33795 smclass = 4;
33796 else if (flags & SECTION_DEBUG)
33797 {
33798 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33799 return;
33800 }
33801 else if (flags & SECTION_CODE)
33802 smclass = 0;
33803 else if (flags & SECTION_TLS)
33804 smclass = 3;
33805 else if (flags & SECTION_WRITE)
33806 smclass = 2;
33807 else
33808 smclass = 1;
33809
33810 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33811 (flags & SECTION_CODE) ? "." : "",
33812 name, suffix[smclass], flags & SECTION_ENTSIZE);
33813 }
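/* Example: a writable section named "mydata" whose flags encode an
   8-byte alignment (log2 == 3) is emitted as ".csect mydata[RW],3",
   while a code section "hot" gets a leading dot, e.g.
   ".csect .hot[PR],2" for 4-byte alignment.  */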
33814
33815 #define IN_NAMED_SECTION(DECL) \
33816 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33817 && DECL_SECTION_NAME (DECL) != NULL)
33818
33819 static section *
33820 rs6000_xcoff_select_section (tree decl, int reloc,
33821 unsigned HOST_WIDE_INT align)
33822 {
33823 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33824 named section. */
33825 if (align > BIGGEST_ALIGNMENT)
33826 {
33827 resolve_unique_section (decl, reloc, true);
33828 if (IN_NAMED_SECTION (decl))
33829 return get_named_section (decl, NULL, reloc);
33830 }
33831
33832 if (decl_readonly_section (decl, reloc))
33833 {
33834 if (TREE_PUBLIC (decl))
33835 return read_only_data_section;
33836 else
33837 return read_only_private_data_section;
33838 }
33839 else
33840 {
33841 #if HAVE_AS_TLS
33842 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33843 {
33844 if (TREE_PUBLIC (decl))
33845 return tls_data_section;
33846 else if (bss_initializer_p (decl))
33847 {
33848 /* Convert to COMMON to emit in BSS. */
33849 DECL_COMMON (decl) = 1;
33850 return tls_comm_section;
33851 }
33852 else
33853 return tls_private_data_section;
33854 }
33855 else
33856 #endif
33857 if (TREE_PUBLIC (decl))
33858 return data_section;
33859 else
33860 return private_data_section;
33861 }
33862 }
33863
33864 static void
33865 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33866 {
33867 const char *name;
33868
33869 /* Use select_section for private data and uninitialized data with
33870 alignment <= BIGGEST_ALIGNMENT. */
33871 if (!TREE_PUBLIC (decl)
33872 || DECL_COMMON (decl)
33873 || (DECL_INITIAL (decl) == NULL_TREE
33874 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33875 || DECL_INITIAL (decl) == error_mark_node
33876 || (flag_zero_initialized_in_bss
33877 && initializer_zerop (DECL_INITIAL (decl))))
33878 return;
33879
33880 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33881 name = (*targetm.strip_name_encoding) (name);
33882 set_decl_section_name (decl, name);
33883 }
33884
33885 /* Select section for constant in constant pool.
33886
33887 On RS/6000, all constants are in the private read-only data area.
33888 However, if this is being placed in the TOC it must be output as a
33889 toc entry. */
33890
33891 static section *
33892 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33893 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33894 {
33895 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33896 return toc_section;
33897 else
33898 return read_only_private_data_section;
33899 }
33900
33901 /* Remove any trailing [DS] or the like from the symbol name. */
33902
33903 static const char *
33904 rs6000_xcoff_strip_name_encoding (const char *name)
33905 {
33906 size_t len;
33907 if (*name == '*')
33908 name++;
33909 len = strlen (name);
33910 if (name[len - 1] == ']')
33911 return ggc_alloc_string (name, len - 4);
33912 else
33913 return name;
33914 }
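/* Examples: "foo[DS]" is stripped to "foo", and "*bar[RW]" first
   loses the '*' marker and then the mapping class, yielding "bar";
   names without a trailing ']' are returned unchanged.  */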
33915
33916 /* Section attributes. AIX is always PIC. */
33917
33918 static unsigned int
33919 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33920 {
33921 unsigned int align;
33922 unsigned int flags = default_section_type_flags (decl, name, reloc);
33923
33924 /* Align to at least the minimum unit size (MIN_UNITS_PER_WORD). */
33925 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33926 align = MIN_UNITS_PER_WORD;
33927 else
33928 /* Increase alignment of large objects if not already stricter. */
33929 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33930 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33931 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33932
33933 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
33934 }
33935
33936 /* Output at beginning of assembler file.
33937
33938 Initialize the section names for the RS/6000 at this point.
33939
33940 Specify filename, including full path, to assembler.
33941
33942 We want to go into the TOC section so at least one .toc will be emitted.
33943 Also, in order to output proper .bs/.es pairs, we need at least one static
33944 [RW] section emitted.
33945
33946 Finally, declare mcount when profiling to make the assembler happy. */
33947
33948 static void
33949 rs6000_xcoff_file_start (void)
33950 {
33951 rs6000_gen_section_name (&xcoff_bss_section_name,
33952 main_input_filename, ".bss_");
33953 rs6000_gen_section_name (&xcoff_private_data_section_name,
33954 main_input_filename, ".rw_");
33955 rs6000_gen_section_name (&xcoff_read_only_section_name,
33956 main_input_filename, ".ro_");
33957 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33958 main_input_filename, ".tls_");
33959 rs6000_gen_section_name (&xcoff_tbss_section_name,
33960 main_input_filename, ".tbss_[UL]");
33961
33962 fputs ("\t.file\t", asm_out_file);
33963 output_quoted_string (asm_out_file, main_input_filename);
33964 fputc ('\n', asm_out_file);
33965 if (write_symbols != NO_DEBUG)
33966 switch_to_section (private_data_section);
33967 switch_to_section (toc_section);
33968 switch_to_section (text_section);
33969 if (profile_flag)
33970 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33971 rs6000_file_start ();
33972 }
33973
33974 /* Output at end of assembler file.
33975 On the RS/6000, referencing data should automatically pull in text. */
33976
33977 static void
33978 rs6000_xcoff_file_end (void)
33979 {
33980 switch_to_section (text_section);
33981 fputs ("_section_.text:\n", asm_out_file);
33982 switch_to_section (data_section);
33983 fputs (TARGET_32BIT
33984 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33985 asm_out_file);
33986 }
33987
33988 struct declare_alias_data
33989 {
33990 FILE *file;
33991 bool function_descriptor;
33992 };
33993
33994 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
33995
33996 static bool
33997 rs6000_declare_alias (struct symtab_node *n, void *d)
33998 {
33999 struct declare_alias_data *data = (struct declare_alias_data *)d;
34000 /* The main symbol is output specially, because the varasm machinery
34001 does part of the job for us - we need not emit .globl/.lglobl here. */
34002 if (!n->alias || n->weakref)
34003 return false;
34004
34005 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
34006 return false;
34007
34008 /* Prevent assemble_alias from trying to use the .set pseudo-op,
34009 which does not behave as the middle end expects. */
34010 TREE_ASM_WRITTEN (n->decl) = true;
34011
34012 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
34013 char *buffer = (char *) alloca (strlen (name) + 2);
34014 char *p;
34015 int dollar_inside = 0;
34016
34017 strcpy (buffer, name);
34018 p = strchr (buffer, '$');
34019 while (p) {
34020 *p = '_';
34021 dollar_inside++;
34022 p = strchr (p + 1, '$');
34023 }
34024 if (TREE_PUBLIC (n->decl))
34025 {
34026 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
34027 {
34028 if (dollar_inside) {
34029 if (data->function_descriptor)
34030 fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34031 fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34032 }
34033 if (data->function_descriptor)
34034 {
34035 fputs ("\t.globl .", data->file);
34036 RS6000_OUTPUT_BASENAME (data->file, buffer);
34037 putc ('\n', data->file);
34038 }
34039 fputs ("\t.globl ", data->file);
34040 RS6000_OUTPUT_BASENAME (data->file, buffer);
34041 putc ('\n', data->file);
34042 }
34043 #ifdef ASM_WEAKEN_DECL
34044 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
34045 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
34046 #endif
34047 }
34048 else
34049 {
34050 if (dollar_inside)
34051 {
34052 if (data->function_descriptor)
34053 fprintf (data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34054 fprintf (data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34055 }
34056 if (data->function_descriptor)
34057 {
34058 fputs ("\t.lglobl .", data->file);
34059 RS6000_OUTPUT_BASENAME (data->file, buffer);
34060 putc ('\n', data->file);
34061 }
34062 fputs ("\t.lglobl ", data->file);
34063 RS6000_OUTPUT_BASENAME (data->file, buffer);
34064 putc ('\n', data->file);
34065 }
34066 if (data->function_descriptor)
34067 fputs (".", data->file);
34068 RS6000_OUTPUT_BASENAME (data->file, buffer);
34069 fputs (":\n", data->file);
34070 return false;
34071 }
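/* Illustrative output for a public, non-weak alias named "a$b": the
   '$' is rewritten so the assembler-visible name is "a_b", while
   .rename preserves the original spelling in the object file:

       .rename a_b,"a$b"
       .globl a_b
       a_b:
*/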
34072
34073
34074 #ifdef HAVE_GAS_HIDDEN
34075 /* Helper function to calculate the visibility of a DECL
34076 and return the value as a const string. */
34077
34078 static const char *
34079 rs6000_xcoff_visibility (tree decl)
34080 {
34081 static const char * const visibility_types[] = {
34082 "", ",protected", ",hidden", ",internal"
34083 };
34084
34085 enum symbol_visibility vis = DECL_VISIBILITY (decl);
34086
34087 if (TREE_CODE (decl) == FUNCTION_DECL
34088 && cgraph_node::get (decl)
34089 && cgraph_node::get (decl)->instrumentation_clone
34090 && cgraph_node::get (decl)->instrumented_version)
34091 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
34092
34093 return visibility_types[vis];
34094 }
34095 #endif
34096
34097
34098 /* This macro produces the initial definition of a function name.
34099 On the RS/6000, we need to place an extra '.' in the function name and
34100 output the function descriptor.
34101 Dollar signs are converted to underscores.
34102
34103 The csect for the function will have already been created when
34104 text_section was selected. We do have to go back to that csect, however.
34105
34106 The third and fourth parameters to the .function pseudo-op (16 and 044)
34107 are placeholders which no longer have any use.
34108
34109 Because the AIX assembler's .set command has unexpected semantics, we
34110 output all aliases as alternative labels in front of the definition. */
34111
34112 void
34113 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
34114 {
34115 char *buffer = (char *) alloca (strlen (name) + 1);
34116 char *p;
34117 int dollar_inside = 0;
34118 struct declare_alias_data data = {file, false};
34119
34120 strcpy (buffer, name);
34121 p = strchr (buffer, '$');
34122 while (p) {
34123 *p = '_';
34124 dollar_inside++;
34125 p = strchr (p + 1, '$');
34126 }
34127 if (TREE_PUBLIC (decl))
34128 {
34129 if (!RS6000_WEAK || !DECL_WEAK (decl))
34130 {
34131 if (dollar_inside) {
34132 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
34133 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
34134 }
34135 fputs ("\t.globl .", file);
34136 RS6000_OUTPUT_BASENAME (file, buffer);
34137 #ifdef HAVE_GAS_HIDDEN
34138 fputs (rs6000_xcoff_visibility (decl), file);
34139 #endif
34140 putc ('\n', file);
34141 }
34142 }
34143 else
34144 {
34145 if (dollar_inside) {
34146 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
34147 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
34148 }
34149 fputs ("\t.lglobl .", file);
34150 RS6000_OUTPUT_BASENAME (file, buffer);
34151 putc ('\n', file);
34152 }
34153 fputs ("\t.csect ", file);
34154 RS6000_OUTPUT_BASENAME (file, buffer);
34155 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
34156 RS6000_OUTPUT_BASENAME (file, buffer);
34157 fputs (":\n", file);
34158 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34159 &data, true);
34160 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
34161 RS6000_OUTPUT_BASENAME (file, buffer);
34162 fputs (", TOC[tc0], 0\n", file);
34163 in_section = NULL;
34164 switch_to_section (function_section (decl));
34165 putc ('.', file);
34166 RS6000_OUTPUT_BASENAME (file, buffer);
34167 fputs (":\n", file);
34168 data.function_descriptor = true;
34169 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34170 &data, true);
34171 if (!DECL_IGNORED_P (decl))
34172 {
34173 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34174 xcoffout_declare_function (file, decl, buffer);
34175 else if (write_symbols == DWARF2_DEBUG)
34176 {
34177 name = (*targetm.strip_name_encoding) (name);
34178 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34179 }
34180 }
34181 return;
34182 }
34183
34184
34185 /* Output assembly language to globalize a symbol from a DECL,
34186 possibly with visibility. */
34187
34188 void
34189 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
34190 {
34191 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
34192 fputs (GLOBAL_ASM_OP, stream);
34193 RS6000_OUTPUT_BASENAME (stream, name);
34194 #ifdef HAVE_GAS_HIDDEN
34195 fputs (rs6000_xcoff_visibility (decl), stream);
34196 #endif
34197 putc ('\n', stream);
34198 }
34199
34200 /* Output assembly language to define a symbol as COMMON from a DECL,
34201 possibly with visibility. */
34202
34203 void
34204 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
34205 tree decl ATTRIBUTE_UNUSED,
34206 const char *name,
34207 unsigned HOST_WIDE_INT size,
34208 unsigned HOST_WIDE_INT align)
34209 {
34210 unsigned HOST_WIDE_INT align2 = 2;
34211
34212 if (align > 32)
34213 align2 = floor_log2 (align / BITS_PER_UNIT);
34214 else if (size > 4)
34215 align2 = 3;
34216
34217 fputs (COMMON_ASM_OP, stream);
34218 RS6000_OUTPUT_BASENAME (stream, name);
34219
34220 fprintf (stream,
34221 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
34222 size, align2);
34223
34224 #ifdef HAVE_GAS_HIDDEN
34225 fputs (rs6000_xcoff_visibility (decl), stream);
34226 #endif
34227 putc ('\n', stream);
34228 }
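/* Worked example: a 16-byte common symbol with 16-byte alignment
   arrives with ALIGN == 128 bits; 128 > 32, so
   align2 = floor_log2 (128 / 8) = 4 and the emitted directive is
   roughly ".comm name,16,4".  */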
34229
34230 /* This macro produces the initial definition of an object (variable) name.
34231 Because the AIX assembler's .set command has unexpected semantics, we
34232 output all aliases as alternative labels in front of the definition. */
34233
34234 void
34235 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34236 {
34237 struct declare_alias_data data = {file, false};
34238 RS6000_OUTPUT_BASENAME (file, name);
34239 fputs (":\n", file);
34240 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34241 &data, true);
34242 }
34243
34244 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
34245
34246 void
34247 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34248 {
34249 fputs (integer_asm_op (size, FALSE), file);
34250 assemble_name (file, label);
34251 fputs ("-$", file);
34252 }
34253
34254 /* Output a symbol offset relative to the dbase for the current object.
34255 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34256 signed offsets.
34257
34258 __gcc_unwind_dbase is embedded in all executables/libraries through
34259 libgcc/config/rs6000/crtdbase.S. */
34260
34261 void
34262 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34263 {
34264 fputs (integer_asm_op (size, FALSE), file);
34265 assemble_name (file, label);
34266 fputs("-__gcc_unwind_dbase", file);
34267 }
34268
34269 #ifdef HAVE_AS_TLS
34270 static void
34271 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34272 {
34273 rtx symbol;
34274 int flags;
34275 const char *symname;
34276
34277 default_encode_section_info (decl, rtl, first);
34278
34279 /* Careful not to prod global register variables. */
34280 if (!MEM_P (rtl))
34281 return;
34282 symbol = XEXP (rtl, 0);
34283 if (GET_CODE (symbol) != SYMBOL_REF)
34284 return;
34285
34286 flags = SYMBOL_REF_FLAGS (symbol);
34287
34288 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34289 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34290
34291 SYMBOL_REF_FLAGS (symbol) = flags;
34292
34293 /* Append mapping class to extern decls. */
34294 symname = XSTR (symbol, 0);
34295 if (decl /* sync condition with assemble_external () */
34296 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
34297 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
34298 || TREE_CODE (decl) == FUNCTION_DECL)
34299 && symname[strlen (symname) - 1] != ']')
34300 {
34301 char *newname = (char *) alloca (strlen (symname) + 5);
34302 strcpy (newname, symname);
34303 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
34304 ? "[DS]" : "[UA]"));
34305 XSTR (symbol, 0) = ggc_strdup (newname);
34306 }
34307 }
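/* Example of the renaming above: an extern function "foo" becomes
   "foo[DS]" (its descriptor csect) and an extern non-TLS variable
   "v" becomes "v[UA]"; names already ending in ']' are left
   untouched.  */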
34308 #endif /* HAVE_AS_TLS */
34309 #endif /* TARGET_XCOFF */
34310
34311 void
34312 rs6000_asm_weaken_decl (FILE *stream, tree decl,
34313 const char *name, const char *val)
34314 {
34315 fputs ("\t.weak\t", stream);
34316 RS6000_OUTPUT_BASENAME (stream, name);
34317 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34318 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34319 {
34320 if (TARGET_XCOFF)
34321 fputs ("[DS]", stream);
34322 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34323 if (TARGET_XCOFF)
34324 fputs (rs6000_xcoff_visibility (decl), stream);
34325 #endif
34326 fputs ("\n\t.weak\t.", stream);
34327 RS6000_OUTPUT_BASENAME (stream, name);
34328 }
34329 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34330 if (TARGET_XCOFF)
34331 fputs (rs6000_xcoff_visibility (decl), stream);
34332 #endif
34333 fputc ('\n', stream);
34334 if (val)
34335 {
34336 #ifdef ASM_OUTPUT_DEF
34337 ASM_OUTPUT_DEF (stream, name, val);
34338 #endif
34339 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34340 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34341 {
34342 fputs ("\t.set\t.", stream);
34343 RS6000_OUTPUT_BASENAME (stream, name);
34344 fputs (",.", stream);
34345 RS6000_OUTPUT_BASENAME (stream, val);
34346 fputc ('\n', stream);
34347 }
34348 }
34349 }
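/* Illustrative behavior on 64-bit AIX with dot-symbols: weakening a
   function "f" defined as "g" emits ".weak f[DS]" and ".weak .f",
   then the definition via ASM_OUTPUT_DEF, and finally ".set .f,.g"
   to pair the entry-point symbols.  */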
34350
34351
34352 /* Return true if INSN should not be copied. */
34353
34354 static bool
34355 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34356 {
34357 return recog_memoized (insn) >= 0
34358 && get_attr_cannot_copy (insn);
34359 }
34360
34361 /* Compute a (partial) cost for rtx X. Return true if the complete
34362 cost has been computed, and false if subexpressions should be
34363 scanned. In either case, *TOTAL contains the cost result. */
34364
34365 static bool
34366 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34367 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34368 {
34369 int code = GET_CODE (x);
34370
34371 switch (code)
34372 {
34373 /* On the RS/6000, if it is valid in the insn, it is free. */
34374 case CONST_INT:
34375 if (((outer_code == SET
34376 || outer_code == PLUS
34377 || outer_code == MINUS)
34378 && (satisfies_constraint_I (x)
34379 || satisfies_constraint_L (x)))
34380 || (outer_code == AND
34381 && (satisfies_constraint_K (x)
34382 || (mode == SImode
34383 ? satisfies_constraint_L (x)
34384 : satisfies_constraint_J (x))))
34385 || ((outer_code == IOR || outer_code == XOR)
34386 && (satisfies_constraint_K (x)
34387 || (mode == SImode
34388 ? satisfies_constraint_L (x)
34389 : satisfies_constraint_J (x))))
34390 || outer_code == ASHIFT
34391 || outer_code == ASHIFTRT
34392 || outer_code == LSHIFTRT
34393 || outer_code == ROTATE
34394 || outer_code == ROTATERT
34395 || outer_code == ZERO_EXTRACT
34396 || (outer_code == MULT
34397 && satisfies_constraint_I (x))
34398 || ((outer_code == DIV || outer_code == UDIV
34399 || outer_code == MOD || outer_code == UMOD)
34400 && exact_log2 (INTVAL (x)) >= 0)
34401 || (outer_code == COMPARE
34402 && (satisfies_constraint_I (x)
34403 || satisfies_constraint_K (x)))
34404 || ((outer_code == EQ || outer_code == NE)
34405 && (satisfies_constraint_I (x)
34406 || satisfies_constraint_K (x)
34407 || (mode == SImode
34408 ? satisfies_constraint_L (x)
34409 : satisfies_constraint_J (x))))
34410 || (outer_code == GTU
34411 && satisfies_constraint_I (x))
34412 || (outer_code == LTU
34413 && satisfies_constraint_P (x)))
34414 {
34415 *total = 0;
34416 return true;
34417 }
34418 else if ((outer_code == PLUS
34419 && reg_or_add_cint_operand (x, VOIDmode))
34420 || (outer_code == MINUS
34421 && reg_or_sub_cint_operand (x, VOIDmode))
34422 || ((outer_code == SET
34423 || outer_code == IOR
34424 || outer_code == XOR)
34425 && (INTVAL (x)
34426 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34427 {
34428 *total = COSTS_N_INSNS (1);
34429 return true;
34430 }
34431 /* FALLTHRU */
34432
34433 case CONST_DOUBLE:
34434 case CONST_WIDE_INT:
34435 case CONST:
34436 case HIGH:
34437 case SYMBOL_REF:
34438 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34439 return true;
34440
34441 case MEM:
34442 /* When optimizing for size, MEM should be slightly more expensive
34443 than generating the address, e.g., (plus (reg) (const)).
34444 L1 cache latency is about two instructions. */
34445 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34446 if (SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (x)))
34447 *total += COSTS_N_INSNS (100);
34448 return true;
34449
34450 case LABEL_REF:
34451 *total = 0;
34452 return true;
34453
34454 case PLUS:
34455 case MINUS:
34456 if (FLOAT_MODE_P (mode))
34457 *total = rs6000_cost->fp;
34458 else
34459 *total = COSTS_N_INSNS (1);
34460 return false;
34461
34462 case MULT:
34463 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34464 && satisfies_constraint_I (XEXP (x, 1)))
34465 {
34466 if (INTVAL (XEXP (x, 1)) >= -256
34467 && INTVAL (XEXP (x, 1)) <= 255)
34468 *total = rs6000_cost->mulsi_const9;
34469 else
34470 *total = rs6000_cost->mulsi_const;
34471 }
34472 else if (mode == SFmode)
34473 *total = rs6000_cost->fp;
34474 else if (FLOAT_MODE_P (mode))
34475 *total = rs6000_cost->dmul;
34476 else if (mode == DImode)
34477 *total = rs6000_cost->muldi;
34478 else
34479 *total = rs6000_cost->mulsi;
34480 return false;
34481
34482 case FMA:
34483 if (mode == SFmode)
34484 *total = rs6000_cost->fp;
34485 else
34486 *total = rs6000_cost->dmul;
34487 break;
34488
34489 case DIV:
34490 case MOD:
34491 if (FLOAT_MODE_P (mode))
34492 {
34493 *total = mode == DFmode ? rs6000_cost->ddiv
34494 : rs6000_cost->sdiv;
34495 return false;
34496 }
34497 /* FALLTHRU */
34498
34499 case UDIV:
34500 case UMOD:
34501 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34502 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34503 {
34504 if (code == DIV || code == MOD)
34505 /* Shift, addze */
34506 *total = COSTS_N_INSNS (2);
34507 else
34508 /* Shift */
34509 *total = COSTS_N_INSNS (1);
34510 }
34511 else
34512 {
34513 if (GET_MODE (XEXP (x, 1)) == DImode)
34514 *total = rs6000_cost->divdi;
34515 else
34516 *total = rs6000_cost->divsi;
34517 }
34518 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34519 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34520 *total += COSTS_N_INSNS (2);
34521 return false;
34522
34523 case CTZ:
34524 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34525 return false;
34526
34527 case FFS:
34528 *total = COSTS_N_INSNS (4);
34529 return false;
34530
34531 case POPCOUNT:
34532 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34533 return false;
34534
34535 case PARITY:
34536 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34537 return false;
34538
34539 case NOT:
34540 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34541 *total = 0;
34542 else
34543 *total = COSTS_N_INSNS (1);
34544 return false;
34545
34546 case AND:
34547 if (CONST_INT_P (XEXP (x, 1)))
34548 {
34549 rtx left = XEXP (x, 0);
34550 rtx_code left_code = GET_CODE (left);
34551
34552 /* rotate-and-mask: 1 insn. */
34553 if ((left_code == ROTATE
34554 || left_code == ASHIFT
34555 || left_code == LSHIFTRT)
34556 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34557 {
34558 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34559 if (!CONST_INT_P (XEXP (left, 1)))
34560 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34561 *total += COSTS_N_INSNS (1);
34562 return true;
34563 }
34564
34565 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34566 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34567 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34568 || (val & 0xffff) == val
34569 || (val & 0xffff0000) == val
34570 || ((val & 0xffff) == 0 && mode == SImode))
34571 {
34572 *total = rtx_cost (left, mode, AND, 0, speed);
34573 *total += COSTS_N_INSNS (1);
34574 return true;
34575 }
34576
34577 /* 2 insns. */
34578 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34579 {
34580 *total = rtx_cost (left, mode, AND, 0, speed);
34581 *total += COSTS_N_INSNS (2);
34582 return true;
34583 }
34584 }
34585
34586 *total = COSTS_N_INSNS (1);
34587 return false;
34588
34589 case IOR:
34590 /* FIXME */
34591 *total = COSTS_N_INSNS (1);
34592 return true;
34593
34594 case CLZ:
34595 case XOR:
34596 case ZERO_EXTRACT:
34597 *total = COSTS_N_INSNS (1);
34598 return false;
34599
34600 case ASHIFT:
34601 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34602 the sign extend and shift separately within the insn. */
34603 if (TARGET_EXTSWSLI && mode == DImode
34604 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34605 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34606 {
34607 *total = 0;
34608 return false;
34609 }
34610 /* fall through */
34611
34612 case ASHIFTRT:
34613 case LSHIFTRT:
34614 case ROTATE:
34615 case ROTATERT:
34616 /* Handle mul_highpart. */
34617 if (outer_code == TRUNCATE
34618 && GET_CODE (XEXP (x, 0)) == MULT)
34619 {
34620 if (mode == DImode)
34621 *total = rs6000_cost->muldi;
34622 else
34623 *total = rs6000_cost->mulsi;
34624 return true;
34625 }
34626 else if (outer_code == AND)
34627 *total = 0;
34628 else
34629 *total = COSTS_N_INSNS (1);
34630 return false;
34631
34632 case SIGN_EXTEND:
34633 case ZERO_EXTEND:
34634 if (GET_CODE (XEXP (x, 0)) == MEM)
34635 *total = 0;
34636 else
34637 *total = COSTS_N_INSNS (1);
34638 return false;
34639
34640 case COMPARE:
34641 case NEG:
34642 case ABS:
34643 if (!FLOAT_MODE_P (mode))
34644 {
34645 *total = COSTS_N_INSNS (1);
34646 return false;
34647 }
34648 /* FALLTHRU */
34649
34650 case FLOAT:
34651 case UNSIGNED_FLOAT:
34652 case FIX:
34653 case UNSIGNED_FIX:
34654 case FLOAT_TRUNCATE:
34655 *total = rs6000_cost->fp;
34656 return false;
34657
34658 case FLOAT_EXTEND:
34659 if (mode == DFmode)
34660 *total = rs6000_cost->sfdf_convert;
34661 else
34662 *total = rs6000_cost->fp;
34663 return false;
34664
34665 case UNSPEC:
34666 switch (XINT (x, 1))
34667 {
34668 case UNSPEC_FRSP:
34669 *total = rs6000_cost->fp;
34670 return true;
34671
34672 default:
34673 break;
34674 }
34675 break;
34676
34677 case CALL:
34678 case IF_THEN_ELSE:
34679 if (!speed)
34680 {
34681 *total = COSTS_N_INSNS (1);
34682 return true;
34683 }
34684 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34685 {
34686 *total = rs6000_cost->fp;
34687 return false;
34688 }
34689 break;
34690
34691 case NE:
34692 case EQ:
34693 case GTU:
34694 case LTU:
34695 /* Carry bit requires mode == Pmode.
34696 NEG or PLUS already counted so only add one. */
34697 if (mode == Pmode
34698 && (outer_code == NEG || outer_code == PLUS))
34699 {
34700 *total = COSTS_N_INSNS (1);
34701 return true;
34702 }
34703 if (outer_code == SET)
34704 {
34705 if (XEXP (x, 1) == const0_rtx)
34706 {
34707 if (TARGET_ISEL && !TARGET_MFCRF)
34708 *total = COSTS_N_INSNS (8);
34709 else
34710 *total = COSTS_N_INSNS (2);
34711 return true;
34712 }
34713 else
34714 {
34715 *total = COSTS_N_INSNS (3);
34716 return false;
34717 }
34718 }
34719 /* FALLTHRU */
34720
34721 case GT:
34722 case LT:
34723 case UNORDERED:
34724 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
34725 {
34726 if (TARGET_ISEL && !TARGET_MFCRF)
34727 *total = COSTS_N_INSNS (8);
34728 else
34729 *total = COSTS_N_INSNS (2);
34730 return true;
34731 }
34732 /* CC COMPARE. */
34733 if (outer_code == COMPARE)
34734 {
34735 *total = 0;
34736 return true;
34737 }
34738 break;
34739
34740 default:
34741 break;
34742 }
34743
34744 return false;
34745 }
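/* Worked example: costing the constant in (plus (reg) (const_int 10))
   reaches the CONST_INT case with outer_code == PLUS; 10 satisfies
   constraint "I" (signed 16 bits), so *total = 0 -- the constant is
   free because it fits into the addi instruction itself.  */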
34746
34747 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34748
34749 static bool
34750 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34751 int opno, int *total, bool speed)
34752 {
34753 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34754
34755 fprintf (stderr,
34756 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34757 "opno = %d, total = %d, speed = %s, x:\n",
34758 ret ? "complete" : "scan inner",
34759 GET_MODE_NAME (mode),
34760 GET_RTX_NAME (outer_code),
34761 opno,
34762 *total,
34763 speed ? "true" : "false");
34764
34765 debug_rtx (x);
34766
34767 return ret;
34768 }
34769
34770 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34771
34772 static int
34773 rs6000_debug_address_cost (rtx x, machine_mode mode,
34774 addr_space_t as, bool speed)
34775 {
34776 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34777
34778 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34779 ret, speed ? "true" : "false");
34780 debug_rtx (x);
34781
34782 return ret;
34783 }
34784
34785
34786 /* A C expression returning the cost of moving data from a register of class
34787 FROM to one of class TO. */
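/* In GCC's cost convention a single move instruction costs 2, so the
   general case below charges 2 per GPR moved; CR moves cost 4 because
   of the extra shift, slow LR/CTR moves on POWER6-POWER9 cost 6 per
   register to bias spills toward memory, and GPR<->FPR/VSX moves are
   priced as a round trip through memory.  */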
34788
34789 static int
34790 rs6000_register_move_cost (machine_mode mode,
34791 reg_class_t from, reg_class_t to)
34792 {
34793 int ret;
34794
34795 if (TARGET_DEBUG_COST)
34796 dbg_cost_ctrl++;
34797
34798 /* Moves from/to GENERAL_REGS. */
34799 if (reg_classes_intersect_p (to, GENERAL_REGS)
34800 || reg_classes_intersect_p (from, GENERAL_REGS))
34801 {
34802 reg_class_t rclass = from;
34803
34804 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34805 rclass = to;
34806
34807 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34808 ret = (rs6000_memory_move_cost (mode, rclass, false)
34809 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34810
34811 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34812 shift. */
34813 else if (rclass == CR_REGS)
34814 ret = 4;
34815
34816 /* For those processors that have slow LR/CTR moves, make them more
34817 expensive than memory in order to bias spills to memory. */
34818 else if ((rs6000_cpu == PROCESSOR_POWER6
34819 || rs6000_cpu == PROCESSOR_POWER7
34820 || rs6000_cpu == PROCESSOR_POWER8
34821 || rs6000_cpu == PROCESSOR_POWER9)
34822 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34823 ret = 6 * hard_regno_nregs[0][mode];
34824
34825 else
34826 /* A move will cost one instruction per GPR moved. */
34827 ret = 2 * hard_regno_nregs[0][mode];
34828 }
34829
34830 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34831 else if (VECTOR_MEM_VSX_P (mode)
34832 && reg_classes_intersect_p (to, VSX_REGS)
34833 && reg_classes_intersect_p (from, VSX_REGS))
34834 ret = 2 * hard_regno_nregs[FIRST_FPR_REGNO][mode];
34835
34836 /* Moving between two similar registers is just one instruction. */
34837 else if (reg_classes_intersect_p (to, from))
34838 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34839
34840 /* Everything else has to go through GENERAL_REGS. */
34841 else
34842 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34843 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34844
34845 if (TARGET_DEBUG_COST)
34846 {
34847 if (dbg_cost_ctrl == 1)
34848 fprintf (stderr,
34849 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34850 ret, GET_MODE_NAME (mode), reg_class_names[from],
34851 reg_class_names[to]);
34852 dbg_cost_ctrl--;
34853 }
34854
34855 return ret;
34856 }
34857
34858 /* A C expression returning the cost of moving data of MODE from a register to
34859 or from memory. */
34860
34861 static int
34862 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34863 bool in ATTRIBUTE_UNUSED)
34864 {
34865 int ret;
34866
34867 if (TARGET_DEBUG_COST)
34868 dbg_cost_ctrl++;
34869
34870 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34871 ret = 4 * hard_regno_nregs[0][mode];
34872 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34873 || reg_classes_intersect_p (rclass, VSX_REGS)))
34874 ret = 4 * hard_regno_nregs[32][mode];
34875 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34876 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
34877 else
34878 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34879
34880 if (TARGET_DEBUG_COST)
34881 {
34882 if (dbg_cost_ctrl == 1)
34883 fprintf (stderr,
34884 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34885 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34886 dbg_cost_ctrl--;
34887 }
34888
34889 return ret;
34890 }
34891
34892 /* Returns a code for a target-specific builtin that implements
34893 reciprocal of the function, or NULL_TREE if not available. */
34894
34895 static tree
34896 rs6000_builtin_reciprocal (tree fndecl)
34897 {
34898 switch (DECL_FUNCTION_CODE (fndecl))
34899 {
34900 case VSX_BUILTIN_XVSQRTDP:
34901 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34902 return NULL_TREE;
34903
34904 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34905
34906 case VSX_BUILTIN_XVSQRTSP:
34907 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34908 return NULL_TREE;
34909
34910 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34911
34912 default:
34913 return NULL_TREE;
34914 }
34915 }
34916
34917 /* Load up a constant. If the mode is a vector mode, splat the value across
34918 all of the vector elements. */
34919
34920 static rtx
34921 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34922 {
34923 rtx reg;
34924
34925 if (mode == SFmode || mode == DFmode)
34926 {
34927 rtx d = const_double_from_real_value (dconst, mode);
34928 reg = force_reg (mode, d);
34929 }
34930 else if (mode == V4SFmode)
34931 {
34932 rtx d = const_double_from_real_value (dconst, SFmode);
34933 rtvec v = gen_rtvec (4, d, d, d, d);
34934 reg = gen_reg_rtx (mode);
34935 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34936 }
34937 else if (mode == V2DFmode)
34938 {
34939 rtx d = const_double_from_real_value (dconst, DFmode);
34940 rtvec v = gen_rtvec (2, d, d);
34941 reg = gen_reg_rtx (mode);
34942 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34943 }
34944 else
34945 gcc_unreachable ();
34946
34947 return reg;
34948 }
34949
34950 /* Generate an FMA instruction. */
34951
34952 static void
34953 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34954 {
34955 machine_mode mode = GET_MODE (target);
34956 rtx dst;
34957
34958 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34959 gcc_assert (dst != NULL);
34960
34961 if (dst != target)
34962 emit_move_insn (target, dst);
34963 }
34964
34965 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34966
34967 static void
34968 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34969 {
34970 machine_mode mode = GET_MODE (dst);
34971 rtx r;
34972
34973 /* This is a tad more complicated, since the fnma_optab is for
34974 a different expression: fma(-m1, m2, a), which is the same
34975 thing except in the case of signed zeros.
34976
34977 Fortunately we know that if FMA is supported that FNMSUB is
34978 also supported in the ISA. Just expand it directly. */
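 /* Concretely, -fma (m1, m2, -a) and fma (-m1, m2, a) both compute
    a - m1*m2; but when m1*m2 == a, the first rounds to -0.0 while
    the second gives +0.0.  */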
34979
34980 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34981
34982 r = gen_rtx_NEG (mode, a);
34983 r = gen_rtx_FMA (mode, m1, m2, r);
34984 r = gen_rtx_NEG (mode, r);
34985 emit_insn (gen_rtx_SET (dst, r));
34986 }
34987
34988 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34989 add a reg_note saying that this was a division. Support both scalar and
34990 vector divide. Assumes no trapping math and finite arguments. */
34991
34992 void
34993 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34994 {
34995 machine_mode mode = GET_MODE (dst);
34996 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34997 int i;
34998
34999 /* Low precision estimates guarantee 5 bits of accuracy. High
35000 precision estimates guarantee 14 bits of accuracy. SFmode
35001 requires 23 bits of accuracy. DFmode requires 52 bits of
35002 accuracy. Each pass at least doubles the accuracy, leading
35003 to the following. */
35004 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35005 if (mode == DFmode || mode == V2DFmode)
35006 passes++;
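 /* Concretely: three passes take a 5-bit estimate to 10, 20 and then
    40 bits, covering SFmode's 23, and a fourth pass reaches 80 bits
    for DFmode's 52; with 14-bit estimates one pass gives 28 bits and
    two passes give 56.  */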
35007
35008 enum insn_code code = optab_handler (smul_optab, mode);
35009 insn_gen_fn gen_mul = GEN_FCN (code);
35010
35011 gcc_assert (code != CODE_FOR_nothing);
35012
35013 one = rs6000_load_constant_and_splat (mode, dconst1);
35014
35015 /* x0 = 1./d estimate */
35016 x0 = gen_reg_rtx (mode);
35017 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
35018 UNSPEC_FRES)));
35019
35020 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35021 if (passes > 1) {
35022
35023 /* e0 = 1. - d * x0 */
35024 e0 = gen_reg_rtx (mode);
35025 rs6000_emit_nmsub (e0, d, x0, one);
35026
35027 /* x1 = x0 + e0 * x0 */
35028 x1 = gen_reg_rtx (mode);
35029 rs6000_emit_madd (x1, e0, x0, x0);
35030
35031 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
35032 ++i, xprev = xnext, eprev = enext) {
35033
35034 /* enext = eprev * eprev */
35035 enext = gen_reg_rtx (mode);
35036 emit_insn (gen_mul (enext, eprev, eprev));
35037
35038 /* xnext = xprev + enext * xprev */
35039 xnext = gen_reg_rtx (mode);
35040 rs6000_emit_madd (xnext, enext, xprev, xprev);
35041 }
35042
35043 } else
35044 xprev = x0;
35045
35046 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35047
35048 /* u = n * xprev */
35049 u = gen_reg_rtx (mode);
35050 emit_insn (gen_mul (u, n, xprev));
35051
35052 /* v = n - (d * u) */
35053 v = gen_reg_rtx (mode);
35054 rs6000_emit_nmsub (v, d, u, n);
35055
35056 /* dst = (v * xprev) + u */
35057 rs6000_emit_madd (dst, v, xprev, u);
35058
35059 if (note_p)
35060 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
35061 }
35062
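#if 0
/* A minimal standalone model (not part of the build) of the scheme
   emitted above.  The hardware FRE/FRES estimate is simulated by an
   initial guess with roughly five accurate bits; the name swdiv_model
   and the 2% perturbation are illustrative only.  */
#include <stdio.h>

static double
swdiv_model (double n, double d, int passes)
{
  double x = (1.0 / d) * (1.0 + 0.02);  /* crude ~5-bit estimate */
  int i;

  /* Each pass but the last: x = x + e*x, where e = 1 - d*x.  */
  for (i = 0; i < passes - 1; i++)
    {
      double e = 1.0 - d * x;
      x = x + e * x;
    }

  /* The last pass folds in the numerator: dst = u + v*x.  */
  double u = n * x;
  double v = n - d * u;
  return u + v * x;
}

int
main (void)
{
  /* Four passes, as for DFmode without -mrecip-precision.  */
  printf ("%.17g\n", swdiv_model (355.0, 113.0, 4));
  return 0;
}
#endif
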
35063 /* Goldschmidt's Algorithm for single/double-precision floating point
35064 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35065
35066 void
35067 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
35068 {
35069 machine_mode mode = GET_MODE (src);
35070 rtx e = gen_reg_rtx (mode);
35071 rtx g = gen_reg_rtx (mode);
35072 rtx h = gen_reg_rtx (mode);
35073
35074 /* Low precision estimates guarantee 5 bits of accuracy. High
35075 precision estimates guarantee 14 bits of accuracy. SFmode
35076 requires 23 bits of accuracy. DFmode requires 52 bits of
35077 accuracy. Each pass at least doubles the accuracy, leading
35078 to the following. */
35079 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35080 if (mode == DFmode || mode == V2DFmode)
35081 passes++;
35082
35083 int i;
35084 rtx mhalf;
35085 enum insn_code code = optab_handler (smul_optab, mode);
35086 insn_gen_fn gen_mul = GEN_FCN (code);
35087
35088 gcc_assert (code != CODE_FOR_nothing);
35089
35090 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
35091
35092 /* e = rsqrt estimate */
35093 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
35094 UNSPEC_RSQRT)));
35095
35096 /* If src == 0.0, replace the infinite rsqrt estimate with zero so sqrt(0.0) does not become inf * 0 = NaN. */
35097 if (!recip)
35098 {
35099 rtx zero = force_reg (mode, CONST0_RTX (mode));
35100
35101 if (mode == SFmode)
35102 {
35103 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35104 e, zero, mode, 0);
35105 if (target != e)
35106 emit_move_insn (e, target);
35107 }
35108 else
35109 {
35110 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35111 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35112 }
35113 }
35114
35115 /* g = sqrt estimate. */
35116 emit_insn (gen_mul (g, e, src));
35117 /* h = 1/(2*sqrt) estimate. */
35118 emit_insn (gen_mul (h, e, mhalf));
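 /* One refinement step computes t = 1/2 - g*h, then g' = g + t*g and
    h' = h + t*h; writing g = sqrt(src)*(1+e), each step roughly
    squares the relative error e, doubling the accurate bits per
    pass.  */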
35119
35120 if (recip)
35121 {
35122 if (passes == 1)
35123 {
35124 rtx t = gen_reg_rtx (mode);
35125 rs6000_emit_nmsub (t, g, h, mhalf);
35126 /* Apply correction directly to 1/rsqrt estimate. */
35127 rs6000_emit_madd (dst, e, t, e);
35128 }
35129 else
35130 {
35131 for (i = 0; i < passes; i++)
35132 {
35133 rtx t1 = gen_reg_rtx (mode);
35134 rtx g1 = gen_reg_rtx (mode);
35135 rtx h1 = gen_reg_rtx (mode);
35136
35137 rs6000_emit_nmsub (t1, g, h, mhalf);
35138 rs6000_emit_madd (g1, g, t1, g);
35139 rs6000_emit_madd (h1, h, t1, h);
35140
35141 g = g1;
35142 h = h1;
35143 }
35144 /* Multiply by 2 for 1/rsqrt. */
35145 emit_insn (gen_add3_insn (dst, h, h));
35146 }
35147 }
35148 else
35149 {
35150 rtx t = gen_reg_rtx (mode);
35151 rs6000_emit_nmsub (t, g, h, mhalf);
35152 rs6000_emit_madd (dst, g, t, g);
35153 }
35154
35155 return;
35156 }
35157
35158 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35159 (Power7) targets. DST is the target, and SRC is the argument operand. */
35160
35161 void
35162 rs6000_emit_popcount (rtx dst, rtx src)
35163 {
35164 machine_mode mode = GET_MODE (dst);
35165 rtx tmp1, tmp2;
35166
35167 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35168 if (TARGET_POPCNTD)
35169 {
35170 if (mode == SImode)
35171 emit_insn (gen_popcntdsi2 (dst, src));
35172 else
35173 emit_insn (gen_popcntddi2 (dst, src));
35174 return;
35175 }
35176
35177 tmp1 = gen_reg_rtx (mode);
35178
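 /* popcntb leaves a population count in each byte; multiplying by
    0x01...01 sums those bytes into the most significant byte, which
    the final shift extracts.  E.g. for SImode src = 0x01020304,
    popcntb gives 0x01010201, times 0x01010101 is 0x05040301, and
    >> 24 yields 5 = popcount (0x01020304).  */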
35179 if (mode == SImode)
35180 {
35181 emit_insn (gen_popcntbsi2 (tmp1, src));
35182 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35183 NULL_RTX, 0);
35184 tmp2 = force_reg (SImode, tmp2);
35185 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35186 }
35187 else
35188 {
35189 emit_insn (gen_popcntbdi2 (tmp1, src));
35190 tmp2 = expand_mult (DImode, tmp1,
35191 GEN_INT ((HOST_WIDE_INT)
35192 0x01010101 << 32 | 0x01010101),
35193 NULL_RTX, 0);
35194 tmp2 = force_reg (DImode, tmp2);
35195 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
35196 }
35197 }
35198
35199
35200 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35201 target, and SRC is the argument operand. */
35202
35203 void
35204 rs6000_emit_parity (rtx dst, rtx src)
35205 {
35206 machine_mode mode = GET_MODE (dst);
35207 rtx tmp;
35208
35209 tmp = gen_reg_rtx (mode);
35210
35211 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35212 if (TARGET_CMPB)
35213 {
35214 if (mode == SImode)
35215 {
35216 emit_insn (gen_popcntbsi2 (tmp, src));
35217 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35218 }
35219 else
35220 {
35221 emit_insn (gen_popcntbdi2 (tmp, src));
35222 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35223 }
35224 return;
35225 }
35226
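 /* Without prtyw/prtyd, fold the per-byte counts from popcntb with
    shift/xor: xor preserves the low (parity) bit of a sum, so after
    folding the halves together the overall parity ends up in the low
    bit of the low byte.  */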
35227 if (mode == SImode)
35228 {
35229 /* Is mult+shift >= shift+xor+shift+xor? */
35230 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35231 {
35232 rtx tmp1, tmp2, tmp3, tmp4;
35233
35234 tmp1 = gen_reg_rtx (SImode);
35235 emit_insn (gen_popcntbsi2 (tmp1, src));
35236
35237 tmp2 = gen_reg_rtx (SImode);
35238 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35239 tmp3 = gen_reg_rtx (SImode);
35240 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35241
35242 tmp4 = gen_reg_rtx (SImode);
35243 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35244 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35245 }
35246 else
35247 rs6000_emit_popcount (tmp, src);
35248 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35249 }
35250 else
35251 {
35252 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35253 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35254 {
35255 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35256
35257 tmp1 = gen_reg_rtx (DImode);
35258 emit_insn (gen_popcntbdi2 (tmp1, src));
35259
35260 tmp2 = gen_reg_rtx (DImode);
35261 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35262 tmp3 = gen_reg_rtx (DImode);
35263 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35264
35265 tmp4 = gen_reg_rtx (DImode);
35266 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35267 tmp5 = gen_reg_rtx (DImode);
35268 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35269
35270 tmp6 = gen_reg_rtx (DImode);
35271 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35272 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35273 }
35274 else
35275 rs6000_emit_popcount (tmp, src);
35276 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
35277 }
35278 }
35279
35280 /* Expand an Altivec constant permutation for little endian mode.
35281 There are two issues: First, the two input operands must be
35282 swapped so that together they form a double-wide array in LE
35283 order. Second, the vperm instruction has surprising behavior
35284 in LE mode: it interprets the elements of the source vectors
35285 in BE mode ("left to right") and interprets the elements of
35286 the destination vector in LE mode ("right to left"). To
35287 correct for this, we must subtract each element of the permute
35288 control vector from 31.
35289
35290 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35291 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35292 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35293 serve as the permute control vector. Then, in BE mode,
35294
35295 vperm 9,10,11,12
35296
35297 places the desired result in vr9. However, in LE mode the
35298 vector contents will be
35299
35300 vr10 = 00000003 00000002 00000001 00000000
35301 vr11 = 00000007 00000006 00000005 00000004
35302
35303 The result of the vperm using the same permute control vector is
35304
35305 vr9 = 05000000 07000000 01000000 03000000
35306
35307 That is, the leftmost 4 bytes of vr10 are interpreted as the
35308 source for the rightmost 4 bytes of vr9, and so on.
35309
35310 If we change the permute control vector to
35311
35312 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35313
35314 and issue
35315
35316 vperm 9,11,10,12
35317
35318 we get the desired
35319
35320 vr9 = 00000006 00000004 00000002 00000000. */
35321
35322 void
35323 altivec_expand_vec_perm_const_le (rtx operands[4])
35324 {
35325 unsigned int i;
35326 rtx perm[16];
35327 rtx constv, unspec;
35328 rtx target = operands[0];
35329 rtx op0 = operands[1];
35330 rtx op1 = operands[2];
35331 rtx sel = operands[3];
35332
35333 /* Unpack and adjust the constant selector. */
35334 for (i = 0; i < 16; ++i)
35335 {
35336 rtx e = XVECEXP (sel, 0, i);
35337 unsigned int elt = 31 - (INTVAL (e) & 31);
35338 perm[i] = GEN_INT (elt);
35339 }
35340
35341 /* Expand to a permute, swapping the inputs and using the
35342 adjusted selector. */
35343 if (!REG_P (op0))
35344 op0 = force_reg (V16QImode, op0);
35345 if (!REG_P (op1))
35346 op1 = force_reg (V16QImode, op1);
35347
35348 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35349 constv = force_reg (V16QImode, constv);
35350 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35351 UNSPEC_VPERM);
35352 if (!REG_P (target))
35353 {
35354 rtx tmp = gen_reg_rtx (V16QImode);
35355 emit_move_insn (tmp, unspec);
35356 unspec = tmp;
35357 }
35358
35359 emit_move_insn (target, unspec);
35360 }
35361
35362 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35363 permute control vector. But here it's not a constant, so we must
35364 generate a vector NAND or NOR to do the adjustment. */
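/* vperm reads only the low five bits of each selector byte, and for
   e in 0..31 we have 31 - e == (~e & 31), so a bitwise complement of
   the selector performs the required subtraction from 31.  */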
35365
35366 void
35367 altivec_expand_vec_perm_le (rtx operands[4])
35368 {
35369 rtx notx, iorx, unspec;
35370 rtx target = operands[0];
35371 rtx op0 = operands[1];
35372 rtx op1 = operands[2];
35373 rtx sel = operands[3];
35374 rtx tmp = target;
35375 rtx norreg = gen_reg_rtx (V16QImode);
35376 machine_mode mode = GET_MODE (target);
35377
35378 /* Get everything in regs so the pattern matches. */
35379 if (!REG_P (op0))
35380 op0 = force_reg (mode, op0);
35381 if (!REG_P (op1))
35382 op1 = force_reg (mode, op1);
35383 if (!REG_P (sel))
35384 sel = force_reg (V16QImode, sel);
35385 if (!REG_P (target))
35386 tmp = gen_reg_rtx (mode);
35387
35388 if (TARGET_P9_VECTOR)
35389 {
35390 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
35391 UNSPEC_VPERMR);
35392 }
35393 else
35394 {
35395 /* Invert the selector with a VNAND if available, else a VNOR.
35396 The VNAND is preferred for future fusion opportunities. */
35397 notx = gen_rtx_NOT (V16QImode, sel);
35398 iorx = (TARGET_P8_VECTOR
35399 ? gen_rtx_IOR (V16QImode, notx, notx)
35400 : gen_rtx_AND (V16QImode, notx, notx));
35401 emit_insn (gen_rtx_SET (norreg, iorx));
35402
35403 /* Permute with operands reversed and adjusted selector. */
35404 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35405 UNSPEC_VPERM);
35406 }
35407
35408 /* Copy into target, possibly by way of a register. */
35409 if (!REG_P (target))
35410 {
35411 emit_move_insn (tmp, unspec);
35412 unspec = tmp;
35413 }
35414
35415 emit_move_insn (target, unspec);
35416 }
35417
35418 /* Expand an Altivec constant permutation. Return true if we match
35419 an efficient implementation; false to fall back to VPERM. */
35420
35421 bool
35422 altivec_expand_vec_perm_const (rtx operands[4])
35423 {
35424 struct altivec_perm_insn {
35425 HOST_WIDE_INT mask;
35426 enum insn_code impl;
35427 unsigned char perm[16];
35428 };
35429 static const struct altivec_perm_insn patterns[] = {
35430 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35431 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35432 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35433 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35434 { OPTION_MASK_ALTIVEC,
35435 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35436 : CODE_FOR_altivec_vmrglb_direct),
35437 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35438 { OPTION_MASK_ALTIVEC,
35439 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35440 : CODE_FOR_altivec_vmrglh_direct),
35441 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35442 { OPTION_MASK_ALTIVEC,
35443 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35444 : CODE_FOR_altivec_vmrglw_direct),
35445 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35446 { OPTION_MASK_ALTIVEC,
35447 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35448 : CODE_FOR_altivec_vmrghb_direct),
35449 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35450 { OPTION_MASK_ALTIVEC,
35451 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35452 : CODE_FOR_altivec_vmrghh_direct),
35453 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35454 { OPTION_MASK_ALTIVEC,
35455 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35456 : CODE_FOR_altivec_vmrghw_direct),
35457 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35458 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew_v4si,
35459 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35460 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
35461 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35462 };
35463
35464 unsigned int i, j, elt, which;
35465 unsigned char perm[16];
35466 rtx target, op0, op1, sel, x;
35467 bool one_vec;
35468
35469 target = operands[0];
35470 op0 = operands[1];
35471 op1 = operands[2];
35472 sel = operands[3];
35473
35474 /* Unpack the constant selector. */
35475 for (i = which = 0; i < 16; ++i)
35476 {
35477 rtx e = XVECEXP (sel, 0, i);
35478 elt = INTVAL (e) & 31;
35479 which |= (elt < 16 ? 1 : 2);
35480 perm[i] = elt;
35481 }
35482
35483 /* Simplify the constant selector based on operands. */
35484 switch (which)
35485 {
35486 default:
35487 gcc_unreachable ();
35488
35489 case 3:
35490 one_vec = false;
35491 if (!rtx_equal_p (op0, op1))
35492 break;
35493 /* FALLTHRU */
35494
35495 case 2:
35496 for (i = 0; i < 16; ++i)
35497 perm[i] &= 15;
35498 op0 = op1;
35499 one_vec = true;
35500 break;
35501
35502 case 1:
35503 op1 = op0;
35504 one_vec = true;
35505 break;
35506 }
35507
35508 /* Look for splat patterns. */
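 /* A selector that repeats a single byte index becomes a vspltb; a
    repetition of an aligned byte pair or quad becomes a vsplth or
    vspltw on the operand viewed as V8HI or V4SI.  For little endian
    the element number is mirrored (15 - elt, 7 - elt/2, 3 - elt/4).  */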
35509 if (one_vec)
35510 {
35511 elt = perm[0];
35512
35513 for (i = 0; i < 16; ++i)
35514 if (perm[i] != elt)
35515 break;
35516 if (i == 16)
35517 {
35518 if (!BYTES_BIG_ENDIAN)
35519 elt = 15 - elt;
35520 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35521 return true;
35522 }
35523
35524 if (elt % 2 == 0)
35525 {
35526 for (i = 0; i < 16; i += 2)
35527 if (perm[i] != elt || perm[i + 1] != elt + 1)
35528 break;
35529 if (i == 16)
35530 {
35531 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35532 x = gen_reg_rtx (V8HImode);
35533 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35534 GEN_INT (field)));
35535 emit_move_insn (target, gen_lowpart (V16QImode, x));
35536 return true;
35537 }
35538 }
35539
35540 if (elt % 4 == 0)
35541 {
35542 for (i = 0; i < 16; i += 4)
35543 if (perm[i] != elt
35544 || perm[i + 1] != elt + 1
35545 || perm[i + 2] != elt + 2
35546 || perm[i + 3] != elt + 3)
35547 break;
35548 if (i == 16)
35549 {
35550 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35551 x = gen_reg_rtx (V4SImode);
35552 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35553 GEN_INT (field)));
35554 emit_move_insn (target, gen_lowpart (V16QImode, x));
35555 return true;
35556 }
35557 }
35558 }
35559
35560 /* Look for merge and pack patterns. */
35561 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35562 {
35563 bool swapped;
35564
35565 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35566 continue;
35567
35568 elt = patterns[j].perm[0];
35569 if (perm[0] == elt)
35570 swapped = false;
35571 else if (perm[0] == elt + 16)
35572 swapped = true;
35573 else
35574 continue;
35575 for (i = 1; i < 16; ++i)
35576 {
35577 elt = patterns[j].perm[i];
35578 if (swapped)
35579 elt = (elt >= 16 ? elt - 16 : elt + 16);
35580 else if (one_vec && elt >= 16)
35581 elt -= 16;
35582 if (perm[i] != elt)
35583 break;
35584 }
35585 if (i == 16)
35586 {
35587 enum insn_code icode = patterns[j].impl;
35588 machine_mode omode = insn_data[icode].operand[0].mode;
35589 machine_mode imode = insn_data[icode].operand[1].mode;
35590
35591 /* For little-endian, don't use vpkuwum and vpkuhum if the
35592 underlying vector type is not V4SI and V8HI, respectively.
35593 For example, using vpkuwum with a V8HI picks up the even
35594 halfwords (BE numbering) when the even halfwords (LE
35595 numbering) are what we need. */
35596 if (!BYTES_BIG_ENDIAN
35597 && icode == CODE_FOR_altivec_vpkuwum_direct
35598 && ((GET_CODE (op0) == REG
35599 && GET_MODE (op0) != V4SImode)
35600 || (GET_CODE (op0) == SUBREG
35601 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35602 continue;
35603 if (!BYTES_BIG_ENDIAN
35604 && icode == CODE_FOR_altivec_vpkuhum_direct
35605 && ((GET_CODE (op0) == REG
35606 && GET_MODE (op0) != V8HImode)
35607 || (GET_CODE (op0) == SUBREG
35608 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35609 continue;
35610
35611 /* For little-endian, the two input operands must be swapped
35612 (or swapped back) to ensure proper right-to-left numbering
35613 from 0 to 2N-1. */
35614 if (swapped ^ !BYTES_BIG_ENDIAN)
35615 std::swap (op0, op1);
35616 if (imode != V16QImode)
35617 {
35618 op0 = gen_lowpart (imode, op0);
35619 op1 = gen_lowpart (imode, op1);
35620 }
35621 if (omode == V16QImode)
35622 x = target;
35623 else
35624 x = gen_reg_rtx (omode);
35625 emit_insn (GEN_FCN (icode) (x, op0, op1));
35626 if (omode != V16QImode)
35627 emit_move_insn (target, gen_lowpart (V16QImode, x));
35628 return true;
35629 }
35630 }
35631
35632 if (!BYTES_BIG_ENDIAN)
35633 {
35634 altivec_expand_vec_perm_const_le (operands);
35635 return true;
35636 }
35637
35638 return false;
35639 }
35640
35641 /* Expand a Paired Single or VSX Permute Doubleword constant permutation.
35642 Return true if we match an efficient implementation. */
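/* Each of PERM0 and PERM1 selects one of the four elements of the
   concatenation { op0[0], op0[1], op1[0], op1[1] }: bit 1 picks the
   operand and bit 0 the element within it.  */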
35643
35644 static bool
35645 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35646 unsigned char perm0, unsigned char perm1)
35647 {
35648 rtx x;
35649
35650 /* If both selectors come from the same operand, fold to single op. */
35651 if ((perm0 & 2) == (perm1 & 2))
35652 {
35653 if (perm0 & 2)
35654 op0 = op1;
35655 else
35656 op1 = op0;
35657 }
35658 /* If both operands are equal, fold to simpler permutation. */
35659 if (rtx_equal_p (op0, op1))
35660 {
35661 perm0 = perm0 & 1;
35662 perm1 = (perm1 & 1) + 2;
35663 }
35664 /* If the first selector comes from the second operand, swap. */
35665 else if (perm0 & 2)
35666 {
35667 if (perm1 & 2)
35668 return false;
35669 perm0 -= 2;
35670 perm1 += 2;
35671 std::swap (op0, op1);
35672 }
35673 /* If the second selector does not come from the second operand, fail. */
35674 else if ((perm1 & 2) == 0)
35675 return false;
35676
35677 /* Success! */
35678 if (target != NULL)
35679 {
35680 machine_mode vmode, dmode;
35681 rtvec v;
35682
35683 vmode = GET_MODE (target);
35684 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35685 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
35686 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35687 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35688 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35689 emit_insn (gen_rtx_SET (target, x));
35690 }
35691 return true;
35692 }
35693
35694 bool
35695 rs6000_expand_vec_perm_const (rtx operands[4])
35696 {
35697 rtx target, op0, op1, sel;
35698 unsigned char perm0, perm1;
35699
35700 target = operands[0];
35701 op0 = operands[1];
35702 op1 = operands[2];
35703 sel = operands[3];
35704
35705 /* Unpack the constant selector. */
35706 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
35707 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
35708
35709 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
35710 }
35711
35712 /* Test whether a constant permutation is supported. */
35713
35714 static bool
35715 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
35716 const unsigned char *sel)
35717 {
35718 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35719 if (TARGET_ALTIVEC)
35720 return true;
35721
35722 /* Check for ps_merge* or evmerge* insns. */
35723 if (TARGET_PAIRED_FLOAT && vmode == V2SFmode)
35724 {
35725 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35726 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35727 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
35728 }
35729
35730 return false;
35731 }
35732
35733 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
35734
35735 static void
35736 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35737 machine_mode vmode, unsigned nelt, rtx perm[])
35738 {
35739 machine_mode imode;
35740 rtx x;
35741
35742 imode = vmode;
35743 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
35744 {
35745 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
35746 imode = mode_for_vector (imode, nelt);
35747 }
35748
35749 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
35750 x = expand_vec_perm (vmode, op0, op1, x, target);
35751 if (x != target)
35752 emit_move_insn (target, x);
35753 }
35754
35755 /* Expand an extract even operation. */
35756
35757 void
35758 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35759 {
35760 machine_mode vmode = GET_MODE (target);
35761 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35762 rtx perm[16];
35763
35764 for (i = 0; i < nelt; i++)
35765 perm[i] = GEN_INT (i * 2);
35766
35767 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
35768 }
35769
35770 /* Expand a vector interleave operation. */
35771
35772 void
35773 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35774 {
35775 machine_mode vmode = GET_MODE (target);
35776 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35777 rtx perm[16];
35778
35779 high = (highp ? 0 : nelt / 2);
35780 for (i = 0; i < nelt / 2; i++)
35781 {
35782 perm[i * 2] = GEN_INT (i + high);
35783 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
35784 }
35785
35786 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
35787 }
35788
35789 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
35790 void
35791 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35792 {
35793 HOST_WIDE_INT hwi_scale (scale);
35794 REAL_VALUE_TYPE r_pow;
35795 rtvec v = rtvec_alloc (2);
35796 rtx elt;
35797 rtx scale_vec = gen_reg_rtx (V2DFmode);
35798 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35799 elt = const_double_from_real_value (r_pow, DFmode);
35800 RTVEC_ELT (v, 0) = elt;
35801 RTVEC_ELT (v, 1) = elt;
35802 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35803 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
35804 }
35805
35806 /* Return an RTX representing where to find the function value of a
35807 function returning MODE. */
35808 static rtx
35809 rs6000_complex_function_value (machine_mode mode)
35810 {
35811 unsigned int regno;
35812 rtx r1, r2;
35813 machine_mode inner = GET_MODE_INNER (mode);
35814 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35815
35816 if (TARGET_FLOAT128_TYPE
35817 && (mode == KCmode
35818 || (mode == TCmode && TARGET_IEEEQUAD)))
35819 regno = ALTIVEC_ARG_RETURN;
35820
35821 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35822 regno = FP_ARG_RETURN;
35823
35824 else
35825 {
35826 regno = GP_ARG_RETURN;
35827
35828 /* 32-bit is OK since it'll go in r3/r4. */
35829 if (TARGET_32BIT && inner_bytes >= 4)
35830 return gen_rtx_REG (mode, regno);
35831 }
35832
35833 if (inner_bytes >= 8)
35834 return gen_rtx_REG (mode, regno);
35835
35836 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35837 const0_rtx);
35838 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35839 GEN_INT (inner_bytes));
35840 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35841 }
35842
35843 /* Return an rtx describing a return value of MODE as a PARALLEL
35844 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35845 stride REG_STRIDE. */
35846
35847 static rtx
35848 rs6000_parallel_return (machine_mode mode,
35849 int n_elts, machine_mode elt_mode,
35850 unsigned int regno, unsigned int reg_stride)
35851 {
35852 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35853
35854 int i;
35855 for (i = 0; i < n_elts; i++)
35856 {
35857 rtx r = gen_rtx_REG (elt_mode, regno);
35858 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35859 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35860 regno += reg_stride;
35861 }
35862
35863 return par;
35864 }
35865
35866 /* Target hook for TARGET_FUNCTION_VALUE.
35867
35868 An integer value is in r3 and a floating-point value is in fp1,
35869 unless -msoft-float. */
35870
35871 static rtx
35872 rs6000_function_value (const_tree valtype,
35873 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35874 bool outgoing ATTRIBUTE_UNUSED)
35875 {
35876 machine_mode mode;
35877 unsigned int regno;
35878 machine_mode elt_mode;
35879 int n_elts;
35880
35881 /* Special handling for structs in darwin64. */
35882 if (TARGET_MACHO
35883 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35884 {
35885 CUMULATIVE_ARGS valcum;
35886 rtx valret;
35887
35888 valcum.words = 0;
35889 valcum.fregno = FP_ARG_MIN_REG;
35890 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35891 /* Do a trial code generation as if this were going to be passed as
35892 an argument; if any part goes in memory, we return NULL. */
35893 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35894 if (valret)
35895 return valret;
35896 /* Otherwise fall through to standard ABI rules. */
35897 }
35898
35899 mode = TYPE_MODE (valtype);
35900
35901 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35902 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35903 {
35904 int first_reg, n_regs;
35905
35906 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35907 {
35908 /* _Decimal128 must use even/odd register pairs. */
35909 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35910 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35911 }
35912 else
35913 {
35914 first_reg = ALTIVEC_ARG_RETURN;
35915 n_regs = 1;
35916 }
35917
35918 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
35919 }
35920
35921 /* Some return value types need to be split for the 32-bit ABI with -mpowerpc64. */
35922 if (TARGET_32BIT && TARGET_POWERPC64)
35923 switch (mode)
35924 {
35925 default:
35926 break;
35927 case DImode:
35928 case SCmode:
35929 case DCmode:
35930 case TCmode:
35931 int count = GET_MODE_SIZE (mode) / 4;
35932 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35933 }
35934
35935 if ((INTEGRAL_TYPE_P (valtype)
35936 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35937 || POINTER_TYPE_P (valtype))
35938 mode = TARGET_32BIT ? SImode : DImode;
35939
35940 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35941 /* _Decimal128 must use an even/odd register pair. */
35942 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35943 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
35944 && !FLOAT128_VECTOR_P (mode)
35945 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
35946 regno = FP_ARG_RETURN;
35947 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35948 && targetm.calls.split_complex_arg)
35949 return rs6000_complex_function_value (mode);
35950 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35951 return register is used in both cases, and we won't see V2DImode/V2DFmode
35952 for pure altivec, combine the two cases. */
35953 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35954 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35955 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35956 regno = ALTIVEC_ARG_RETURN;
35957 else
35958 regno = GP_ARG_RETURN;
35959
35960 return gen_rtx_REG (mode, regno);
35961 }
35962
35963 /* Define how to find the value returned by a library function
35964 assuming the value has mode MODE. */
35965 rtx
35966 rs6000_libcall_value (machine_mode mode)
35967 {
35968 unsigned int regno;
35969
35970 /* A long long return value needs to be split for the 32-bit ABI with -mpowerpc64. */
35971 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35972 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35973
35974 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35975 /* _Decimal128 must use an even/odd register pair. */
35976 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35977 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
35978 && TARGET_HARD_FLOAT
35979 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
35980 regno = FP_ARG_RETURN;
35981 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35982 return register is used in both cases, and we won't see V2DImode/V2DFmode
35983 for pure altivec, combine the two cases. */
35984 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35985 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35986 regno = ALTIVEC_ARG_RETURN;
35987 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35988 return rs6000_complex_function_value (mode);
35989 else
35990 regno = GP_ARG_RETURN;
35991
35992 return gen_rtx_REG (mode, regno);
35993 }
35994
35995
35996 /* Return true if we use LRA instead of the reload pass. */
35997 static bool
35998 rs6000_lra_p (void)
35999 {
36000 return TARGET_LRA;
36001 }
36002
36003 /* Compute register pressure classes. We implement the target hook to avoid
36004 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
36005 lead to incorrect estimates of the number of available registers and
36006 therefore increased register pressure and spilling. */
36007 static int
36008 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
36009 {
36010 int n;
36011
36012 n = 0;
36013 pressure_classes[n++] = GENERAL_REGS;
36014 if (TARGET_VSX)
36015 pressure_classes[n++] = VSX_REGS;
36016 else
36017 {
36018 if (TARGET_ALTIVEC)
36019 pressure_classes[n++] = ALTIVEC_REGS;
36020 if (TARGET_HARD_FLOAT)
36021 pressure_classes[n++] = FLOAT_REGS;
36022 }
36023 pressure_classes[n++] = CR_REGS;
36024 pressure_classes[n++] = SPECIAL_REGS;
36025
36026 return n;
36027 }
36028
36029 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36030 Frame pointer elimination is automatically handled.
36031
36032 For the RS/6000, if frame pointer elimination is being done, we would like
36033 to convert ap into fp, not sp.
36034
36035 We need r30 if -mminimal-toc was specified, and there are constant pool
36036 references. */
36037
36038 static bool
36039 rs6000_can_eliminate (const int from, const int to)
36040 {
36041 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
36042 ? ! frame_pointer_needed
36043 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
36044 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
36045 || constant_pool_empty_p ()
36046 : true);
36047 }
36048
36049 /* Define the offset between two registers, FROM to be eliminated and its
36050 replacement TO, at the start of a routine. */
36051 HOST_WIDE_INT
36052 rs6000_initial_elimination_offset (int from, int to)
36053 {
36054 rs6000_stack_t *info = rs6000_stack_info ();
36055 HOST_WIDE_INT offset;
36056
36057 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36058 offset = info->push_p ? 0 : -info->total_size;
36059 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36060 {
36061 offset = info->push_p ? 0 : -info->total_size;
36062 if (FRAME_GROWS_DOWNWARD)
36063 offset += info->fixed_size + info->vars_size + info->parm_size;
36064 }
36065 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36066 offset = FRAME_GROWS_DOWNWARD
36067 ? info->fixed_size + info->vars_size + info->parm_size
36068 : 0;
36069 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36070 offset = info->total_size;
36071 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36072 offset = info->push_p ? info->total_size : 0;
36073 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
36074 offset = 0;
36075 else
36076 gcc_unreachable ();
36077
36078 return offset;
36079 }
36080
36081 /* Fill in sizes of registers used by unwinder. */
36082
36083 static void
36084 rs6000_init_dwarf_reg_sizes_extra (tree address)
36085 {
36086 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36087 {
36088 int i;
36089 machine_mode mode = TYPE_MODE (char_type_node);
36090 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36091 rtx mem = gen_rtx_MEM (BLKmode, addr);
36092 rtx value = gen_int_mode (16, mode);
36093
36094 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36095 The unwinder still needs to know the size of Altivec registers. */
36096
36097 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36098 {
36099 int column = DWARF_REG_TO_UNWIND_COLUMN
36100 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36101 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36102
36103 emit_move_insn (adjust_address (mem, mode, offset), value);
36104 }
36105 }
36106 }
36107
36108 /* Map internal gcc register numbers to debug format register numbers.
36109 FORMAT specifies the type of debug register number to use:
36110 0 -- debug information, except for frame-related sections
36111 1 -- DWARF .debug_frame section
36112 2 -- DWARF .eh_frame section */
36113
36114 unsigned int
36115 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36116 {
36117 /* Use the internal register number for non-DWARF debug information,
36118 and also for .eh_frame. */
36119 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
36120 return regno;
36121
36122 /* On some platforms, we use the standard DWARF register
36123 numbering for .debug_info and .debug_frame. */
36124 #ifdef RS6000_USE_DWARF_NUMBERING
36125 if (regno <= 63)
36126 return regno;
36127 if (regno == LR_REGNO)
36128 return 108;
36129 if (regno == CTR_REGNO)
36130 return 109;
36131 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36132 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36133 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36134 to the DWARF reg for CR. */
36135 if (format == 1 && regno == CR2_REGNO)
36136 return 64;
36137 if (CR_REGNO_P (regno))
36138 return regno - CR0_REGNO + 86;
36139 if (regno == CA_REGNO)
36140 return 101; /* XER */
36141 if (ALTIVEC_REGNO_P (regno))
36142 return regno - FIRST_ALTIVEC_REGNO + 1124;
36143 if (regno == VRSAVE_REGNO)
36144 return 356;
36145 if (regno == VSCR_REGNO)
36146 return 67;
36147 #endif
36148 return regno;
36149 }
36150
36151 /* target hook eh_return_filter_mode */
36152 static machine_mode
36153 rs6000_eh_return_filter_mode (void)
36154 {
36155 return TARGET_32BIT ? SImode : word_mode;
36156 }
36157
36158 /* Target hook for scalar_mode_supported_p. */
36159 static bool
36160 rs6000_scalar_mode_supported_p (machine_mode mode)
36161 {
36162 /* -m32 does not support TImode. This is the default, from
36163 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36164 same ABI as for -m32. But default_scalar_mode_supported_p allows
36165 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36166 for -mpowerpc64. */
36167 if (TARGET_32BIT && mode == TImode)
36168 return false;
36169
36170 if (DECIMAL_FLOAT_MODE_P (mode))
36171 return default_decimal_float_supported_p ();
36172 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
36173 return true;
36174 else
36175 return default_scalar_mode_supported_p (mode);
36176 }
36177
36178 /* Target hook for vector_mode_supported_p. */
36179 static bool
36180 rs6000_vector_mode_supported_p (machine_mode mode)
36181 {
36182
36183 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
36184 return true;
36185
36186 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36187 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36188 double-double. */
36189 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36190 return true;
36191
36192 else
36193 return false;
36194 }
36195
36196 /* Target hook for floatn_mode. */
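/* _Float32 and _Float64 always map to SFmode and DFmode, and _Float32x
   to DFmode; with -mfloat128, _Float128 and _Float64x map to KFmode
   (or to TFmode when long double is IEEE 128-bit); _Float128x is never
   supported.  */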
36197 static machine_mode
36198 rs6000_floatn_mode (int n, bool extended)
36199 {
36200 if (extended)
36201 {
36202 switch (n)
36203 {
36204 case 32:
36205 return DFmode;
36206
36207 case 64:
36208 if (TARGET_FLOAT128_KEYWORD)
36209 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36210 else
36211 return VOIDmode;
36212
36213 case 128:
36214 return VOIDmode;
36215
36216 default:
36217 /* Those are the only valid _FloatNx types. */
36218 gcc_unreachable ();
36219 }
36220 }
36221 else
36222 {
36223 switch (n)
36224 {
36225 case 32:
36226 return SFmode;
36227
36228 case 64:
36229 return DFmode;
36230
36231 case 128:
36232 if (TARGET_FLOAT128_KEYWORD)
36233 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36234 else
36235 return VOIDmode;
36236
36237 default:
36238 return VOIDmode;
36239 }
36240 }
36241
36242 }
36243
36244 /* Target hook for c_mode_for_suffix. */
36245 static machine_mode
36246 rs6000_c_mode_for_suffix (char suffix)
36247 {
36248 if (TARGET_FLOAT128_TYPE)
36249 {
36250 if (suffix == 'q' || suffix == 'Q')
36251 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36252
36253 /* At the moment, we are not defining a suffix for IBM extended double.
36254 If/when the default for -mabi=ieeelongdouble is changed, and we want
36255 to support __ibm128 constants in legacy library code, we may need to
36256 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
36257 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36258 __float80 constants. */
36259 }
36260
36261 return VOIDmode;
36262 }
36263
36264 /* Target hook for invalid_arg_for_unprototyped_fn. */
36265 static const char *
36266 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36267 {
36268 return (!rs6000_darwin64_abi
36269 && typelist == 0
36270 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36271 && (funcdecl == NULL_TREE
36272 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36273 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36274 ? N_("AltiVec argument passed to unprototyped function")
36275 : NULL;
36276 }
36277
36278 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
36279 setup by using __stack_chk_fail_local hidden function instead of
36280 calling __stack_chk_fail directly. Otherwise it is better to call
36281 __stack_chk_fail directly. */
36282
36283 static tree ATTRIBUTE_UNUSED
36284 rs6000_stack_protect_fail (void)
36285 {
36286 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36287 ? default_hidden_stack_protect_fail ()
36288 : default_external_stack_protect_fail ();
36289 }
36290
36291 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36292
36293 #if TARGET_ELF
36294 static unsigned HOST_WIDE_INT
36295 rs6000_asan_shadow_offset (void)
36296 {
36297 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36298 }
36299 #endif
36300 \f
36301 /* Mask options that we want to support inside of attribute((target)) and
36302 #pragma GCC target operations. Note, we do not include things like
36303 64/32-bit, endianness, hard/soft floating point, etc. that would have
36304 different calling sequences. */
36305
36306 struct rs6000_opt_mask {
36307 const char *name; /* option name */
36308 HOST_WIDE_INT mask; /* mask to set */
36309 bool invert; /* invert sense of mask */
36310 bool valid_target; /* option is a target option */
36311 };
36312
36313 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36314 {
36315 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36316 { "cmpb", OPTION_MASK_CMPB, false, true },
36317 { "crypto", OPTION_MASK_CRYPTO, false, true },
36318 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36319 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36320 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36321 false, true },
36322 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false },
36323 { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false },
36324 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false },
36325 { "fprnd", OPTION_MASK_FPRND, false, true },
36326 { "hard-dfp", OPTION_MASK_DFP, false, true },
36327 { "htm", OPTION_MASK_HTM, false, true },
36328 { "isel", OPTION_MASK_ISEL, false, true },
36329 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36330 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36331 { "modulo", OPTION_MASK_MODULO, false, true },
36332 { "mulhw", OPTION_MASK_MULHW, false, true },
36333 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36334 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36335 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36336 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36337 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36338 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36339 { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true },
36340 { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true },
36341 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
36342 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36343 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36344 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36345 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36346 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36347 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36348 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36349 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36350 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36351 { "string", OPTION_MASK_STRING, false, true },
36352 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
36353 { "update", OPTION_MASK_NO_UPDATE, true , true },
36354 { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true },
36355 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
36356 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
36357 { "vsx", OPTION_MASK_VSX, false, true },
36358 { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true },
36359 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
36360 #ifdef OPTION_MASK_64BIT
36361 #if TARGET_AIX_OS
36362 { "aix64", OPTION_MASK_64BIT, false, false },
36363 { "aix32", OPTION_MASK_64BIT, true, false },
36364 #else
36365 { "64", OPTION_MASK_64BIT, false, false },
36366 { "32", OPTION_MASK_64BIT, true, false },
36367 #endif
36368 #endif
36369 #ifdef OPTION_MASK_EABI
36370 { "eabi", OPTION_MASK_EABI, false, false },
36371 #endif
36372 #ifdef OPTION_MASK_LITTLE_ENDIAN
36373 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36374 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36375 #endif
36376 #ifdef OPTION_MASK_RELOCATABLE
36377 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36378 #endif
36379 #ifdef OPTION_MASK_STRICT_ALIGN
36380 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36381 #endif
36382 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36383 { "string", OPTION_MASK_STRING, false, false },
36384 };
36385
36386 /* Builtin mask mapping for printing the flags. */
36387 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36388 {
36389 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36390 { "vsx", RS6000_BTM_VSX, false, false },
36391 { "paired", RS6000_BTM_PAIRED, false, false },
36392 { "fre", RS6000_BTM_FRE, false, false },
36393 { "fres", RS6000_BTM_FRES, false, false },
36394 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36395 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36396 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36397 { "cell", RS6000_BTM_CELL, false, false },
36398 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36399 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36400 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36401 { "crypto", RS6000_BTM_CRYPTO, false, false },
36402 { "htm", RS6000_BTM_HTM, false, false },
36403 { "hard-dfp", RS6000_BTM_DFP, false, false },
36404 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36405 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36406 { "float128", RS6000_BTM_FLOAT128, false, false },
36407 };
36408
36409 /* Option variables that we want to support inside attribute((target)) and
36410 #pragma GCC target operations. */
36411
36412 struct rs6000_opt_var {
36413 const char *name; /* option name */
36414 size_t global_offset; /* offset of the option in global_options. */
36415 size_t target_offset; /* offset of the option in target options. */
36416 };
36417
36418 static struct rs6000_opt_var const rs6000_opt_vars[] =
36419 {
36420 { "friz",
36421 offsetof (struct gcc_options, x_TARGET_FRIZ),
36422 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36423 { "avoid-indexed-addresses",
36424 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36425 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36426 { "paired",
36427 offsetof (struct gcc_options, x_rs6000_paired_float),
36428 offsetof (struct cl_target_option, x_rs6000_paired_float), },
36429 { "longcall",
36430 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36431 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36432 { "optimize-swaps",
36433 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36434 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36435 { "allow-movmisalign",
36436 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36437 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36438 { "allow-df-permute",
36439 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
36440 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
36441 { "sched-groups",
36442 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36443 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36444 { "always-hint",
36445 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36446 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36447 { "align-branch-targets",
36448 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36449 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36450 { "vectorize-builtins",
36451 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
36452 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
36453 { "tls-markers",
36454 offsetof (struct gcc_options, x_tls_markers),
36455 offsetof (struct cl_target_option, x_tls_markers), },
36456 { "sched-prolog",
36457 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36458 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36459 { "sched-epilog",
36460 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36461 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36462 };
36463
36464 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36465 parsing. Return true if there were no errors. */
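/* For example, #pragma GCC target ("cpu=power8,no-vsx") or
   __attribute__((__target__("altivec,tune=power9"))).  Each comma
   separated entry is either cpu= or tune= followed by a processor
   name, a mask name from rs6000_opt_masks (optionally prefixed with
   "no-"), or a variable name from rs6000_opt_vars.  */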
36466
36467 static bool
36468 rs6000_inner_target_options (tree args, bool attr_p)
36469 {
36470 bool ret = true;
36471
36472 if (args == NULL_TREE)
36473 ;
36474
36475 else if (TREE_CODE (args) == STRING_CST)
36476 {
36477 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36478 char *q;
36479
36480 while ((q = strtok (p, ",")) != NULL)
36481 {
36482 bool error_p = false;
36483 bool not_valid_p = false;
36484 const char *cpu_opt = NULL;
36485
36486 p = NULL;
36487 if (strncmp (q, "cpu=", 4) == 0)
36488 {
36489 int cpu_index = rs6000_cpu_name_lookup (q+4);
36490 if (cpu_index >= 0)
36491 rs6000_cpu_index = cpu_index;
36492 else
36493 {
36494 error_p = true;
36495 cpu_opt = q+4;
36496 }
36497 }
36498 else if (strncmp (q, "tune=", 5) == 0)
36499 {
36500 int tune_index = rs6000_cpu_name_lookup (q+5);
36501 if (tune_index >= 0)
36502 rs6000_tune_index = tune_index;
36503 else
36504 {
36505 error_p = true;
36506 cpu_opt = q+5;
36507 }
36508 }
36509 else
36510 {
36511 size_t i;
36512 bool invert = false;
36513 char *r = q;
36514
36515 error_p = true;
36516 if (strncmp (r, "no-", 3) == 0)
36517 {
36518 invert = true;
36519 r += 3;
36520 }
36521
36522 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36523 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36524 {
36525 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36526
36527 if (!rs6000_opt_masks[i].valid_target)
36528 not_valid_p = true;
36529 else
36530 {
36531 error_p = false;
36532 rs6000_isa_flags_explicit |= mask;
36533
36534 /* VSX needs altivec, so -mvsx automagically sets
36535 altivec and disables -mavoid-indexed-addresses. */
36536 if (!invert)
36537 {
36538 if (mask == OPTION_MASK_VSX)
36539 {
36540 mask |= OPTION_MASK_ALTIVEC;
36541 TARGET_AVOID_XFORM = 0;
36542 }
36543 }
36544
36545 if (rs6000_opt_masks[i].invert)
36546 invert = !invert;
36547
36548 if (invert)
36549 rs6000_isa_flags &= ~mask;
36550 else
36551 rs6000_isa_flags |= mask;
36552 }
36553 break;
36554 }
36555
36556 if (error_p && !not_valid_p)
36557 {
36558 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36559 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36560 {
36561 size_t j = rs6000_opt_vars[i].global_offset;
36562 *((int *) ((char *)&global_options + j)) = !invert;
36563 error_p = false;
36564 not_valid_p = false;
36565 break;
36566 }
36567 }
36568 }
36569
36570 if (error_p)
36571 {
36572 const char *eprefix, *esuffix;
36573
36574 ret = false;
36575 if (attr_p)
36576 {
36577 eprefix = "__attribute__((__target__(";
36578 esuffix = ")))";
36579 }
36580 else
36581 {
36582 eprefix = "#pragma GCC target ";
36583 esuffix = "";
36584 }
36585
36586 if (cpu_opt)
36587 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
36588 q, esuffix);
36589 else if (not_valid_p)
36590 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
36591 else
36592 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
36593 }
36594 }
36595 }
36596
36597 else if (TREE_CODE (args) == TREE_LIST)
36598 {
36599 do
36600 {
36601 tree value = TREE_VALUE (args);
36602 if (value)
36603 {
36604 bool ret2 = rs6000_inner_target_options (value, attr_p);
36605 if (!ret2)
36606 ret = false;
36607 }
36608 args = TREE_CHAIN (args);
36609 }
36610 while (args != NULL_TREE);
36611 }
36612
36613 else
36614 {
36615 error ("attribute %<target%> argument not a string");
36616 return false;
36617 }
36618
36619 return ret;
36620 }
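
/* A minimal standalone sketch of the tokenizer pattern used above (not GCC
   code): the first strtok call receives the buffer, and every later call
   passes NULL to continue scanning the same string, which is why P is
   cleared inside the loop:

     #include <stdio.h>
     #include <string.h>

     int
     main (void)
     {
       char buf[] = "cpu=power9,no-vsx,longcall";
       char *p = buf, *q;

       while ((q = strtok (p, ",")) != NULL)
	 {
	   p = NULL;		     /- continue in the same buffer -/
	   printf ("option: %s\n", q);
	 }
       return 0;
     }

   (The /- -/ markers stand in for normal comment delimiters, which cannot
   nest here.)  */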
36621
36622 /* Print out the target options as a list for -mdebug=target. */
36623
36624 static void
36625 rs6000_debug_target_options (tree args, const char *prefix)
36626 {
36627 if (args == NULL_TREE)
36628 fprintf (stderr, "%s<NULL>", prefix);
36629
36630 else if (TREE_CODE (args) == STRING_CST)
36631 {
36632 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36633 char *q;
36634
36635 while ((q = strtok (p, ",")) != NULL)
36636 {
36637 p = NULL;
36638 fprintf (stderr, "%s\"%s\"", prefix, q);
36639 prefix = ", ";
36640 }
36641 }
36642
36643 else if (TREE_CODE (args) == TREE_LIST)
36644 {
36645 do
36646 {
36647 tree value = TREE_VALUE (args);
36648 if (value)
36649 {
36650 rs6000_debug_target_options (value, prefix);
36651 prefix = ", ";
36652 }
36653 args = TREE_CHAIN (args);
36654 }
36655 while (args != NULL_TREE);
36656 }
36657
36658 else
36659 gcc_unreachable ();
36660
36661 return;
36662 }
36663
36664 \f
36665 /* Hook to validate attribute((target("..."))). */
36666
36667 static bool
36668 rs6000_valid_attribute_p (tree fndecl,
36669 tree ARG_UNUSED (name),
36670 tree args,
36671 int flags)
36672 {
36673 struct cl_target_option cur_target;
36674 bool ret;
36675 tree old_optimize = build_optimization_node (&global_options);
36676 tree new_target, new_optimize;
36677 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36678
36679 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36680
36681 if (TARGET_DEBUG_TARGET)
36682 {
36683 tree tname = DECL_NAME (fndecl);
36684 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36685 if (tname)
36686 fprintf (stderr, "function: %.*s\n",
36687 (int) IDENTIFIER_LENGTH (tname),
36688 IDENTIFIER_POINTER (tname));
36689 else
36690 fprintf (stderr, "function: unknown\n");
36691
36692 fprintf (stderr, "args:");
36693 rs6000_debug_target_options (args, " ");
36694 fprintf (stderr, "\n");
36695
36696 if (flags)
36697 fprintf (stderr, "flags: 0x%x\n", flags);
36698
36699 fprintf (stderr, "--------------------\n");
36700 }
36701
36702 /* attribute((target("default"))) does nothing, beyond
36703 affecting multi-versioning. */
36704 if (TREE_VALUE (args)
36705 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36706 && TREE_CHAIN (args) == NULL_TREE
36707 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36708 return true;
36709
36710 old_optimize = build_optimization_node (&global_options);
36711 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36712
36713 /* If the function changed the optimization levels as well as setting target
36714 options, start with the optimizations specified. */
36715 if (func_optimize && func_optimize != old_optimize)
36716 cl_optimization_restore (&global_options,
36717 TREE_OPTIMIZATION (func_optimize));
36718
36719 /* The target attributes may also change some optimization flags, so update
36720 the optimization options if necessary. */
36721 cl_target_option_save (&cur_target, &global_options);
36722 rs6000_cpu_index = rs6000_tune_index = -1;
36723 ret = rs6000_inner_target_options (args, true);
36724
36725 /* Set up any additional state. */
36726 if (ret)
36727 {
36728 ret = rs6000_option_override_internal (false);
36729 new_target = build_target_option_node (&global_options);
36730 }
36731 else
36732 new_target = NULL;
36733
36734 new_optimize = build_optimization_node (&global_options);
36735
36736 if (!new_target)
36737 ret = false;
36738
36739 else if (fndecl)
36740 {
36741 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36742
36743 if (old_optimize != new_optimize)
36744 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36745 }
36746
36747 cl_target_option_restore (&global_options, &cur_target);
36748
36749 if (old_optimize != new_optimize)
36750 cl_optimization_restore (&global_options,
36751 TREE_OPTIMIZATION (old_optimize));
36752
36753 return ret;
36754 }
36755
36756 \f
36757 /* Hook to validate the current #pragma GCC target and set the state, and
36758 update the macros based on what was changed. If ARGS is NULL, then
36759 POP_TARGET is used to reset the options. */
36760
36761 bool
36762 rs6000_pragma_target_parse (tree args, tree pop_target)
36763 {
36764 tree prev_tree = build_target_option_node (&global_options);
36765 tree cur_tree;
36766 struct cl_target_option *prev_opt, *cur_opt;
36767 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36768 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
36769
36770 if (TARGET_DEBUG_TARGET)
36771 {
36772 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36773 fprintf (stderr, "args:");
36774 rs6000_debug_target_options (args, " ");
36775 fprintf (stderr, "\n");
36776
36777 if (pop_target)
36778 {
36779 fprintf (stderr, "pop_target:\n");
36780 debug_tree (pop_target);
36781 }
36782 else
36783 fprintf (stderr, "pop_target: <NULL>\n");
36784
36785 fprintf (stderr, "--------------------\n");
36786 }
36787
36788 if (! args)
36789 {
36790 cur_tree = ((pop_target)
36791 ? pop_target
36792 : target_option_default_node);
36793 cl_target_option_restore (&global_options,
36794 TREE_TARGET_OPTION (cur_tree));
36795 }
36796 else
36797 {
36798 rs6000_cpu_index = rs6000_tune_index = -1;
36799 if (!rs6000_inner_target_options (args, false)
36800 || !rs6000_option_override_internal (false)
36801 || (cur_tree = build_target_option_node (&global_options))
36802 == NULL_TREE)
36803 {
36804 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36805 fprintf (stderr, "invalid pragma\n");
36806
36807 return false;
36808 }
36809 }
36810
36811 target_option_current_node = cur_tree;
36812
36813 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36814 change the macros that are defined. */
36815 if (rs6000_target_modify_macros_ptr)
36816 {
36817 prev_opt = TREE_TARGET_OPTION (prev_tree);
36818 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36819 prev_flags = prev_opt->x_rs6000_isa_flags;
36820
36821 cur_opt = TREE_TARGET_OPTION (cur_tree);
36822 cur_flags = cur_opt->x_rs6000_isa_flags;
36823 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36824
36825 diff_bumask = (prev_bumask ^ cur_bumask);
36826 diff_flags = (prev_flags ^ cur_flags);
36827
36828 if ((diff_flags != 0) || (diff_bumask != 0))
36829 {
36830 /* Delete old macros. */
36831 rs6000_target_modify_macros_ptr (false,
36832 prev_flags & diff_flags,
36833 prev_bumask & diff_bumask);
36834
36835 /* Define new macros. */
36836 rs6000_target_modify_macros_ptr (true,
36837 cur_flags & diff_flags,
36838 cur_bumask & diff_bumask);
36839 }
36840 }
36841
36842 return true;
36843 }
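
/* Illustrative use of the pragma form parsed above (a sketch, not GCC
   code); popping the options is what drives the ARGS == NULL path:

     #pragma GCC push_options
     #pragma GCC target ("vsx,cpu=power8")
     void vector_code (void) { }
     #pragma GCC pop_options
*/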
36844
36845 \f
36846 /* Remember the last target of rs6000_set_current_function. */
36847 static GTY(()) tree rs6000_previous_fndecl;
36848
36849 /* Establish appropriate back-end context for processing the function
36850 FNDECL. The argument might be NULL to indicate processing at top
36851 level, outside of any function scope. */
36852 static void
36853 rs6000_set_current_function (tree fndecl)
36854 {
36855 tree old_tree = (rs6000_previous_fndecl
36856 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
36857 : NULL_TREE);
36858
36859 tree new_tree = (fndecl
36860 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
36861 : NULL_TREE);
36862
36863 if (TARGET_DEBUG_TARGET)
36864 {
36865 bool print_final = false;
36866 fprintf (stderr, "\n==================== rs6000_set_current_function");
36867
36868 if (fndecl)
36869 fprintf (stderr, ", fndecl %s (%p)",
36870 (DECL_NAME (fndecl)
36871 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36872 : "<unknown>"), (void *)fndecl);
36873
36874 if (rs6000_previous_fndecl)
36875 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36876
36877 fprintf (stderr, "\n");
36878 if (new_tree)
36879 {
36880 fprintf (stderr, "\nnew fndecl target specific options:\n");
36881 debug_tree (new_tree);
36882 print_final = true;
36883 }
36884
36885 if (old_tree)
36886 {
36887 fprintf (stderr, "\nold fndecl target specific options:\n");
36888 debug_tree (old_tree);
36889 print_final = true;
36890 }
36891
36892 if (print_final)
36893 fprintf (stderr, "--------------------\n");
36894 }
36895
36896 /* Only change the context if the function changes. This hook is called
36897 several times in the course of compiling a function, and we don't want to
36898 slow things down too much or call target_reinit when it isn't safe. */
36899 if (fndecl && fndecl != rs6000_previous_fndecl)
36900 {
36901 rs6000_previous_fndecl = fndecl;
36902 if (old_tree == new_tree)
36903 ;
36904
36905 else if (new_tree && new_tree != target_option_default_node)
36906 {
36907 cl_target_option_restore (&global_options,
36908 TREE_TARGET_OPTION (new_tree));
36909 if (TREE_TARGET_GLOBALS (new_tree))
36910 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36911 else
36912 TREE_TARGET_GLOBALS (new_tree)
36913 = save_target_globals_default_opts ();
36914 }
36915
36916 else if (old_tree && old_tree != target_option_default_node)
36917 {
36918 new_tree = target_option_current_node;
36919 cl_target_option_restore (&global_options,
36920 TREE_TARGET_OPTION (new_tree));
36921 if (TREE_TARGET_GLOBALS (new_tree))
36922 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36923 else if (new_tree == target_option_default_node)
36924 restore_target_globals (&default_target_globals);
36925 else
36926 TREE_TARGET_GLOBALS (new_tree)
36927 = save_target_globals_default_opts ();
36928 }
36929 }
36930 }
36931
36932 \f
36933 /* Save the current options */
36934
36935 static void
36936 rs6000_function_specific_save (struct cl_target_option *ptr,
36937 struct gcc_options *opts)
36938 {
36939 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36940 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36941 }
36942
36943 /* Restore the current options */
36944
36945 static void
36946 rs6000_function_specific_restore (struct gcc_options *opts,
36947 struct cl_target_option *ptr)
36948
36949 {
36950 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36951 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36952 (void) rs6000_option_override_internal (false);
36953 }
36954
36955 /* Print the current options */
36956
36957 static void
36958 rs6000_function_specific_print (FILE *file, int indent,
36959 struct cl_target_option *ptr)
36960 {
36961 rs6000_print_isa_options (file, indent, "Isa options set",
36962 ptr->x_rs6000_isa_flags);
36963
36964 rs6000_print_isa_options (file, indent, "Isa options explicit",
36965 ptr->x_rs6000_isa_flags_explicit);
36966 }
36967
36968 /* Helper function to print the current isa or misc options on a line. */
36969
36970 static void
36971 rs6000_print_options_internal (FILE *file,
36972 int indent,
36973 const char *string,
36974 HOST_WIDE_INT flags,
36975 const char *prefix,
36976 const struct rs6000_opt_mask *opts,
36977 size_t num_elements)
36978 {
36979 size_t i;
36980 size_t start_column = 0;
36981 size_t cur_column;
36982 size_t max_column = 120;
36983 size_t prefix_len = strlen (prefix);
36984 size_t comma_len = 0;
36985 const char *comma = "";
36986
36987 if (indent)
36988 start_column += fprintf (file, "%*s", indent, "");
36989
36990 if (!flags)
36991 {
36992 	      fprintf (file, DEBUG_FMT_S, string, "<none>");
36993 return;
36994 }
36995
36996 	  start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
36997
36998 /* Print the various mask options. */
36999 cur_column = start_column;
37000 for (i = 0; i < num_elements; i++)
37001 {
37002 bool invert = opts[i].invert;
37003 const char *name = opts[i].name;
37004 const char *no_str = "";
37005 HOST_WIDE_INT mask = opts[i].mask;
37006 size_t len = comma_len + prefix_len + strlen (name);
37007
37008 if (!invert)
37009 {
37010 if ((flags & mask) == 0)
37011 {
37012 no_str = "no-";
37013 len += sizeof ("no-") - 1;
37014 }
37015
37016 flags &= ~mask;
37017 }
37018
37019 else
37020 {
37021 if ((flags & mask) != 0)
37022 {
37023 no_str = "no-";
37024 len += sizeof ("no-") - 1;
37025 }
37026
37027 flags |= mask;
37028 }
37029
37030 cur_column += len;
37031 if (cur_column > max_column)
37032 {
37033 	  fprintf (file, ", \\\n%*s", (int)start_column, "");
37034 cur_column = start_column + len;
37035 comma = "";
37036 }
37037
37038 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
37039 comma = ", ";
37040 comma_len = sizeof (", ") - 1;
37041 }
37042
37043 fputs ("\n", file);
37044 }
37045
37046 /* Helper function to print the current isa options on a line. */
37047
37048 static void
37049 rs6000_print_isa_options (FILE *file, int indent, const char *string,
37050 HOST_WIDE_INT flags)
37051 {
37052 rs6000_print_options_internal (file, indent, string, flags, "-m",
37053 &rs6000_opt_masks[0],
37054 ARRAY_SIZE (rs6000_opt_masks));
37055 }
37056
37057 static void
37058 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
37059 HOST_WIDE_INT flags)
37060 {
37061 rs6000_print_options_internal (file, indent, string, flags, "",
37062 &rs6000_builtin_mask_names[0],
37063 ARRAY_SIZE (rs6000_builtin_mask_names));
37064 }
37065
37066 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
37067 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
37068 -mvsx-timode, -mupper-regs-df).
37069
37070 If the user used -mno-power8-vector, we need to turn off all of the implicit
37071 ISA 2.07 and 3.0 options that relate to the vector unit.
37072
37073 If the user used -mno-power9-vector, we need to turn off all of the implicit
37074 ISA 3.0 options that relate to the vector unit.
37075
37076 This function does not handle explicit options such as the user specifying
37077 -mdirect-move. These are handled in rs6000_option_override_internal, and
37078 the appropriate error is given if needed.
37079
37080 We return a mask of all of the implicit options that should not be enabled
37081 by default. */
37082
37083 static HOST_WIDE_INT
37084 rs6000_disable_incompatible_switches (void)
37085 {
37086 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
37087 size_t i, j;
37088
37089 static const struct {
37090 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
37091 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
37092 const char *const name; /* name of the switch. */
37093 } flags[] = {
37094 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
37095 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
37096 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
37097 };
37098
37099 for (i = 0; i < ARRAY_SIZE (flags); i++)
37100 {
37101 HOST_WIDE_INT no_flag = flags[i].no_flag;
37102
37103 if ((rs6000_isa_flags & no_flag) == 0
37104 && (rs6000_isa_flags_explicit & no_flag) != 0)
37105 {
37106 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
37107 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
37108 & rs6000_isa_flags
37109 & dep_flags);
37110
37111 if (set_flags)
37112 {
37113 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
37114 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
37115 {
37116 set_flags &= ~rs6000_opt_masks[j].mask;
37117 error ("-mno-%s turns off -m%s",
37118 flags[i].name,
37119 rs6000_opt_masks[j].name);
37120 }
37121
37122 gcc_assert (!set_flags);
37123 }
37124
37125 rs6000_isa_flags &= ~dep_flags;
37126 ignore_masks |= no_flag | dep_flags;
37127 }
37128 }
37129
37130 if (!TARGET_P9_VECTOR
37131 && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
37132 && TARGET_P9_DFORM_BOTH > 0)
37133 {
37134 error ("-mno-power9-vector turns off -mpower9-dform");
37135 TARGET_P9_DFORM_BOTH = 0;
37136 }
37137
37138 return ignore_masks;
37139 }
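
/* Example of the interplay handled above (illustrative command lines):
   "gcc -mcpu=power9 -mno-vsx" silently clears the implicit
   -mpower8-vector and -mpower9-vector, whereas
   "gcc -mno-vsx -mpower8-vector" reports "-mno-vsx turns off
   -mpower8-vector", because there the dependent flag was set
   explicitly.  */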
37140
37141 \f
37142 /* Helper function for printing the function name when debugging. */
37143
37144 static const char *
37145 get_decl_name (tree fn)
37146 {
37147 tree name;
37148
37149 if (!fn)
37150 return "<null>";
37151
37152 name = DECL_NAME (fn);
37153 if (!name)
37154 return "<no-name>";
37155
37156 return IDENTIFIER_POINTER (name);
37157 }
37158
37159 /* Return the clone id of the target we are compiling code for in a target
37160    clone.  Clone ids run from 0 (default) to CLONE_MAX-1 and give the
37161    priority ordering for the target clones (from lowest to
37162 highest). */
37163
37164 static int
37165 rs6000_clone_priority (tree fndecl)
37166 {
37167 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37168 HOST_WIDE_INT isa_masks;
37169 int ret = CLONE_DEFAULT;
37170 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
37171 const char *attrs_str = NULL;
37172
37173 attrs = TREE_VALUE (TREE_VALUE (attrs));
37174 attrs_str = TREE_STRING_POINTER (attrs);
37175
37176 /* Return priority zero for default function. Return the ISA needed for the
37177 function if it is not the default. */
37178 if (strcmp (attrs_str, "default") != 0)
37179 {
37180 if (fn_opts == NULL_TREE)
37181 fn_opts = target_option_default_node;
37182
37183 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
37184 isa_masks = rs6000_isa_flags;
37185 else
37186 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
37187
37188 for (ret = CLONE_MAX - 1; ret != 0; ret--)
37189 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
37190 break;
37191 }
37192
37193 if (TARGET_DEBUG_TARGET)
37194     fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
37195 get_decl_name (fndecl), ret);
37196
37197 return ret;
37198 }
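
/* A sketch of the user-level feature this serves (the function name is
   illustrative):

     __attribute__((target_clones("cpu=power9","default")))
     int compute (int *p) { return *p; }

   The "cpu=power9" clone receives the higher priority, so the generated
   dispatcher tests for it before falling back to the default clone.  */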
37199
37200 /* This compares the priority of target features in function DECL1 and DECL2.
37201 It returns positive value if DECL1 is higher priority, negative value if
37202 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37203 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
37204
37205 static int
37206 rs6000_compare_version_priority (tree decl1, tree decl2)
37207 {
37208 int priority1 = rs6000_clone_priority (decl1);
37209 int priority2 = rs6000_clone_priority (decl2);
37210 int ret = priority1 - priority2;
37211
37212 if (TARGET_DEBUG_TARGET)
37213 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
37214 get_decl_name (decl1), get_decl_name (decl2), ret);
37215
37216 return ret;
37217 }
37218
37219 /* Make a dispatcher declaration for the multi-versioned function DECL.
37220 Calls to DECL function will be replaced with calls to the dispatcher
37221 by the front-end. Returns the decl of the dispatcher function. */
37222
37223 static tree
37224 rs6000_get_function_versions_dispatcher (void *decl)
37225 {
37226 tree fn = (tree) decl;
37227 struct cgraph_node *node = NULL;
37228 struct cgraph_node *default_node = NULL;
37229 struct cgraph_function_version_info *node_v = NULL;
37230 struct cgraph_function_version_info *first_v = NULL;
37231
37232 tree dispatch_decl = NULL;
37233
37234 struct cgraph_function_version_info *default_version_info = NULL;
37235 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37236
37237 if (TARGET_DEBUG_TARGET)
37238 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
37239 get_decl_name (fn));
37240
37241 node = cgraph_node::get (fn);
37242 gcc_assert (node != NULL);
37243
37244 node_v = node->function_version ();
37245 gcc_assert (node_v != NULL);
37246
37247 if (node_v->dispatcher_resolver != NULL)
37248 return node_v->dispatcher_resolver;
37249
37250 /* Find the default version and make it the first node. */
37251 first_v = node_v;
37252 /* Go to the beginning of the chain. */
37253 while (first_v->prev != NULL)
37254 first_v = first_v->prev;
37255
37256 default_version_info = first_v;
37257 while (default_version_info != NULL)
37258 {
37259 const tree decl2 = default_version_info->this_node->decl;
37260 if (is_function_default_version (decl2))
37261 break;
37262 default_version_info = default_version_info->next;
37263 }
37264
37265 /* If there is no default node, just return NULL. */
37266 if (default_version_info == NULL)
37267 return NULL;
37268
37269 /* Make default info the first node. */
37270 if (first_v != default_version_info)
37271 {
37272 default_version_info->prev->next = default_version_info->next;
37273 if (default_version_info->next)
37274 default_version_info->next->prev = default_version_info->prev;
37275 first_v->prev = default_version_info;
37276 default_version_info->next = first_v;
37277 default_version_info->prev = NULL;
37278 }
37279
37280 default_node = default_version_info->this_node;
37281
37282 if (targetm.has_ifunc_p ())
37283 {
37284 struct cgraph_function_version_info *it_v = NULL;
37285 struct cgraph_node *dispatcher_node = NULL;
37286 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37287
37288 /* Right now, the dispatching is done via ifunc. */
37289 dispatch_decl = make_dispatcher_decl (default_node->decl);
37290
37291 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37292 gcc_assert (dispatcher_node != NULL);
37293 dispatcher_node->dispatcher_function = 1;
37294 dispatcher_version_info
37295 = dispatcher_node->insert_new_function_version ();
37296 dispatcher_version_info->next = default_version_info;
37297 dispatcher_node->definition = 1;
37298
37299 /* Set the dispatcher for all the versions. */
37300 it_v = default_version_info;
37301 while (it_v != NULL)
37302 {
37303 it_v->dispatcher_resolver = dispatch_decl;
37304 it_v = it_v->next;
37305 }
37306 }
37307 else
37308 {
37309 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37310 "multiversioning needs ifunc which is not supported "
37311 "on this target");
37312 }
37313
37314 return dispatch_decl;
37315 }
37316
37317 /* Make the resolver function decl to dispatch the versions of a multi-
37318 versioned function, DEFAULT_DECL. Create an empty basic block in the
37319 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
37320 function. */
37321
37322 static tree
37323 make_resolver_func (const tree default_decl,
37324 const tree dispatch_decl,
37325 basic_block *empty_bb)
37326 {
37327   /* IFUNCs have to be globally visible.  So, if the default_decl is
37328 not, then the name of the IFUNC should be made unique. */
37329 bool is_uniq = (TREE_PUBLIC (default_decl) == 0);
37330
37331 /* Append the filename to the resolver function if the versions are
37332 not externally visible. This is because the resolver function has
37333 to be externally visible for the loader to find it. So, appending
37334 the filename will prevent conflicts with a resolver function from
37335 another module which is based on the same version name. */
37336 char *resolver_name = make_unique_name (default_decl, "resolver", is_uniq);
37337
37338 /* The resolver function should return a (void *). */
37339 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
37340 tree decl = build_fn_decl (resolver_name, type);
37341 tree decl_name = get_identifier (resolver_name);
37342 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37343
37344 DECL_NAME (decl) = decl_name;
37345 TREE_USED (decl) = 1;
37346 DECL_ARTIFICIAL (decl) = 1;
37347 DECL_IGNORED_P (decl) = 0;
37348 /* IFUNC resolvers have to be externally visible. */
37349 TREE_PUBLIC (decl) = 1;
37350 DECL_UNINLINABLE (decl) = 1;
37351
37352 /* Resolver is not external, body is generated. */
37353 DECL_EXTERNAL (decl) = 0;
37354 DECL_EXTERNAL (dispatch_decl) = 0;
37355
37356 DECL_CONTEXT (decl) = NULL_TREE;
37357 DECL_INITIAL (decl) = make_node (BLOCK);
37358 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37359
37360 if (DECL_COMDAT_GROUP (default_decl) || TREE_PUBLIC (default_decl))
37361 {
37362 /* In this case, each translation unit with a call to this
37363 versioned function will put out a resolver. Ensure it
37364 is comdat to keep just one copy. */
37365 DECL_COMDAT (decl) = 1;
37366 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
37367 }
37368
37369 /* Build result decl and add to function_decl. */
37370 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37371 DECL_ARTIFICIAL (t) = 1;
37372 DECL_IGNORED_P (t) = 1;
37373 DECL_RESULT (decl) = t;
37374
37375 gimplify_function_tree (decl);
37376 push_cfun (DECL_STRUCT_FUNCTION (decl));
37377 *empty_bb = init_lowered_empty_function (decl, false,
37378 profile_count::uninitialized ());
37379
37380 cgraph_node::add_new_function (decl, true);
37381 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37382
37383 pop_cfun ();
37384
37385 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37386 DECL_ATTRIBUTES (dispatch_decl)
37387 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37388
37389 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37390 XDELETEVEC (resolver_name);
37391 return decl;
37392 }
37393
37394 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37395 return a pointer to VERSION_DECL if we are running on a machine that
37396    supports the hardware architecture bits indexed by CLONE_ISA.  This function will
37397 be called during version dispatch to decide which function version to
37398 execute. It returns the basic block at the end, to which more conditions
37399 can be added. */
37400
37401 static basic_block
37402 add_condition_to_bb (tree function_decl, tree version_decl,
37403 int clone_isa, basic_block new_bb)
37404 {
37405 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
37406
37407 gcc_assert (new_bb != NULL);
37408 gimple_seq gseq = bb_seq (new_bb);
37409
37410
37411 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
37412 build_fold_addr_expr (version_decl));
37413 tree result_var = create_tmp_var (ptr_type_node);
37414 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
37415 gimple *return_stmt = gimple_build_return (result_var);
37416
37417 if (clone_isa == CLONE_DEFAULT)
37418 {
37419 gimple_seq_add_stmt (&gseq, convert_stmt);
37420 gimple_seq_add_stmt (&gseq, return_stmt);
37421 set_bb_seq (new_bb, gseq);
37422 gimple_set_bb (convert_stmt, new_bb);
37423 gimple_set_bb (return_stmt, new_bb);
37424 pop_cfun ();
37425 return new_bb;
37426 }
37427
37428 tree bool_zero = build_int_cst (bool_int_type_node, 0);
37429 tree cond_var = create_tmp_var (bool_int_type_node);
37430 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
37431 const char *arg_str = rs6000_clone_map[clone_isa].name;
37432 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37433 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
37434 gimple_call_set_lhs (call_cond_stmt, cond_var);
37435
37436 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
37437 gimple_set_bb (call_cond_stmt, new_bb);
37438 gimple_seq_add_stmt (&gseq, call_cond_stmt);
37439
37440 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
37441 NULL_TREE, NULL_TREE);
37442 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37443 gimple_set_bb (if_else_stmt, new_bb);
37444 gimple_seq_add_stmt (&gseq, if_else_stmt);
37445
37446 gimple_seq_add_stmt (&gseq, convert_stmt);
37447 gimple_seq_add_stmt (&gseq, return_stmt);
37448 set_bb_seq (new_bb, gseq);
37449
37450 basic_block bb1 = new_bb;
37451 edge e12 = split_block (bb1, if_else_stmt);
37452 basic_block bb2 = e12->dest;
37453 e12->flags &= ~EDGE_FALLTHRU;
37454 e12->flags |= EDGE_TRUE_VALUE;
37455
37456 edge e23 = split_block (bb2, return_stmt);
37457 gimple_set_bb (convert_stmt, bb2);
37458 gimple_set_bb (return_stmt, bb2);
37459
37460 basic_block bb3 = e23->dest;
37461 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37462
37463 remove_edge (e23);
37464 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37465
37466 pop_cfun ();
37467 return bb3;
37468 }
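
/* The dispatch code built here behaves like this C sketch (illustrative;
   the real body is GIMPLE, and the predicate strings come from
   rs6000_clone_map):

     static void *
     compute_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
	 return (void *) compute_power9;
       return (void *) compute_default;
     }
*/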
37469
37470 /* This function generates the dispatch function for multi-versioned functions.
37471 DISPATCH_DECL is the function which will contain the dispatch logic.
37472 FNDECLS are the function choices for dispatch, and is a tree chain.
37473 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37474 code is generated. */
37475
37476 static int
37477 dispatch_function_versions (tree dispatch_decl,
37478 void *fndecls_p,
37479 basic_block *empty_bb)
37480 {
37481 int ix;
37482 tree ele;
37483 vec<tree> *fndecls;
37484 tree clones[CLONE_MAX];
37485
37486 if (TARGET_DEBUG_TARGET)
37487 fputs ("dispatch_function_versions, top\n", stderr);
37488
37489 gcc_assert (dispatch_decl != NULL
37490 && fndecls_p != NULL
37491 && empty_bb != NULL);
37492
37493 /* fndecls_p is actually a vector. */
37494 fndecls = static_cast<vec<tree> *> (fndecls_p);
37495
37496 /* At least one more version other than the default. */
37497 gcc_assert (fndecls->length () >= 2);
37498
37499 /* The first version in the vector is the default decl. */
37500 memset ((void *) clones, '\0', sizeof (clones));
37501 clones[CLONE_DEFAULT] = (*fndecls)[0];
37502
37503   /* We do not need to call __builtin_cpu_init, which is a NOP on the PowerPC
37504      (on the x86_64, it is not a NOP).  The builtin function
37505      __builtin_cpu_supports ensures that the TOC fields are set up by requiring
37506      a recent glibc.  If we ever need to call __builtin_cpu_init, we would need
37507      to insert the code here to do the call.  */
37508
37509 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37510 {
37511 int priority = rs6000_clone_priority (ele);
37512 if (!clones[priority])
37513 clones[priority] = ele;
37514 }
37515
37516 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37517 if (clones[ix])
37518 {
37519 if (TARGET_DEBUG_TARGET)
37520 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37521 ix, get_decl_name (clones[ix]));
37522
37523 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37524 *empty_bb);
37525 }
37526
37527 return 0;
37528 }
37529
37530 /* Generate the dispatching code body to dispatch multi-versioned function
37531 DECL. The target hook is called to process the "target" attributes and
37532 provide the code to dispatch the right function at run-time. NODE points
37533 to the dispatcher decl whose body will be created. */
37534
37535 static tree
37536 rs6000_generate_version_dispatcher_body (void *node_p)
37537 {
37538 tree resolver;
37539 basic_block empty_bb;
37540 struct cgraph_node *node = (cgraph_node *) node_p;
37541 struct cgraph_function_version_info *ninfo = node->function_version ();
37542
37543 if (ninfo->dispatcher_resolver)
37544 return ninfo->dispatcher_resolver;
37545
37546 /* node is going to be an alias, so remove the finalized bit. */
37547 node->definition = false;
37548
37549 /* The first version in the chain corresponds to the default version. */
37550 ninfo->dispatcher_resolver = resolver
37551 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37552
37553 if (TARGET_DEBUG_TARGET)
37554     fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
37555 get_decl_name (resolver));
37556
37557 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37558 auto_vec<tree, 2> fn_ver_vec;
37559
37560 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37561 vinfo;
37562 vinfo = vinfo->next)
37563 {
37564 struct cgraph_node *version = vinfo->this_node;
37565 /* Check for virtual functions here again, as by this time it should
37566 have been determined if this function needs a vtable index or
37567 not. This happens for methods in derived classes that override
37568 virtual methods in base classes but are not explicitly marked as
37569 virtual. */
37570 if (DECL_VINDEX (version->decl))
37571 	sorry ("virtual function multiversioning not supported");
37572
37573 fn_ver_vec.safe_push (version->decl);
37574 }
37575
37576 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37577 cgraph_edge::rebuild_edges ();
37578 pop_cfun ();
37579 return resolver;
37580 }
37581
37582 \f
37583 /* Hook to determine if one function can safely inline another. */
37584
37585 static bool
37586 rs6000_can_inline_p (tree caller, tree callee)
37587 {
37588 bool ret = false;
37589 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37590 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37591
37592 /* If callee has no option attributes, then it is ok to inline. */
37593 if (!callee_tree)
37594 ret = true;
37595
37596 /* If caller has no option attributes, but callee does then it is not ok to
37597 inline. */
37598 else if (!caller_tree)
37599 ret = false;
37600
37601 else
37602 {
37603 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37604 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37605
37606       /* Callee's options should be a subset of the caller's, i.e. a vsx function
37607 can inline an altivec function but a non-vsx function can't inline a
37608 vsx function. */
37609 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37610 == callee_opts->x_rs6000_isa_flags)
37611 ret = true;
37612 }
37613
37614 if (TARGET_DEBUG_TARGET)
37615     fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
37616 get_decl_name (caller), get_decl_name (callee),
37617 (ret ? "can" : "cannot"));
37618
37619 return ret;
37620 }
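
/* For example (illustrative declarations): with

     __attribute__((target("vsx"))) void callee (void);
     __attribute__((target("vsx,power8-vector"))) void caller (void);

   caller may inline callee, since the callee's ISA flags are a subset of
   the caller's; the reverse pairing is rejected.  */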
37621 \f
37622 /* Allocate a stack temp and fix up the address so it meets the particular
37623    memory requirements (either offsettable or REG+REG addressing).  */
37624
37625 rtx
37626 rs6000_allocate_stack_temp (machine_mode mode,
37627 bool offsettable_p,
37628 bool reg_reg_p)
37629 {
37630 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37631 rtx addr = XEXP (stack, 0);
37632 int strict_p = (reload_in_progress || reload_completed);
37633
37634 if (!legitimate_indirect_address_p (addr, strict_p))
37635 {
37636 if (offsettable_p
37637 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37638 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37639
37640 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37641 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37642 }
37643
37644 return stack;
37645 }
37646
37647 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37648 to such a form to deal with memory reference instructions like STFIWX that
37649 only take reg+reg addressing. */
37650
37651 rtx
37652 rs6000_address_for_fpconvert (rtx x)
37653 {
37654 int strict_p = (reload_in_progress || reload_completed);
37655 rtx addr;
37656
37657 gcc_assert (MEM_P (x));
37658 addr = XEXP (x, 0);
37659 if (! legitimate_indirect_address_p (addr, strict_p)
37660 && ! legitimate_indexed_address_p (addr, strict_p))
37661 {
37662 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37663 {
37664 rtx reg = XEXP (addr, 0);
37665 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37666 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37667 gcc_assert (REG_P (reg));
37668 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37669 addr = reg;
37670 }
37671 else if (GET_CODE (addr) == PRE_MODIFY)
37672 {
37673 rtx reg = XEXP (addr, 0);
37674 rtx expr = XEXP (addr, 1);
37675 gcc_assert (REG_P (reg));
37676 gcc_assert (GET_CODE (expr) == PLUS);
37677 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37678 addr = reg;
37679 }
37680
37681 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37682 }
37683
37684 return x;
37685 }
37686
37687 /* Given a memory reference, if it is not in the form for altivec memory
37688 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
37689 convert to the altivec format. */
37690
37691 rtx
37692 rs6000_address_for_altivec (rtx x)
37693 {
37694 gcc_assert (MEM_P (x));
37695 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
37696 {
37697 rtx addr = XEXP (x, 0);
37698 int strict_p = (reload_in_progress || reload_completed);
37699
37700 if (!legitimate_indexed_address_p (addr, strict_p)
37701 && !legitimate_indirect_address_p (addr, strict_p))
37702 addr = copy_to_mode_reg (Pmode, addr);
37703
37704 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
37705 x = change_address (x, GET_MODE (x), addr);
37706 }
37707
37708 return x;
37709 }
37710
37711 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37712
37713 On the RS/6000, all integer constants are acceptable, most won't be valid
37714 for particular insns, though. Only easy FP constants are acceptable. */
37715
37716 static bool
37717 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37718 {
37719 if (TARGET_ELF && tls_referenced_p (x))
37720 return false;
37721
37722 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37723 || GET_MODE (x) == VOIDmode
37724 || (TARGET_POWERPC64 && mode == DImode)
37725 || easy_fp_constant (x, mode)
37726 || easy_vector_constant (x, mode));
37727 }
37728
37729 \f
37730 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37731
37732 static bool
37733 chain_already_loaded (rtx_insn *last)
37734 {
37735 for (; last != NULL; last = PREV_INSN (last))
37736 {
37737 if (NONJUMP_INSN_P (last))
37738 {
37739 rtx patt = PATTERN (last);
37740
37741 if (GET_CODE (patt) == SET)
37742 {
37743 rtx lhs = XEXP (patt, 0);
37744
37745 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37746 return true;
37747 }
37748 }
37749 }
37750 return false;
37751 }
37752
37753 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37754
37755 void
37756 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37757 {
37758 const bool direct_call_p
37759 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37760 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37761 rtx toc_load = NULL_RTX;
37762 rtx toc_restore = NULL_RTX;
37763 rtx func_addr;
37764 rtx abi_reg = NULL_RTX;
37765 rtx call[4];
37766 int n_call;
37767 rtx insn;
37768
37769 /* Handle longcall attributes. */
37770 if (INTVAL (cookie) & CALL_LONG)
37771 func_desc = rs6000_longcall_ref (func_desc);
37772
37773 /* Handle indirect calls. */
37774 if (GET_CODE (func_desc) != SYMBOL_REF
37775 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37776 {
37777 /* Save the TOC into its reserved slot before the call,
37778 and prepare to restore it after the call. */
37779 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37780 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37781 rtx stack_toc_mem = gen_frame_mem (Pmode,
37782 gen_rtx_PLUS (Pmode, stack_ptr,
37783 stack_toc_offset));
37784 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37785 gen_rtvec (1, stack_toc_offset),
37786 UNSPEC_TOCSLOT);
37787 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37788
37789 /* Can we optimize saving the TOC in the prologue or
37790 do we need to do it at every call? */
37791 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37792 cfun->machine->save_toc_in_prologue = true;
37793 else
37794 {
37795 MEM_VOLATILE_P (stack_toc_mem) = 1;
37796 emit_move_insn (stack_toc_mem, toc_reg);
37797 }
37798
37799 if (DEFAULT_ABI == ABI_ELFv2)
37800 {
37801 /* A function pointer in the ELFv2 ABI is just a plain address, but
37802 the ABI requires it to be loaded into r12 before the call. */
37803 func_addr = gen_rtx_REG (Pmode, 12);
37804 emit_move_insn (func_addr, func_desc);
37805 abi_reg = func_addr;
37806 }
37807 else
37808 {
37809 /* A function pointer under AIX is a pointer to a data area whose
37810 first word contains the actual address of the function, whose
37811 second word contains a pointer to its TOC, and whose third word
37812 contains a value to place in the static chain register (r11).
37813 Note that if we load the static chain, our "trampoline" need
37814 not have any executable code. */
37815
37816 /* Load up address of the actual function. */
37817 func_desc = force_reg (Pmode, func_desc);
37818 func_addr = gen_reg_rtx (Pmode);
37819 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
37820
37821 /* Prepare to load the TOC of the called function. Note that the
37822 TOC load must happen immediately before the actual call so
37823 that unwinding the TOC registers works correctly. See the
37824 comment in frob_update_context. */
37825 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37826 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37827 gen_rtx_PLUS (Pmode, func_desc,
37828 func_toc_offset));
37829 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37830
37831 /* If we have a static chain, load it up. But, if the call was
37832 originally direct, the 3rd word has not been written since no
37833 trampoline has been built, so we ought not to load it, lest we
37834 override a static chain value. */
37835 if (!direct_call_p
37836 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37837 && !chain_already_loaded (get_current_sequence ()->next->last))
37838 {
37839 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37840 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37841 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37842 gen_rtx_PLUS (Pmode, func_desc,
37843 func_sc_offset));
37844 emit_move_insn (sc_reg, func_sc_mem);
37845 abi_reg = sc_reg;
37846 }
37847 }
37848 }
37849 else
37850 {
37851 /* Direct calls use the TOC: for local calls, the callee will
37852 assume the TOC register is set; for non-local calls, the
37853 PLT stub needs the TOC register. */
37854 abi_reg = toc_reg;
37855 func_addr = func_desc;
37856 }
37857
37858 /* Create the call. */
37859 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37860 if (value != NULL_RTX)
37861 call[0] = gen_rtx_SET (value, call[0]);
37862 n_call = 1;
37863
37864 if (toc_load)
37865 call[n_call++] = toc_load;
37866 if (toc_restore)
37867 call[n_call++] = toc_restore;
37868
37869 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37870
37871 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37872 insn = emit_call_insn (insn);
37873
37874 /* Mention all registers defined by the ABI to hold information
37875 as uses in CALL_INSN_FUNCTION_USAGE. */
37876 if (abi_reg)
37877 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37878 }
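
/* On ELFv2, the indirect call built above typically assembles to a
   sequence like this (an illustrative sketch; offsets and registers vary):

	std 2,24(1)	# save the TOC in its reserved stack slot
	mr 12,9		# the ABI wants the function address in r12
	mtctr 12
	bctrl		# the call itself
	ld 2,24(1)	# restore the TOC (the toc_restore SET)

   When TARGET_SAVE_TOC_INDIRECT applies, the save is instead done once in
   the prologue.  */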
37879
37880 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37881
37882 void
37883 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37884 {
37885 rtx call[2];
37886 rtx insn;
37887
37888 gcc_assert (INTVAL (cookie) == 0);
37889
37890 /* Create the call. */
37891 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37892 if (value != NULL_RTX)
37893 call[0] = gen_rtx_SET (value, call[0]);
37894
37895 call[1] = simple_return_rtx;
37896
37897 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37898 insn = emit_call_insn (insn);
37899
37900 /* Note use of the TOC register. */
37901 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37902 }
37903
37904 /* Return whether we need to always update the saved TOC pointer when we update
37905 the stack pointer. */
37906
37907 static bool
37908 rs6000_save_toc_in_prologue_p (void)
37909 {
37910 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
37911 }
37912
37913 #ifdef HAVE_GAS_HIDDEN
37914 # define USE_HIDDEN_LINKONCE 1
37915 #else
37916 # define USE_HIDDEN_LINKONCE 0
37917 #endif
37918
37919 /* Fills in the label name that should be used for a 476 link stack thunk. */
37920
37921 void
37922 get_ppc476_thunk_name (char name[32])
37923 {
37924 gcc_assert (TARGET_LINK_STACK);
37925
37926 if (USE_HIDDEN_LINKONCE)
37927 sprintf (name, "__ppc476.get_thunk");
37928 else
37929 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
37930 }
37931
37932 /* This function emits the simple thunk routine that is used to preserve
37933 the link stack on the 476 cpu. */
37934
37935 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
37936 static void
37937 rs6000_code_end (void)
37938 {
37939 char name[32];
37940 tree decl;
37941
37942 if (!TARGET_LINK_STACK)
37943 return;
37944
37945 get_ppc476_thunk_name (name);
37946
37947 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
37948 build_function_type_list (void_type_node, NULL_TREE));
37949 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
37950 NULL_TREE, void_type_node);
37951 TREE_PUBLIC (decl) = 1;
37952 TREE_STATIC (decl) = 1;
37953
37954 #if RS6000_WEAK
37955 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
37956 {
37957 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
37958 targetm.asm_out.unique_section (decl, 0);
37959 switch_to_section (get_named_section (decl, NULL, 0));
37960 DECL_WEAK (decl) = 1;
37961 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37962 targetm.asm_out.globalize_label (asm_out_file, name);
37963 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37964 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37965 }
37966 else
37967 #endif
37968 {
37969 switch_to_section (text_section);
37970 ASM_OUTPUT_LABEL (asm_out_file, name);
37971 }
37972
37973 DECL_INITIAL (decl) = make_node (BLOCK);
37974 current_function_decl = decl;
37975 allocate_struct_function (decl, false);
37976 init_function_start (decl);
37977 first_function_block_is_cold = false;
37978 /* Make sure unwind info is emitted for the thunk if needed. */
37979 final_start_function (emit_barrier (), asm_out_file, 1);
37980
37981 fputs ("\tblr\n", asm_out_file);
37982
37983 final_end_function ();
37984 init_insn_lengths ();
37985 free_after_compilation (cfun);
37986 set_cfun (NULL);
37987 current_function_decl = NULL;
37988 }
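
/* With USE_HIDDEN_LINKONCE, the emitted thunk amounts to this assembly
   (an illustrative sketch; section directives vary by target):

	.weak __ppc476.get_thunk
	.hidden __ppc476.get_thunk
     __ppc476.get_thunk:
	blr
*/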
37989
37990 /* Add r30 to hard reg set if the prologue sets it up and it is not
37991 pic_offset_table_rtx. */
37992
37993 static void
37994 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37995 {
37996 if (!TARGET_SINGLE_PIC_BASE
37997 && TARGET_TOC
37998 && TARGET_MINIMAL_TOC
37999 && !constant_pool_empty_p ())
38000 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38001 if (cfun->machine->split_stack_argp_used)
38002 add_to_hard_reg_set (&set->set, Pmode, 12);
38003 }
38004
38005 \f
38006 /* Helper function for rs6000_split_logical to emit a logical instruction after
38007 spliting the operation to single GPR registers.
38008
38009 DEST is the destination register.
38010 OP1 and OP2 are the input source registers.
38011 CODE is the base operation (AND, IOR, XOR, NOT).
38012 MODE is the machine mode.
38013 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38014 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38015 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38016
38017 static void
38018 rs6000_split_logical_inner (rtx dest,
38019 rtx op1,
38020 rtx op2,
38021 enum rtx_code code,
38022 machine_mode mode,
38023 bool complement_final_p,
38024 bool complement_op1_p,
38025 bool complement_op2_p)
38026 {
38027 rtx bool_rtx;
38028
38029 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38030 if (op2 && GET_CODE (op2) == CONST_INT
38031 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38032 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38033 {
38034 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38035 HOST_WIDE_INT value = INTVAL (op2) & mask;
38036
38037 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38038 if (code == AND)
38039 {
38040 if (value == 0)
38041 {
38042 emit_insn (gen_rtx_SET (dest, const0_rtx));
38043 return;
38044 }
38045
38046 else if (value == mask)
38047 {
38048 if (!rtx_equal_p (dest, op1))
38049 emit_insn (gen_rtx_SET (dest, op1));
38050 return;
38051 }
38052 }
38053
38054 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38055 	 into separate ORI/ORIS or XORI/XORIS instructions.  */
38056 else if (code == IOR || code == XOR)
38057 {
38058 if (value == 0)
38059 {
38060 if (!rtx_equal_p (dest, op1))
38061 emit_insn (gen_rtx_SET (dest, op1));
38062 return;
38063 }
38064 }
38065 }
38066
38067 if (code == AND && mode == SImode
38068 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38069 {
38070 emit_insn (gen_andsi3 (dest, op1, op2));
38071 return;
38072 }
38073
38074 if (complement_op1_p)
38075 op1 = gen_rtx_NOT (mode, op1);
38076
38077 if (complement_op2_p)
38078 op2 = gen_rtx_NOT (mode, op2);
38079
38080 /* For canonical RTL, if only one arm is inverted it is the first. */
38081 if (!complement_op1_p && complement_op2_p)
38082 std::swap (op1, op2);
38083
38084 bool_rtx = ((code == NOT)
38085 ? gen_rtx_NOT (mode, op1)
38086 : gen_rtx_fmt_ee (code, mode, op1, op2));
38087
38088 if (complement_final_p)
38089 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38090
38091 emit_insn (gen_rtx_SET (dest, bool_rtx));
38092 }
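
/* For instance (illustrative): "x & 0" is emitted as a plain set of 0, and
   "x & 0xffffffff" in SImode collapses to at most a register move, so no
   AND instruction is generated for either.  */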
38093
38094 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38095 operations are split immediately during RTL generation to allow for more
38096 optimizations of the AND/IOR/XOR.
38097
38098 OPERANDS is an array containing the destination and two input operands.
38099 CODE is the base operation (AND, IOR, XOR, NOT).
38100    If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38101    If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38102    If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
38106
38107 static void
38108 rs6000_split_logical_di (rtx operands[3],
38109 enum rtx_code code,
38110 bool complement_final_p,
38111 bool complement_op1_p,
38112 bool complement_op2_p)
38113 {
38114 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38115 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38116 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38117 enum hi_lo { hi = 0, lo = 1 };
38118 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38119 size_t i;
38120
38121 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38122 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38123 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38124 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38125
38126 if (code == NOT)
38127 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38128 else
38129 {
38130 if (GET_CODE (operands[2]) != CONST_INT)
38131 {
38132 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38133 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38134 }
38135 else
38136 {
38137 HOST_WIDE_INT value = INTVAL (operands[2]);
38138 HOST_WIDE_INT value_hi_lo[2];
38139
38140 gcc_assert (!complement_final_p);
38141 gcc_assert (!complement_op1_p);
38142 gcc_assert (!complement_op2_p);
38143
38144 value_hi_lo[hi] = value >> 32;
38145 value_hi_lo[lo] = value & lower_32bits;
38146
38147 for (i = 0; i < 2; i++)
38148 {
38149 HOST_WIDE_INT sub_value = value_hi_lo[i];
38150
38151 if (sub_value & sign_bit)
38152 sub_value |= upper_32bits;
38153
38154 op2_hi_lo[i] = GEN_INT (sub_value);
38155
38156 /* If this is an AND instruction, check to see if we need to load
38157 the value in a register. */
38158 if (code == AND && sub_value != -1 && sub_value != 0
38159 && !and_operand (op2_hi_lo[i], SImode))
38160 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38161 }
38162 }
38163 }
38164
38165 for (i = 0; i < 2; i++)
38166 {
38167 /* Split large IOR/XOR operations. */
38168 if ((code == IOR || code == XOR)
38169 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38170 && !complement_final_p
38171 && !complement_op1_p
38172 && !complement_op2_p
38173 && !logical_const_operand (op2_hi_lo[i], SImode))
38174 {
38175 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38176 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38177 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38178 rtx tmp = gen_reg_rtx (SImode);
38179
38180 /* Make sure the constant is sign extended. */
38181 if ((hi_16bits & sign_bit) != 0)
38182 hi_16bits |= upper_32bits;
38183
38184 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38185 code, SImode, false, false, false);
38186
38187 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38188 code, SImode, false, false, false);
38189 }
38190 else
38191 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38192 code, SImode, complement_final_p,
38193 complement_op1_p, complement_op2_p);
38194 }
38195
38196 return;
38197 }
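
/* Worked example (illustrative): on a 32-bit target, a DImode
   "a |= 0x12345678" has a zero high word (IOR with 0 becomes at most a
   move), while the low word needs two instructions, since the constant
   fits neither ORI nor ORIS alone:

	oris 3,3,0x1234		# IOR with hi_16bits
	ori 3,3,0x5678		# IOR with lo_16bits
*/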
38198
38199 /* Split the insns that make up boolean operations operating on multiple GPR
38200 registers. The boolean MD patterns ensure that the inputs either are
38201 exactly the same as the output registers, or there is no overlap.
38202
38203 OPERANDS is an array containing the destination and two input operands.
38204 CODE is the base operation (AND, IOR, XOR, NOT).
38205 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38206 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38207 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38208
38209 void
38210 rs6000_split_logical (rtx operands[3],
38211 enum rtx_code code,
38212 bool complement_final_p,
38213 bool complement_op1_p,
38214 bool complement_op2_p)
38215 {
38216 machine_mode mode = GET_MODE (operands[0]);
38217 machine_mode sub_mode;
38218 rtx op0, op1, op2;
38219 int sub_size, regno0, regno1, nregs, i;
38220
38221 /* If this is DImode, use the specialized version that can run before
38222 register allocation. */
38223 if (mode == DImode && !TARGET_POWERPC64)
38224 {
38225 rs6000_split_logical_di (operands, code, complement_final_p,
38226 complement_op1_p, complement_op2_p);
38227 return;
38228 }
38229
38230 op0 = operands[0];
38231 op1 = operands[1];
38232 op2 = (code == NOT) ? NULL_RTX : operands[2];
38233 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38234 sub_size = GET_MODE_SIZE (sub_mode);
38235 regno0 = REGNO (op0);
38236 regno1 = REGNO (op1);
38237
38238 gcc_assert (reload_completed);
38239 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38240 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38241
38242 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38243 gcc_assert (nregs > 1);
38244
38245 if (op2 && REG_P (op2))
38246 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38247
38248 for (i = 0; i < nregs; i++)
38249 {
38250 int offset = i * sub_size;
38251 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38252 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38253 rtx sub_op2 = ((code == NOT)
38254 ? NULL_RTX
38255 : simplify_subreg (sub_mode, op2, mode, offset));
38256
38257 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38258 complement_final_p, complement_op1_p,
38259 complement_op2_p);
38260 }
38261
38262 return;
38263 }
38264
38265 \f
38266 /* Return true if the peephole2 pass can combine an addis instruction with
38267 a D-form load whose offset allows the two insns to be fused together on
38268 a power8. */
38269
38270 bool
38271 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38272 rtx addis_value, /* addis value. */
38273 rtx target, /* target register that is loaded. */
38274 rtx mem) /* bottom part of the memory addr. */
38275 {
38276 rtx addr;
38277 rtx base_reg;
38278
38279 /* Validate arguments. */
38280 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38281 return false;
38282
38283 if (!base_reg_operand (target, GET_MODE (target)))
38284 return false;
38285
38286 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38287 return false;
38288
38289 /* Allow sign/zero extension. */
38290 if (GET_CODE (mem) == ZERO_EXTEND
38291 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38292 mem = XEXP (mem, 0);
38293
38294 if (!MEM_P (mem))
38295 return false;
38296
38297 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38298 return false;
38299
38300 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38301 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38302 return false;
38303
38304 /* Validate that the register used to load the high value is either the
38305 register being loaded, or we can safely replace its use.
38306
38307 This function is only called from the peephole2 pass and we assume that
38308 there are 2 instructions in the peephole (addis and load), so we want to
38309 check if the target register was not used in the memory address and the
38310 register to hold the addis result is dead after the peephole. */
38311 if (REGNO (addis_reg) != REGNO (target))
38312 {
38313 if (reg_mentioned_p (target, mem))
38314 return false;
38315
38316 if (!peep2_reg_dead_p (2, addis_reg))
38317 return false;
38318
38319 /* If the target register being loaded is the stack pointer, we must
38320 avoid loading any other value into it, even temporarily. */
38321 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38322 return false;
38323 }
38324
38325 base_reg = XEXP (addr, 0);
38326 return REGNO (addis_reg) == REGNO (base_reg);
38327 }
38328
38329 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38330 sequence. We adjust the addis register to use the target register. If the
38331 load sign extends, we adjust the code to do the zero extending load, and an
38332 explicit sign extension later since the fusion only covers zero extending
38333 loads.
38334
38335 The operands are:
38336 operands[0] register set with addis (to be replaced with target)
38337 operands[1] value set via addis
38338 operands[2] target register being loaded
38339 operands[3] D-form memory reference using operands[0]. */
38340
38341 void
38342 expand_fusion_gpr_load (rtx *operands)
38343 {
38344 rtx addis_value = operands[1];
38345 rtx target = operands[2];
38346 rtx orig_mem = operands[3];
38347 rtx new_addr, new_mem, orig_addr, offset;
38348 enum rtx_code plus_or_lo_sum;
38349 machine_mode target_mode = GET_MODE (target);
38350 machine_mode extend_mode = target_mode;
38351 machine_mode ptr_mode = Pmode;
38352 enum rtx_code extend = UNKNOWN;
38353
38354 if (GET_CODE (orig_mem) == ZERO_EXTEND
38355 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38356 {
38357 extend = GET_CODE (orig_mem);
38358 orig_mem = XEXP (orig_mem, 0);
38359 target_mode = GET_MODE (orig_mem);
38360 }
38361
38362 gcc_assert (MEM_P (orig_mem));
38363
38364 orig_addr = XEXP (orig_mem, 0);
38365 plus_or_lo_sum = GET_CODE (orig_addr);
38366 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38367
38368 offset = XEXP (orig_addr, 1);
38369 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38370 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38371
38372 if (extend != UNKNOWN)
38373 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
38374
38375 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38376 UNSPEC_FUSION_GPR);
38377 emit_insn (gen_rtx_SET (target, new_mem));
38378
38379 if (extend == SIGN_EXTEND)
38380 {
38381 int sub_off = ((BYTES_BIG_ENDIAN)
38382 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38383 : 0);
38384 rtx sign_reg
38385 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38386
38387 emit_insn (gen_rtx_SET (target,
38388 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38389 }
38390
38391 return;
38392 }
38393
38394 /* Emit the addis instruction that will be part of a fused instruction
38395 sequence. */
38396
38397 void
38398 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
38399 const char *mode_name)
38400 {
38401 rtx fuse_ops[10];
38402 char insn_template[80];
38403 const char *addis_str = NULL;
38404 const char *comment_str = ASM_COMMENT_START;
38405
38406 if (*comment_str == ' ')
38407 comment_str++;
38408
38409 /* Emit the addis instruction. */
38410 fuse_ops[0] = target;
38411 if (satisfies_constraint_L (addis_value))
38412 {
38413 fuse_ops[1] = addis_value;
38414 addis_str = "lis %0,%v1";
38415 }
38416
38417 else if (GET_CODE (addis_value) == PLUS)
38418 {
38419 rtx op0 = XEXP (addis_value, 0);
38420 rtx op1 = XEXP (addis_value, 1);
38421
38422 if (REG_P (op0) && CONST_INT_P (op1)
38423 && satisfies_constraint_L (op1))
38424 {
38425 fuse_ops[1] = op0;
38426 fuse_ops[2] = op1;
38427 addis_str = "addis %0,%1,%v2";
38428 }
38429 }
38430
38431 else if (GET_CODE (addis_value) == HIGH)
38432 {
38433 rtx value = XEXP (addis_value, 0);
38434 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38435 {
38436 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38437 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38438 if (TARGET_ELF)
38439 addis_str = "addis %0,%2,%1@toc@ha";
38440
38441 else if (TARGET_XCOFF)
38442 addis_str = "addis %0,%1@u(%2)";
38443
38444 else
38445 gcc_unreachable ();
38446 }
38447
38448 else if (GET_CODE (value) == PLUS)
38449 {
38450 rtx op0 = XEXP (value, 0);
38451 rtx op1 = XEXP (value, 1);
38452
38453 if (GET_CODE (op0) == UNSPEC
38454 && XINT (op0, 1) == UNSPEC_TOCREL
38455 && CONST_INT_P (op1))
38456 {
38457 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38458 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38459 fuse_ops[3] = op1;
38460 if (TARGET_ELF)
38461 addis_str = "addis %0,%2,%1+%3@toc@ha";
38462
38463 else if (TARGET_XCOFF)
38464 addis_str = "addis %0,%1+%3@u(%2)";
38465
38466 else
38467 gcc_unreachable ();
38468 }
38469 }
38470
38471 else if (satisfies_constraint_L (value))
38472 {
38473 fuse_ops[1] = value;
38474 addis_str = "lis %0,%v1";
38475 }
38476
38477 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38478 {
38479 fuse_ops[1] = value;
38480 addis_str = "lis %0,%1@ha";
38481 }
38482 }
38483
38484 if (!addis_str)
38485 fatal_insn ("Could not generate addis value for fusion", addis_value);
38486
38487 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
38488 comment, mode_name);
38489 output_asm_insn (insn_template, fuse_ops);
38490 }
38491
38492 /* Emit a D-form load or store instruction that is the second instruction
38493 of a fusion sequence. */
38494
38495 void
38496 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
38497 const char *insn_str)
38498 {
38499 rtx fuse_ops[10];
38500 char insn_template[80];
38501
38502 fuse_ops[0] = load_store_reg;
38503 fuse_ops[1] = addis_reg;
38504
38505 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38506 {
38507 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38508 fuse_ops[2] = offset;
38509 output_asm_insn (insn_template, fuse_ops);
38510 }
38511
38512 else if (GET_CODE (offset) == UNSPEC
38513 && XINT (offset, 1) == UNSPEC_TOCREL)
38514 {
38515 if (TARGET_ELF)
38516 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38517
38518 else if (TARGET_XCOFF)
38519 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38520
38521 else
38522 gcc_unreachable ();
38523
38524 fuse_ops[2] = XVECEXP (offset, 0, 0);
38525 output_asm_insn (insn_template, fuse_ops);
38526 }
38527
38528 else if (GET_CODE (offset) == PLUS
38529 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38530 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38531 && CONST_INT_P (XEXP (offset, 1)))
38532 {
38533 rtx tocrel_unspec = XEXP (offset, 0);
38534 if (TARGET_ELF)
38535 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38536
38537 else if (TARGET_XCOFF)
38538 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38539
38540 else
38541 gcc_unreachable ();
38542
38543 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38544 fuse_ops[3] = XEXP (offset, 1);
38545 output_asm_insn (insn_template, fuse_ops);
38546 }
38547
38548 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38549 {
38550 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38551
38552 fuse_ops[2] = offset;
38553 output_asm_insn (insn_template, fuse_ops);
38554 }
38555
38556 else
38557 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38558
38559 return;
38560 }
38561
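/* Editorial sketch (not part of GCC): the CONST_INT case above builds an
   operand template of the following shape; %0, %1 and %2 are substituted
   from fuse_ops by output_asm_insn. Guarded with #if 0; build standalone
   to run.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char templ[80];

  sprintf (templ, "%s %%0,%%2(%%1)", "lwz");
  printf ("%s\n", templ);	/* prints: lwz %0,%2(%1) */
  return 0;
}
#endif
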
38562 /* Wrap a TOC address that can be fused to indicate that special fusion
38563 processing is needed. */
38564
38565 rtx
38566 fusion_wrap_memory_address (rtx old_mem)
38567 {
38568 rtx old_addr = XEXP (old_mem, 0);
38569 rtvec v = gen_rtvec (1, old_addr);
38570 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38571 return replace_equiv_address_nv (old_mem, new_addr, false);
38572 }
38573
38574 /* Given an address, convert it into the addis and load offset parts. Addresses
38575 created during the peephole2 process look like:
38576 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38577 (unspec [(...)] UNSPEC_TOCREL))
38578
38579 Addresses created via toc fusion look like:
38580 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS) */
38581
38582 static void
38583 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38584 {
38585 rtx hi, lo;
38586
38587 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38588 {
38589 lo = XVECEXP (addr, 0, 0);
38590 hi = gen_rtx_HIGH (Pmode, lo);
38591 }
38592 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38593 {
38594 hi = XEXP (addr, 0);
38595 lo = XEXP (addr, 1);
38596 }
38597 else
38598 gcc_unreachable ();
38599
38600 *p_hi = hi;
38601 *p_lo = lo;
38602 }
38603
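/* Editorial sketch (not part of GCC): fusion_split_address operates on
   RTL, but the arithmetic behind the high/low split is the usual @ha/@l
   relation, shown here for 32-bit addresses. The +0x8000 rounding in the
   high part compensates for the sign extension of the low 16 bits that a
   D-form offset applies. Guarded with #if 0; build standalone to run.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static int32_t
part_ha (int32_t addr)		/* @ha: high part, adjusted */
{
  return (addr + 0x8000) >> 16;
}

static int32_t
part_lo (int32_t addr)		/* @l: sign-extended low 16 bits */
{
  return (int16_t) addr;
}

int
main (void)
{
  int32_t addr = 0x12348765;
  int32_t rebuilt = (part_ha (addr) << 16) + part_lo (addr);

  printf ("%d\n", rebuilt == addr);	/* prints 1 */
  return 0;
}
#endif
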
38604 /* Return a string to fuse an addis instruction with a GPR load into the same
38605 register that the addis instruction set. The address that is used
38606 is the logical address that was formed during peephole2:
38607 (lo_sum (high) (low-part))
38608
38609 Or the address is the TOC address that is wrapped before register allocation:
38610 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38611
38612 The code is complicated, so we call output_asm_insn directly, and just
38613 return "". */
38614
38615 const char *
38616 emit_fusion_gpr_load (rtx target, rtx mem)
38617 {
38618 rtx addis_value;
38619 rtx addr;
38620 rtx load_offset;
38621 const char *load_str = NULL;
38622 const char *mode_name = NULL;
38623 machine_mode mode;
38624
38625 if (GET_CODE (mem) == ZERO_EXTEND)
38626 mem = XEXP (mem, 0);
38627
38628 gcc_assert (REG_P (target) && MEM_P (mem));
38629
38630 addr = XEXP (mem, 0);
38631 fusion_split_address (addr, &addis_value, &load_offset);
38632
38633 /* Now emit the load instruction to the same register. */
38634 mode = GET_MODE (mem);
38635 switch (mode)
38636 {
38637 case QImode:
38638 mode_name = "char";
38639 load_str = "lbz";
38640 break;
38641
38642 case HImode:
38643 mode_name = "short";
38644 load_str = "lhz";
38645 break;
38646
38647 case SImode:
38648 case SFmode:
38649 mode_name = (mode == SFmode) ? "float" : "int";
38650 load_str = "lwz";
38651 break;
38652
38653 case DImode:
38654 case DFmode:
38655 gcc_assert (TARGET_POWERPC64);
38656 mode_name = (mode == DFmode) ? "double" : "long";
38657 load_str = "ld";
38658 break;
38659
38660 default:
38661 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38662 }
38663
38664 /* Emit the addis instruction. */
38665 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
38666
38667 /* Emit the D-form load instruction. */
38668 emit_fusion_load_store (target, target, load_offset, load_str);
38669
38670 return "";
38671 }
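
/* Editorial note: for a medium-code-model ELF target, the two calls above
   would typically emit a fused pair along these lines:

	addis 9,2,.LC0@toc@ha		# gpr load fusion, type int
	lwz 9,.LC0@toc@l(9)

   where the trailing comment is produced by emit_fusion_addis. The exact
   registers, symbol and relocations shown are illustrative assumptions;
   they depend on the ABI, code model and register allocation.  */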
38672 \f
38673
38674 /* Return true if the peephole2 can combine a load/store involving a
38675 combination of an addis instruction and the memory operation. This fusion
38676 was added in ISA 3.0 (power9) hardware. */
38677
38678 bool
38679 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38680 rtx addis_value, /* addis value. */
38681 rtx dest, /* destination (memory or register). */
38682 rtx src) /* source (register or memory). */
38683 {
38684 rtx addr, mem, offset;
38685 machine_mode mode = GET_MODE (src);
38686
38687 /* Validate arguments. */
38688 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38689 return false;
38690
38691 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38692 return false;
38693
38694 /* Ignore extend operations that are part of the load. */
38695 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38696 src = XEXP (src, 0);
38697
38698 /* Test for memory<-register or register<-memory. */
38699 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38700 {
38701 if (!MEM_P (dest))
38702 return false;
38703
38704 mem = dest;
38705 }
38706
38707 else if (MEM_P (src))
38708 {
38709 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38710 return false;
38711
38712 mem = src;
38713 }
38714
38715 else
38716 return false;
38717
38718 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38719 if (GET_CODE (addr) == PLUS)
38720 {
38721 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38722 return false;
38723
38724 return satisfies_constraint_I (XEXP (addr, 1));
38725 }
38726
38727 else if (GET_CODE (addr) == LO_SUM)
38728 {
38729 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38730 return false;
38731
38732 offset = XEXP (addr, 1);
38733 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38734 return small_toc_ref (offset, GET_MODE (offset));
38735
38736 else if (TARGET_ELF && !TARGET_POWERPC64)
38737 return CONSTANT_P (offset);
38738 }
38739
38740 return false;
38741 }
38742
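/* Editorial sketch (not part of GCC): the PLUS case above accepts the
   offset only if it satisfies constraint "I", which on rs6000 is a signed
   16-bit constant (the range a D-form instruction can encode; the
   authoritative definition lives in constraints.md). Guarded with #if 0;
   build standalone to run.  */
#if 0
#include <stdio.h>

static int
fits_d_form_offset (long offset)	/* model of constraint "I" */
{
  return offset >= -32768 && offset <= 32767;
}

int
main (void)
{
  printf ("%d %d\n", fits_d_form_offset (32767),
	  fits_d_form_offset (32768));	/* prints: 1 0 */
  return 0;
}
#endif
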
38743 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38744 load sequence.
38745
38746 The operands are:
38747 operands[0] register set with addis
38748 operands[1] value set via addis
38749 operands[2] target register being loaded
38750 operands[3] D-form memory reference using operands[0].
38751
38752 This is similar to the fusion introduced with power8, except that it applies
38753 to both loads and stores and does not require the result register to be the
38754 same as the base register. At the moment, we only do this if the register
38755 set with addis is dead after the sequence. */
38756
38757 void
38758 expand_fusion_p9_load (rtx *operands)
38759 {
38760 rtx tmp_reg = operands[0];
38761 rtx addis_value = operands[1];
38762 rtx target = operands[2];
38763 rtx orig_mem = operands[3];
38764 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38765 enum rtx_code plus_or_lo_sum;
38766 machine_mode target_mode = GET_MODE (target);
38767 machine_mode extend_mode = target_mode;
38768 machine_mode ptr_mode = Pmode;
38769 enum rtx_code extend = UNKNOWN;
38770
38771 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38772 {
38773 extend = GET_CODE (orig_mem);
38774 orig_mem = XEXP (orig_mem, 0);
38775 target_mode = GET_MODE (orig_mem);
38776 }
38777
38778 gcc_assert (MEM_P (orig_mem));
38779
38780 orig_addr = XEXP (orig_mem, 0);
38781 plus_or_lo_sum = GET_CODE (orig_addr);
38782 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38783
38784 offset = XEXP (orig_addr, 1);
38785 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38786 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38787
38788 if (extend != UNKNOWN)
38789 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38790
38791 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38792 UNSPEC_FUSION_P9);
38793
38794 set = gen_rtx_SET (target, new_mem);
38795 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38796 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38797 emit_insn (insn);
38798
38799 return;
38800 }
38801
38802 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38803 store sequence.
38804
38805 The operands are:
38806 operands[0] register set with addis
38807 operands[1] value set via addis
38808 operands[2] target D-form memory being stored to
38809 operands[3] register being stored
38810
38811 This is similar to the fusion introduced with power8, except that it applies
38812 to both loads and stores and does not require the result register to be the
38813 same as the base register. At the moment, we only do this if the register
38814 set with addis is dead after the sequence. */
38815
38816 void
38817 expand_fusion_p9_store (rtx *operands)
38818 {
38819 rtx tmp_reg = operands[0];
38820 rtx addis_value = operands[1];
38821 rtx orig_mem = operands[2];
38822 rtx src = operands[3];
38823 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
38824 enum rtx_code plus_or_lo_sum;
38825 machine_mode target_mode = GET_MODE (orig_mem);
38826 machine_mode ptr_mode = Pmode;
38827
38828 gcc_assert (MEM_P (orig_mem));
38829
38830 orig_addr = XEXP (orig_mem, 0);
38831 plus_or_lo_sum = GET_CODE (orig_addr);
38832 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38833
38834 offset = XEXP (orig_addr, 1);
38835 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38836 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38837
38838 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
38839 UNSPEC_FUSION_P9);
38840
38841 set = gen_rtx_SET (new_mem, new_src);
38842 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38843 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38844 emit_insn (insn);
38845
38846 return;
38847 }
38848
38849 /* Return a string to fuse an addis instruction with a load using extended
38850 fusion. The address that is used is the logical address that was formed
38851 during peephole2: (lo_sum (high) (low-part))
38852
38853 The code is complicated, so we call output_asm_insn directly, and just
38854 return "". */
38855
38856 const char *
38857 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
38858 {
38859 machine_mode mode = GET_MODE (reg);
38860 rtx hi;
38861 rtx lo;
38862 rtx addr;
38863 const char *load_string;
38864 int r;
38865
38866 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
38867 {
38868 mem = XEXP (mem, 0);
38869 mode = GET_MODE (mem);
38870 }
38871
38872 if (GET_CODE (reg) == SUBREG)
38873 {
38874 gcc_assert (SUBREG_BYTE (reg) == 0);
38875 reg = SUBREG_REG (reg);
38876 }
38877
38878 if (!REG_P (reg))
38879 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
38880
38881 r = REGNO (reg);
38882 if (FP_REGNO_P (r))
38883 {
38884 if (mode == SFmode)
38885 load_string = "lfs";
38886 else if (mode == DFmode || mode == DImode)
38887 load_string = "lfd";
38888 else
38889 gcc_unreachable ();
38890 }
38891 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
38892 {
38893 if (mode == SFmode)
38894 load_string = "lxssp";
38895 else if (mode == DFmode || mode == DImode)
38896 load_string = "lxsd";
38897 else
38898 gcc_unreachable ();
38899 }
38900 else if (INT_REGNO_P (r))
38901 {
38902 switch (mode)
38903 {
38904 case QImode:
38905 load_string = "lbz";
38906 break;
38907 case HImode:
38908 load_string = "lhz";
38909 break;
38910 case SImode:
38911 case SFmode:
38912 load_string = "lwz";
38913 break;
38914 case DImode:
38915 case DFmode:
38916 if (!TARGET_POWERPC64)
38917 gcc_unreachable ();
38918 load_string = "ld";
38919 break;
38920 default:
38921 gcc_unreachable ();
38922 }
38923 }
38924 else
38925 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
38926
38927 if (!MEM_P (mem))
38928 fatal_insn ("emit_fusion_p9_load not MEM", mem);
38929
38930 addr = XEXP (mem, 0);
38931 fusion_split_address (addr, &hi, &lo);
38932
38933 /* Emit the addis instruction. */
38934 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
38935
38936 /* Emit the D-form load instruction. */
38937 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
38938
38939 return "";
38940 }
38941
38942 /* Return a string to fuse an addis instruction with a store using extended
38943 fusion. The address that is used is the logical address that was formed
38944 during peephole2: (lo_sum (high) (low-part))
38945
38946 The code is complicated, so we call output_asm_insn directly, and just
38947 return "". */
38948
38949 const char *
38950 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
38951 {
38952 machine_mode mode = GET_MODE (reg);
38953 rtx hi;
38954 rtx lo;
38955 rtx addr;
38956 const char *store_string;
38957 int r;
38958
38959 if (GET_CODE (reg) == SUBREG)
38960 {
38961 gcc_assert (SUBREG_BYTE (reg) == 0);
38962 reg = SUBREG_REG (reg);
38963 }
38964
38965 if (!REG_P (reg))
38966 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
38967
38968 r = REGNO (reg);
38969 if (FP_REGNO_P (r))
38970 {
38971 if (mode == SFmode)
38972 store_string = "stfs";
38973 else if (mode == DFmode)
38974 store_string = "stfd";
38975 else
38976 gcc_unreachable ();
38977 }
38978 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
38979 {
38980 if (mode == SFmode)
38981 store_string = "stxssp";
38982 else if (mode == DFmode || mode == DImode)
38983 store_string = "stxsd";
38984 else
38985 gcc_unreachable ();
38986 }
38987 else if (INT_REGNO_P (r))
38988 {
38989 switch (mode)
38990 {
38991 case QImode:
38992 store_string = "stb";
38993 break;
38994 case HImode:
38995 store_string = "sth";
38996 break;
38997 case SImode:
38998 case SFmode:
38999 store_string = "stw";
39000 break;
39001 case DImode:
39002 case DFmode:
39003 if (!TARGET_POWERPC64)
39004 gcc_unreachable ();
39005 store_string = "std";
39006 break;
39007 default:
39008 gcc_unreachable ();
39009 }
39010 }
39011 else
39012 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39013
39014 if (!MEM_P (mem))
39015 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39016
39017 addr = XEXP (mem, 0);
39018 fusion_split_address (addr, &hi, &lo);
39019
39020 /* Emit the addis instruction. */
39021 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
39022
39023 /* Emit the D-form store instruction. */
39024 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39025
39026 return "";
39027 }
39028
39029 \f
39030 /* Analyze vector computations and remove unnecessary doubleword
39031 swaps (xxswapdi instructions). This pass is performed only
39032 for little-endian VSX code generation.
39033
39034 For this specific case, loads and stores of 4x32 and 2x64 vectors
39035 are inefficient. These are implemented using the lxvd2x and
39036 stxvd2x instructions, which invert the order of doublewords in
39037 a vector register. Thus the code generation inserts an xxswapdi
39038 after each such load, and prior to each such store. (For spill
39039 code after register assignment, an additional xxswapdi is inserted
39040 following each store in order to return a hard register to its
39041 unpermuted value.)
39042
39043 The extra xxswapdi instructions reduce performance. This can be
39044 particularly bad for vectorized code. The purpose of this pass
39045 is to reduce the number of xxswapdi instructions required for
39046 correctness.
39047
39048 The primary insight is that much code that operates on vectors
39049 does not care about the relative order of elements in a register,
39050 so long as the correct memory order is preserved. If we have
39051 a computation where all input values are provided by lxvd2x/xxswapdi
39052 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
39053 and all intermediate computations are pure SIMD (independent of
39054 element order), then all the xxswapdi's associated with the loads
39055 and stores may be removed.
39056
39057 This pass uses some of the infrastructure and logical ideas from
39058 the "web" pass in web.c. We create maximal webs of computations
39059 fitting the description above using union-find. Each such web is
39060 then optimized by removing its unnecessary xxswapdi instructions.
39061
39062 The pass is placed prior to global optimization so that we can
39063 perform the optimization in the safest and simplest way possible;
39064 that is, by replacing each xxswapdi insn with a register copy insn.
39065 Subsequent forward propagation will remove copies where possible.
39066
39067 There are some operations sensitive to element order for which we
39068 can still allow the operation, provided we modify those operations.
39069 These include CONST_VECTORs, for which we must swap the first and
39070 second halves of the constant vector; and SUBREGs, for which we
39071 must adjust the byte offset to account for the swapped doublewords.
39072 A remaining opportunity would be non-immediate-form splats, for
39073 which we should adjust the selected lane of the input. We should
39074 also make code generation adjustments for sum-across operations,
39075 since this is a common vectorizer reduction.
39076
39077 Because we run prior to the first split, we can see loads and stores
39078 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
39079 vector loads and stores that have not yet been split into a permuting
39080 load/store and a swap. (One way this can happen is with a builtin
39081 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
39082 than deleting a swap, we convert the load/store into a permuting
39083 load/store (which effectively removes the swap). */
39084
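/* Editorial sketch (not part of GCC): a plain-C model of why paired
   corrective swaps around a pure SIMD computation can be removed.
   Swapping doublewords, applying an elementwise operation, and swapping
   back gives the same result as the elementwise operation alone. Guarded
   with #if 0; build standalone to run.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static void
swap_doublewords (uint32_t v[4])	/* swap the two 8-byte halves */
{
  uint32_t t0 = v[0], t1 = v[1];
  v[0] = v[2]; v[1] = v[3];
  v[2] = t0;   v[3] = t1;
}

int
main (void)
{
  uint32_t v[4] = { 10, 20, 30, 40 };

  swap_doublewords (v);		/* doubleword-swapped register image */
  for (int i = 0; i < 4; i++)	/* pure SIMD: element order irrelevant */
    v[i] += 1;
  swap_doublewords (v);		/* corrective swap before the store */

  for (int i = 0; i < 4; i++)
    printf ("%u ", v[i]);	/* prints: 11 21 31 41, as without swaps */
  printf ("\n");
  return 0;
}
#endif
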
39085 /* Notes on Permutes
39086
39087 We do not currently handle computations that contain permutes. There
39088 is a general transformation that can be performed correctly, but it
39089 may introduce more expensive code than it replaces. To handle these
39090 would require a cost model to determine when to perform the optimization.
39091 This commentary records how this could be done if desired.
39092
39093 The most general permute is something like this (example for V16QI):
39094
39095 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
39096 (parallel [(const_int a0) (const_int a1)
39097 ...
39098 (const_int a14) (const_int a15)]))
39099
39100 where a0,...,a15 are in [0,31] and select elements from op1 and op2
39101 to produce in the result.
39102
39103 Regardless of mode, we can convert the PARALLEL to a mask of 16
39104 byte-element selectors. Let's call this M, with M[i] representing
39105 the ith byte-element selector value. Then if we swap doublewords
39106 throughout the computation, we can get correct behavior by replacing
39107 M with M' as follows:
39108
39109 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
39110 { ((M[i]+8)%16)+16 : M[i] in [16,31]
39111
39112 This seems promising at first, since we are just replacing one mask
39113 with another. But certain masks are preferable to others. If M
39114 is a mask that matches a vmrghh pattern, for example, M' certainly
39115 will not. Instead of a single vmrghh, we would generate a load of
39116 M' and a vperm. So we would need to know how many xxswapd's we can
39117 remove as a result of this transformation to determine if it's
39118 profitable; and preferably the logic would need to be aware of all
39119 the special preferable masks.
39120
39121 Another form of permute is an UNSPEC_VPERM, in which the mask is
39122 already in a register. In some cases, this mask may be a constant
39123 that we can discover with ud-chains, in which case the above
39124 transformation is ok. However, the common usage here is for the
39125 mask to be produced by an UNSPEC_LVSL, in which case the mask
39126 cannot be known at compile time. In such a case we would have to
39127 generate several instructions to compute M' as above at run time,
39128 and a cost model is needed again.
39129
39130 However, when the mask M for an UNSPEC_VPERM is loaded from the
39131 constant pool, we can replace M with M' as above at no cost
39132 beyond adding a constant pool entry. */
39133
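/* Editorial sketch (not part of GCC): a check of the M -> M' formula
   above. The adjusted selector picks from the doubleword-swapped
   operands exactly the byte that the original selector picks from the
   unswapped ones. Guarded with #if 0; build standalone to run.  */
#if 0
#include <stdio.h>
#include <assert.h>

static int
adjust_selector (int m)		/* M'[i] as defined above */
{
  return m < 16 ? (m + 8) % 16 : ((m + 8) % 16) + 16;
}

int
main (void)
{
  int c[32], cs[32];

  /* c models (vec_concat op1 op2); cs models the same concatenation
     after each operand's doublewords (8-byte halves) are swapped.  */
  for (int i = 0; i < 32; i++)
    c[i] = i;
  for (int i = 0; i < 32; i++)
    cs[i] = (i < 16) ? c[(i + 8) % 16] : c[((i - 16 + 8) % 16) + 16];

  for (int m = 0; m < 32; m++)
    assert (cs[adjust_selector (m)] == c[m]);

  printf ("selector adjustment verified\n");
  return 0;
}
#endif
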
39134 /* This is based on the union-find logic in web.c. web_entry_base is
39135 defined in df.h. */
39136 class swap_web_entry : public web_entry_base
39137 {
39138 public:
39139 /* Pointer to the insn. */
39140 rtx_insn *insn;
39141 /* Set if insn contains a mention of a vector register. All other
39142 fields are undefined if this field is unset. */
39143 unsigned int is_relevant : 1;
39144 /* Set if insn is a load. */
39145 unsigned int is_load : 1;
39146 /* Set if insn is a store. */
39147 unsigned int is_store : 1;
39148 /* Set if insn is a doubleword swap. This can either be a register swap
39149 or a permuting load or store (test is_load and is_store for this). */
39150 unsigned int is_swap : 1;
39151 /* Set if the insn has a live-in use of a parameter register. */
39152 unsigned int is_live_in : 1;
39153 /* Set if the insn has a live-out def of a return register. */
39154 unsigned int is_live_out : 1;
39155 /* Set if the insn contains a subreg reference of a vector register. */
39156 unsigned int contains_subreg : 1;
39157 /* Set if the insn contains a 128-bit integer operand. */
39158 unsigned int is_128_int : 1;
39159 /* Set if this is a call-insn. */
39160 unsigned int is_call : 1;
39161 /* Set if this insn does not perform a vector operation for which
39162 element order matters, or if we know how to fix it up if it does.
39163 Undefined if is_swap is set. */
39164 unsigned int is_swappable : 1;
39165 /* A nonzero value indicates what kind of special handling for this
39166 insn is required if doublewords are swapped. Undefined if
39167 is_swappable is not set. */
39168 unsigned int special_handling : 4;
39169 /* Set if the web represented by this entry cannot be optimized. */
39170 unsigned int web_not_optimizable : 1;
39171 /* Set if this insn should be deleted. */
39172 unsigned int will_delete : 1;
39173 };
39174
39175 enum special_handling_values {
39176 SH_NONE = 0,
39177 SH_CONST_VECTOR,
39178 SH_SUBREG,
39179 SH_NOSWAP_LD,
39180 SH_NOSWAP_ST,
39181 SH_EXTRACT,
39182 SH_SPLAT,
39183 SH_XXPERMDI,
39184 SH_CONCAT,
39185 SH_VPERM
39186 };
39187
39188 /* Union INSN with all insns containing definitions that reach USE.
39189 Detect whether USE is live-in to the current function. */
39190 static void
39191 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
39192 {
39193 struct df_link *link = DF_REF_CHAIN (use);
39194
39195 if (!link)
39196 insn_entry[INSN_UID (insn)].is_live_in = 1;
39197
39198 while (link)
39199 {
39200 if (DF_REF_IS_ARTIFICIAL (link->ref))
39201 insn_entry[INSN_UID (insn)].is_live_in = 1;
39202
39203 if (DF_REF_INSN_INFO (link->ref))
39204 {
39205 rtx def_insn = DF_REF_INSN (link->ref);
39206 (void)unionfind_union (insn_entry + INSN_UID (insn),
39207 insn_entry + INSN_UID (def_insn));
39208 }
39209
39210 link = link->next;
39211 }
39212 }
39213
39214 /* Union INSN with all insns containing uses reached from DEF.
39215 Detect whether DEF is live-out from the current function. */
39216 static void
39217 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
39218 {
39219 struct df_link *link = DF_REF_CHAIN (def);
39220
39221 if (!link)
39222 insn_entry[INSN_UID (insn)].is_live_out = 1;
39223
39224 while (link)
39225 {
39226 /* This could be an eh use or some other artificial use;
39227 we treat these all the same (killing the optimization). */
39228 if (DF_REF_IS_ARTIFICIAL (link->ref))
39229 insn_entry[INSN_UID (insn)].is_live_out = 1;
39230
39231 if (DF_REF_INSN_INFO (link->ref))
39232 {
39233 rtx use_insn = DF_REF_INSN (link->ref);
39234 (void)unionfind_union (insn_entry + INSN_UID (insn),
39235 insn_entry + INSN_UID (use_insn));
39236 }
39237
39238 link = link->next;
39239 }
39240 }
39241
39242 /* Return 1 iff INSN is a load insn, including permuting loads that
39243 represent an lxvd2x instruction; else return 0. */
39244 static unsigned int
39245 insn_is_load_p (rtx insn)
39246 {
39247 rtx body = PATTERN (insn);
39248
39249 if (GET_CODE (body) == SET)
39250 {
39251 if (GET_CODE (SET_SRC (body)) == MEM)
39252 return 1;
39253
39254 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
39255 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
39256 return 1;
39257
39258 return 0;
39259 }
39260
39261 if (GET_CODE (body) != PARALLEL)
39262 return 0;
39263
39264 rtx set = XVECEXP (body, 0, 0);
39265
39266 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
39267 return 1;
39268
39269 return 0;
39270 }
39271
39272 /* Return 1 iff INSN is a store insn, including permuting stores that
39273 represent an stxvd2x instruction; else return 0. */
39274 static unsigned int
39275 insn_is_store_p (rtx insn)
39276 {
39277 rtx body = PATTERN (insn);
39278 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
39279 return 1;
39280 if (GET_CODE (body) != PARALLEL)
39281 return 0;
39282 rtx set = XVECEXP (body, 0, 0);
39283 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
39284 return 1;
39285 return 0;
39286 }
39287
39288 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
39289 a permuting load, or a permuting store. */
39290 static unsigned int
39291 insn_is_swap_p (rtx insn)
39292 {
39293 rtx body = PATTERN (insn);
39294 if (GET_CODE (body) != SET)
39295 return 0;
39296 rtx rhs = SET_SRC (body);
39297 if (GET_CODE (rhs) != VEC_SELECT)
39298 return 0;
39299 rtx parallel = XEXP (rhs, 1);
39300 if (GET_CODE (parallel) != PARALLEL)
39301 return 0;
39302 unsigned int len = XVECLEN (parallel, 0);
39303 if (len != 2 && len != 4 && len != 8 && len != 16)
39304 return 0;
39305 for (unsigned int i = 0; i < len / 2; ++i)
39306 {
39307 rtx op = XVECEXP (parallel, 0, i);
39308 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
39309 return 0;
39310 }
39311 for (unsigned int i = len / 2; i < len; ++i)
39312 {
39313 rtx op = XVECEXP (parallel, 0, i);
39314 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
39315 return 0;
39316 }
39317 return 1;
39318 }
39319
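/* Editorial sketch (not part of GCC): a plain-C mirror of the selector
   test above. The first half of the PARALLEL must select elements
   len/2 .. len-1 and the second half 0 .. len/2-1. Guarded with #if 0;
   build standalone to run.  */
#if 0
#include <stdio.h>

static int
is_swap_selector (const int *sel, int len)
{
  for (int i = 0; i < len / 2; i++)
    if (sel[i] != len / 2 + i)
      return 0;
  for (int i = len / 2; i < len; i++)
    if (sel[i] != i - len / 2)
      return 0;
  return 1;
}

int
main (void)
{
  int dw_swap[4] = { 2, 3, 0, 1 };	/* doubleword swap of a 4x32 vector */
  int other[4] = { 1, 0, 3, 2 };	/* swaps within each doubleword */

  printf ("%d %d\n", is_swap_selector (dw_swap, 4),
	  is_swap_selector (other, 4));	/* prints: 1 0 */
  return 0;
}
#endif
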
39320 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
39321 static bool
39322 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
39323 {
39324 unsigned uid = INSN_UID (insn);
39325 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
39326 return false;
39327
39328 const_rtx tocrel_base;
39329
39330 /* Find the unique use in the swap and locate its def. If the def
39331 isn't unique, punt. */
39332 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39333 df_ref use;
39334 FOR_EACH_INSN_INFO_USE (use, insn_info)
39335 {
39336 struct df_link *def_link = DF_REF_CHAIN (use);
39337 if (!def_link || def_link->next)
39338 return false;
39339
39340 rtx def_insn = DF_REF_INSN (def_link->ref);
39341 unsigned uid2 = INSN_UID (def_insn);
39342 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
39343 return false;
39344
39345 rtx body = PATTERN (def_insn);
39346 if (GET_CODE (body) != SET
39347 || GET_CODE (SET_SRC (body)) != VEC_SELECT
39348 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
39349 return false;
39350
39351 rtx mem = XEXP (SET_SRC (body), 0);
39352 rtx base_reg = XEXP (mem, 0);
39353
39354 df_ref base_use;
39355 insn_info = DF_INSN_INFO_GET (def_insn);
39356 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
39357 {
39358 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
39359 continue;
39360
39361 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
39362 if (!base_def_link || base_def_link->next)
39363 return false;
39364
39365 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
39366 rtx tocrel_body = PATTERN (tocrel_insn);
39367 rtx base, offset;
39368 if (GET_CODE (tocrel_body) != SET)
39369 return false;
39370 /* There is an extra level of indirection for small/large
39371 code models. */
39372 rtx tocrel_expr = SET_SRC (tocrel_body);
39373 if (GET_CODE (tocrel_expr) == MEM)
39374 tocrel_expr = XEXP (tocrel_expr, 0);
39375 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
39376 return false;
39377 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
39378 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
39379 return false;
39380 }
39381 }
39382 return true;
39383 }
39384
39385 /* Return TRUE iff OP matches a V2DF reduction pattern. See the
39386 definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */
39387 static bool
39388 v2df_reduction_p (rtx op)
39389 {
39390 if (GET_MODE (op) != V2DFmode)
39391 return false;
39392
39393 enum rtx_code code = GET_CODE (op);
39394 if (code != PLUS && code != SMIN && code != SMAX)
39395 return false;
39396
39397 rtx concat = XEXP (op, 0);
39398 if (GET_CODE (concat) != VEC_CONCAT)
39399 return false;
39400
39401 rtx select0 = XEXP (concat, 0);
39402 rtx select1 = XEXP (concat, 1);
39403 if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
39404 return false;
39405
39406 rtx reg0 = XEXP (select0, 0);
39407 rtx reg1 = XEXP (select1, 0);
39408 if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
39409 return false;
39410
39411 rtx parallel0 = XEXP (select0, 1);
39412 rtx parallel1 = XEXP (select1, 1);
39413 if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
39414 return false;
39415
39416 if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
39417 || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
39418 return false;
39419
39420 return true;
39421 }
39422
39423 /* Return 1 iff OP is an operand that will not be affected by having
39424 vector doublewords swapped in memory. */
39425 static unsigned int
39426 rtx_is_swappable_p (rtx op, unsigned int *special)
39427 {
39428 enum rtx_code code = GET_CODE (op);
39429 int i, j;
39430 rtx parallel;
39431
39432 switch (code)
39433 {
39434 case LABEL_REF:
39435 case SYMBOL_REF:
39436 case CLOBBER:
39437 case REG:
39438 return 1;
39439
39440 case VEC_CONCAT:
39441 case ASM_INPUT:
39442 case ASM_OPERANDS:
39443 return 0;
39444
39445 case CONST_VECTOR:
39446 {
39447 *special = SH_CONST_VECTOR;
39448 return 1;
39449 }
39450
39451 case VEC_DUPLICATE:
39452 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
39453 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
39454 it represents a vector splat for which we can do special
39455 handling. */
39456 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
39457 return 1;
39458 else if (REG_P (XEXP (op, 0))
39459 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
39460 /* This catches V2DF and V2DI splat, at a minimum. */
39461 return 1;
39462 else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
39463 && REG_P (XEXP (XEXP (op, 0), 0))
39464 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
39465 /* This catches splat of a truncated value. */
39466 return 1;
39467 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
39468 /* If the duplicated item is from a select, defer to the select
39469 processing to see if we can change the lane for the splat. */
39470 return rtx_is_swappable_p (XEXP (op, 0), special);
39471 else
39472 return 0;
39473
39474 case VEC_SELECT:
39475 /* A vec_extract operation is ok if we change the lane. */
39476 if (GET_CODE (XEXP (op, 0)) == REG
39477 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
39478 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
39479 && XVECLEN (parallel, 0) == 1
39480 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
39481 {
39482 *special = SH_EXTRACT;
39483 return 1;
39484 }
39485 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
39486 XXPERMDI is a swap operation, it will be identified by
39487 insn_is_swap_p and therefore we won't get here. */
39488 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
39489 && (GET_MODE (XEXP (op, 0)) == V4DFmode
39490 || GET_MODE (XEXP (op, 0)) == V4DImode)
39491 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
39492 && XVECLEN (parallel, 0) == 2
39493 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
39494 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
39495 {
39496 *special = SH_XXPERMDI;
39497 return 1;
39498 }
39499 else if (v2df_reduction_p (op))
39500 return 1;
39501 else
39502 return 0;
39503
39504 case UNSPEC:
39505 {
39506 /* Various operations are unsafe for this optimization, at least
39507 without significant additional work. Permutes are obviously
39508 problematic, as both the permute control vector and the ordering
39509 of the target values are invalidated by doubleword swapping.
39510 Vector pack and unpack modify the number of vector lanes.
39511 Merge-high/low will not operate correctly on swapped operands.
39512 Vector shifts across element boundaries are clearly uncool,
39513 as are vector select and concatenate operations. Vector
39514 sum-across instructions define one operand with a specific
39515 order-dependent element, so additional fixup code would be
39516 needed to make those work. Vector set and non-immediate-form
39517 vector splat are element-order sensitive. A few of these
39518 cases might be workable with special handling if required.
39519 Adding cost modeling would be appropriate in some cases. */
39520 int val = XINT (op, 1);
39521 switch (val)
39522 {
39523 default:
39524 break;
39525 case UNSPEC_VMRGH_DIRECT:
39526 case UNSPEC_VMRGL_DIRECT:
39527 case UNSPEC_VPACK_SIGN_SIGN_SAT:
39528 case UNSPEC_VPACK_SIGN_UNS_SAT:
39529 case UNSPEC_VPACK_UNS_UNS_MOD:
39530 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
39531 case UNSPEC_VPACK_UNS_UNS_SAT:
39532 case UNSPEC_VPERM:
39533 case UNSPEC_VPERM_UNS:
39534 case UNSPEC_VPERMHI:
39535 case UNSPEC_VPERMSI:
39536 case UNSPEC_VPKPX:
39537 case UNSPEC_VSLDOI:
39538 case UNSPEC_VSLO:
39539 case UNSPEC_VSRO:
39540 case UNSPEC_VSUM2SWS:
39541 case UNSPEC_VSUM4S:
39542 case UNSPEC_VSUM4UBS:
39543 case UNSPEC_VSUMSWS:
39544 case UNSPEC_VSUMSWS_DIRECT:
39545 case UNSPEC_VSX_CONCAT:
39546 case UNSPEC_VSX_SET:
39547 case UNSPEC_VSX_SLDWI:
39548 case UNSPEC_VUNPACK_HI_SIGN:
39549 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
39550 case UNSPEC_VUNPACK_LO_SIGN:
39551 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
39552 case UNSPEC_VUPKHPX:
39553 case UNSPEC_VUPKHS_V4SF:
39554 case UNSPEC_VUPKHU_V4SF:
39555 case UNSPEC_VUPKLPX:
39556 case UNSPEC_VUPKLS_V4SF:
39557 case UNSPEC_VUPKLU_V4SF:
39558 case UNSPEC_VSX_CVDPSPN:
39559 case UNSPEC_VSX_CVSPDP:
39560 case UNSPEC_VSX_CVSPDPN:
39561 case UNSPEC_VSX_EXTRACT:
39562 case UNSPEC_VSX_VSLO:
39563 case UNSPEC_VSX_VEC_INIT:
39564 return 0;
39565 case UNSPEC_VSPLT_DIRECT:
39566 case UNSPEC_VSX_XXSPLTD:
39567 *special = SH_SPLAT;
39568 return 1;
39569 case UNSPEC_REDUC_PLUS:
39570 case UNSPEC_REDUC:
39571 return 1;
39572 }
39573 }
39574
39575 default:
39576 break;
39577 }
39578
39579 const char *fmt = GET_RTX_FORMAT (code);
39580 int ok = 1;
39581
39582 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39583 if (fmt[i] == 'e' || fmt[i] == 'u')
39584 {
39585 unsigned int special_op = SH_NONE;
39586 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
39587 if (special_op == SH_NONE)
39588 continue;
39589 /* Ensure we never have two kinds of special handling
39590 for the same insn. */
39591 if (*special != SH_NONE && *special != special_op)
39592 return 0;
39593 *special = special_op;
39594 }
39595 else if (fmt[i] == 'E')
39596 for (j = 0; j < XVECLEN (op, i); ++j)
39597 {
39598 unsigned int special_op = SH_NONE;
39599 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
39600 if (special_op == SH_NONE)
39601 continue;
39602 /* Ensure we never have two kinds of special handling
39603 for the same insn. */
39604 if (*special != SH_NONE && *special != special_op)
39605 return 0;
39606 *special = special_op;
39607 }
39608
39609 return ok;
39610 }
39611
39612 /* Return 1 iff INSN is an operand that will not be affected by
39613 having vector doublewords swapped in memory (in which case
39614 *SPECIAL is unchanged), or that can be modified to be correct
39615 if vector doublewords are swapped in memory (in which case
39616 *SPECIAL is changed to a value indicating how). */
39617 static unsigned int
39618 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
39619 unsigned int *special)
39620 {
39621 /* Calls are always bad. */
39622 if (GET_CODE (insn) == CALL_INSN)
39623 return 0;
39624
39625 /* Loads and stores seen here are not permuting, but we can still
39626 fix them up by converting them to permuting ones. Exceptions:
39627 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
39628 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
39629 for the SET source. Also we must now make an exception for lvx
39630 and stvx when they are not in the UNSPEC_LVX/STVX form (with the
39631 explicit "& -16") since this leads to unrecognizable insns. */
39632 rtx body = PATTERN (insn);
39633 int i = INSN_UID (insn);
39634
39635 if (insn_entry[i].is_load)
39636 {
39637 if (GET_CODE (body) == SET)
39638 {
39639 rtx rhs = SET_SRC (body);
39640 /* Even without a swap, the RHS might be a vec_select for, say,
39641 a byte-reversing load. */
39642 if (GET_CODE (rhs) != MEM)
39643 return 0;
39644 if (GET_CODE (XEXP (rhs, 0)) == AND)
39645 return 0;
39646
39647 *special = SH_NOSWAP_LD;
39648 return 1;
39649 }
39650 else
39651 return 0;
39652 }
39653
39654 if (insn_entry[i].is_store)
39655 {
39656 if (GET_CODE (body) == SET
39657 && GET_CODE (SET_SRC (body)) != UNSPEC)
39658 {
39659 rtx lhs = SET_DEST (body);
39660 /* Even without a swap, the LHS might be a vec_select for, say,
39661 a byte-reversing store. */
39662 if (GET_CODE (lhs) != MEM)
39663 return 0;
39664 if (GET_CODE (XEXP (lhs, 0)) == AND)
39665 return 0;
39666
39667 *special = SH_NOSWAP_ST;
39668 return 1;
39669 }
39670 else
39671 return 0;
39672 }
39673
39674 /* A convert to single precision can be left as is provided that
39675 all of its uses are in xxspltw instructions that splat BE element
39676 zero. */
39677 if (GET_CODE (body) == SET
39678 && GET_CODE (SET_SRC (body)) == UNSPEC
39679 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
39680 {
39681 df_ref def;
39682 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39683
39684 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39685 {
39686 struct df_link *link = DF_REF_CHAIN (def);
39687 if (!link)
39688 return 0;
39689
39690 for (; link; link = link->next) {
39691 rtx use_insn = DF_REF_INSN (link->ref);
39692 rtx use_body = PATTERN (use_insn);
39693 if (GET_CODE (use_body) != SET
39694 || GET_CODE (SET_SRC (use_body)) != UNSPEC
39695 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
39696 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
39697 return 0;
39698 }
39699 }
39700
39701 return 1;
39702 }
39703
39704 /* A concatenation of two doublewords is ok if we reverse the
39705 order of the inputs. */
39706 if (GET_CODE (body) == SET
39707 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
39708 && (GET_MODE (SET_SRC (body)) == V2DFmode
39709 || GET_MODE (SET_SRC (body)) == V2DImode))
39710 {
39711 *special = SH_CONCAT;
39712 return 1;
39713 }
39714
39715 /* V2DF reductions are always swappable. */
39716 if (GET_CODE (body) == PARALLEL)
39717 {
39718 rtx expr = XVECEXP (body, 0, 0);
39719 if (GET_CODE (expr) == SET
39720 && v2df_reduction_p (SET_SRC (expr)))
39721 return 1;
39722 }
39723
39724 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
39725 constant pool. */
39726 if (GET_CODE (body) == SET
39727 && GET_CODE (SET_SRC (body)) == UNSPEC
39728 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
39729 && XVECLEN (SET_SRC (body), 0) == 3
39730 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
39731 {
39732 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
39733 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39734 df_ref use;
39735 FOR_EACH_INSN_INFO_USE (use, insn_info)
39736 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
39737 {
39738 struct df_link *def_link = DF_REF_CHAIN (use);
39739 /* Punt if multiple definitions for this reg. */
39740 if (def_link && !def_link->next
39741 && const_load_sequence_p (insn_entry,
39742 DF_REF_INSN (def_link->ref)))
39743 {
39744 *special = SH_VPERM;
39745 return 1;
39746 }
39747 }
39748 }
39749
39750 /* Otherwise check the operands for vector lane violations. */
39751 return rtx_is_swappable_p (body, special);
39752 }
39753
39754 enum chain_purpose { FOR_LOADS, FOR_STORES };
39755
39756 /* Return true if the UD or DU chain headed by LINK is non-empty,
39757 and every entry on the chain references an insn that is a
39758 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
39759 register swap must have only permuting loads as reaching defs.
39760 If PURPOSE is FOR_STORES, each such register swap must have only
39761 register swaps or permuting stores as reached uses. */
39762 static bool
39763 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
39764 enum chain_purpose purpose)
39765 {
39766 if (!link)
39767 return false;
39768
39769 for (; link; link = link->next)
39770 {
39771 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
39772 continue;
39773
39774 if (DF_REF_IS_ARTIFICIAL (link->ref))
39775 return false;
39776
39777 rtx reached_insn = DF_REF_INSN (link->ref);
39778 unsigned uid = INSN_UID (reached_insn);
39779 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
39780
39781 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
39782 || insn_entry[uid].is_store)
39783 return false;
39784
39785 if (purpose == FOR_LOADS)
39786 {
39787 df_ref use;
39788 FOR_EACH_INSN_INFO_USE (use, insn_info)
39789 {
39790 struct df_link *swap_link = DF_REF_CHAIN (use);
39791
39792 while (swap_link)
39793 {
39794 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
39795 return false;
39796
39797 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
39798 unsigned uid2 = INSN_UID (swap_def_insn);
39799
39800 /* Only permuting loads are allowed. */
39801 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
39802 return false;
39803
39804 swap_link = swap_link->next;
39805 }
39806 }
39807 }
39808 else if (purpose == FOR_STORES)
39809 {
39810 df_ref def;
39811 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39812 {
39813 struct df_link *swap_link = DF_REF_CHAIN (def);
39814
39815 while (swap_link)
39816 {
39817 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
39818 return false;
39819
39820 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
39821 unsigned uid2 = INSN_UID (swap_use_insn);
39822
39823 /* Permuting stores or register swaps are allowed. */
39824 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
39825 return false;
39826
39827 swap_link = swap_link->next;
39828 }
39829 }
39830 }
39831 }
39832
39833 return true;
39834 }
39835
39836 /* Mark the xxswapdi instructions associated with permuting loads and
39837 stores for removal. Note that we only flag them for deletion here,
39838 as there is a possibility of a swap being reached from multiple
39839 loads, etc. */
39840 static void
39841 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
39842 {
39843 rtx insn = insn_entry[i].insn;
39844 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
39845
39846 if (insn_entry[i].is_load)
39847 {
39848 df_ref def;
39849 FOR_EACH_INSN_INFO_DEF (def, insn_info)
39850 {
39851 struct df_link *link = DF_REF_CHAIN (def);
39852
39853 /* We know by now that these are swaps, so we can delete
39854 them confidently. */
39855 while (link)
39856 {
39857 rtx use_insn = DF_REF_INSN (link->ref);
39858 insn_entry[INSN_UID (use_insn)].will_delete = 1;
39859 link = link->next;
39860 }
39861 }
39862 }
39863 else if (insn_entry[i].is_store)
39864 {
39865 df_ref use;
39866 FOR_EACH_INSN_INFO_USE (use, insn_info)
39867 {
39868 /* Ignore uses for addressability. */
39869 machine_mode mode = GET_MODE (DF_REF_REG (use));
39870 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
39871 continue;
39872
39873 struct df_link *link = DF_REF_CHAIN (use);
39874
39875 /* We know by now that these are swaps, so we can delete
39876 them confidently. */
39877 while (link)
39878 {
39879 rtx def_insn = DF_REF_INSN (link->ref);
39880 insn_entry[INSN_UID (def_insn)].will_delete = 1;
39881 link = link->next;
39882 }
39883 }
39884 }
39885 }
39886
39887 /* OP is either a CONST_VECTOR or an expression containing one.
39888 Swap the first half of the vector with the second in the first
39889 case. Recurse to find it in the second. */
39890 static void
39891 swap_const_vector_halves (rtx op)
39892 {
39893 int i;
39894 enum rtx_code code = GET_CODE (op);
39895 if (GET_CODE (op) == CONST_VECTOR)
39896 {
39897 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
39898 for (i = 0; i < half_units; ++i)
39899 {
39900 rtx temp = CONST_VECTOR_ELT (op, i);
39901 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
39902 CONST_VECTOR_ELT (op, i + half_units) = temp;
39903 }
39904 }
39905 else
39906 {
39907 int j;
39908 const char *fmt = GET_RTX_FORMAT (code);
39909 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39910 if (fmt[i] == 'e' || fmt[i] == 'u')
39911 swap_const_vector_halves (XEXP (op, i));
39912 else if (fmt[i] == 'E')
39913 for (j = 0; j < XVECLEN (op, i); ++j)
39914 swap_const_vector_halves (XVECEXP (op, i, j));
39915 }
39916 }
39917
39918 /* Find all subregs of a vector expression that perform a narrowing,
39919 and adjust the subreg index to account for doubleword swapping. */
39920 static void
39921 adjust_subreg_index (rtx op)
39922 {
39923 enum rtx_code code = GET_CODE (op);
39924 if (code == SUBREG
39925 && (GET_MODE_SIZE (GET_MODE (op))
39926 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
39927 {
39928 unsigned int index = SUBREG_BYTE (op);
39929 if (index < 8)
39930 index += 8;
39931 else
39932 index -= 8;
39933 SUBREG_BYTE (op) = index;
39934 }
39935
39936 const char *fmt = GET_RTX_FORMAT (code);
39937 int i, j;
39938 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
39939 if (fmt[i] == 'e' || fmt[i] == 'u')
39940 adjust_subreg_index (XEXP (op, i));
39941 else if (fmt[i] == 'E')
39942 for (j = 0; j < XVECLEN (op, i); ++j)
39943 adjust_subreg_index (XVECEXP (op, i, j));
39944 }
39945
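/* Editorial sketch (not part of GCC): why a narrowing SUBREG byte offset
   is adjusted by +/-8. The 8 bytes at offset 0 of the original vector
   image sit at offset 8 once the doublewords are swapped, and vice
   versa. Guarded with #if 0; build standalone to run.  */
#if 0
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main (void)
{
  uint8_t vec[16], swapped[16];

  for (int i = 0; i < 16; i++)
    vec[i] = i;

  /* Image of the same vector with its doublewords swapped.  */
  memcpy (swapped, vec + 8, 8);
  memcpy (swapped + 8, vec, 8);

  printf ("%d\n", memcmp (vec, swapped + 8, 8) == 0);	/* prints 1 */
  return 0;
}
#endif
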
39946 /* Convert the non-permuting load INSN to a permuting one. */
39947 static void
39948 permute_load (rtx_insn *insn)
39949 {
39950 rtx body = PATTERN (insn);
39951 rtx mem_op = SET_SRC (body);
39952 rtx tgt_reg = SET_DEST (body);
39953 machine_mode mode = GET_MODE (tgt_reg);
39954 int n_elts = GET_MODE_NUNITS (mode);
39955 int half_elts = n_elts / 2;
39956 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
39957 int i, j;
39958 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
39959 XVECEXP (par, 0, i) = GEN_INT (j);
39960 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
39961 XVECEXP (par, 0, i) = GEN_INT (j);
39962 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
39963 SET_SRC (body) = sel;
39964 INSN_CODE (insn) = -1; /* Force re-recognition. */
39965 df_insn_rescan (insn);
39966
39967 if (dump_file)
39968 fprintf (dump_file, "Replacing load %d with permuted load\n",
39969 INSN_UID (insn));
39970 }
39971
39972 /* Convert the non-permuting store INSN to a permuting one. */
39973 static void
39974 permute_store (rtx_insn *insn)
39975 {
39976 rtx body = PATTERN (insn);
39977 rtx src_reg = SET_SRC (body);
39978 machine_mode mode = GET_MODE (src_reg);
39979 int n_elts = GET_MODE_NUNITS (mode);
39980 int half_elts = n_elts / 2;
39981 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
39982 int i, j;
39983 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
39984 XVECEXP (par, 0, i) = GEN_INT (j);
39985 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
39986 XVECEXP (par, 0, i) = GEN_INT (j);
39987 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
39988 SET_SRC (body) = sel;
39989 INSN_CODE (insn) = -1; /* Force re-recognition. */
39990 df_insn_rescan (insn);
39991
39992 if (dump_file)
39993 fprintf (dump_file, "Replacing store %d with permuted store\n",
39994 INSN_UID (insn));
39995 }
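
/* The store case is symmetric to the load case above: for V4SI the
   source register is wrapped as (vec_select:V4SI (reg:V4SI n)
   (parallel [2 3 0 1])) before being stored.  Again a sketch of the
   resulting shape rather than verbatim dump output.  */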
39996
39997 /* Given INSN whose pattern contains a vector extract operation, adjust
39998    the index of the extracted lane to account for the doubleword swap. */
39999 static void
40000 adjust_extract (rtx_insn *insn)
40001 {
40002 rtx pattern = PATTERN (insn);
40003 if (GET_CODE (pattern) == PARALLEL)
40004 pattern = XVECEXP (pattern, 0, 0);
40005 rtx src = SET_SRC (pattern);
40006 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
40007 account for that. */
40008 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
40009 rtx par = XEXP (sel, 1);
40010 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
40011 int lane = INTVAL (XVECEXP (par, 0, 0));
40012 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40013 XVECEXP (par, 0, 0) = GEN_INT (lane);
40014 INSN_CODE (insn) = -1; /* Force re-recognition. */
40015 df_insn_rescan (insn);
40016
40017 if (dump_file)
40018 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
40019 }
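
/* A worked example with hypothetical values: extracting lane 1 from a
   V4SI vector, where half_elts is 2, becomes an extract of lane 3
   after the swap (1 < 2, so 1 + 2 = 3), while extracting lane 3 would
   become an extract of lane 1 (3 >= 2, so 3 - 2 = 1).  */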
40020
40021 /* Given INSN whose pattern contains a vector direct-splat operation,
40022    adjust the index of the source lane to account for the doubleword swap. */
40023 static void
40024 adjust_splat (rtx_insn *insn)
40025 {
40026 rtx body = PATTERN (insn);
40027 rtx unspec = XEXP (body, 1);
40028 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
40029 int lane = INTVAL (XVECEXP (unspec, 0, 1));
40030 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
40031 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
40032 INSN_CODE (insn) = -1; /* Force re-recognition. */
40033 df_insn_rescan (insn);
40034
40035 if (dump_file)
40036 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
40037 }
40038
40039 /* Given INSN whose pattern contains an XXPERMDI operation (that is not a
40040    doubleword swap), reverse the order of the source operands and adjust
40041    the indices of the source lanes to account for doubleword reversal. */
40042 static void
40043 adjust_xxpermdi (rtx_insn *insn)
40044 {
40045 rtx set = PATTERN (insn);
40046 rtx select = XEXP (set, 1);
40047 rtx concat = XEXP (select, 0);
40048 rtx src0 = XEXP (concat, 0);
40049 XEXP (concat, 0) = XEXP (concat, 1);
40050 XEXP (concat, 1) = src0;
40051 rtx parallel = XEXP (select, 1);
40052 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
40053 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
40054 int new_lane0 = 3 - lane1;
40055 int new_lane1 = 3 - lane0;
40056 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
40057 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
40058 INSN_CODE (insn) = -1; /* Force re-recognition. */
40059 df_insn_rescan (insn);
40060
40061 if (dump_file)
40062 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
40063 }
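
/* A worked example with hypothetical lanes: an xxpermdi selecting
   lanes {0, 2} from the concatenation of SRC0 and SRC1 is rewritten
   to select lanes {3 - 2, 3 - 0} = {1, 3} from the concatenation of
   SRC1 and SRC0, which names the same doublewords once both inputs
   are doubleword-swapped.  */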
40064
40065 /* Given INSN whose pattern contains a VEC_CONCAT operation of two
40066    doublewords, reverse the order of those inputs. */
40067 static void
40068 adjust_concat (rtx_insn *insn)
40069 {
40070 rtx set = PATTERN (insn);
40071 rtx concat = XEXP (set, 1);
40072 rtx src0 = XEXP (concat, 0);
40073 XEXP (concat, 0) = XEXP (concat, 1);
40074 XEXP (concat, 1) = src0;
40075 INSN_CODE (insn) = -1; /* Force re-recognition. */
40076 df_insn_rescan (insn);
40077
40078 if (dump_file)
40079 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
40080 }
40081
40082 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
40083 constant pool to reflect swapped doublewords. */
40084 static void
40085 adjust_vperm (rtx_insn *insn)
40086 {
40087 /* We previously determined that the UNSPEC_VPERM was fed by a
40088 swap of a swapping load of a TOC-relative constant pool symbol.
40089 Find the MEM in the swapping load and replace it with a MEM for
40090 the adjusted mask constant. */
40091 rtx set = PATTERN (insn);
40092 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
40093
40094 /* Find the swap. */
40095 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40096 df_ref use;
40097 rtx_insn *swap_insn = 0;
40098 FOR_EACH_INSN_INFO_USE (use, insn_info)
40099 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
40100 {
40101 struct df_link *def_link = DF_REF_CHAIN (use);
40102 gcc_assert (def_link && !def_link->next);
40103 swap_insn = DF_REF_INSN (def_link->ref);
40104 break;
40105 }
40106 gcc_assert (swap_insn);
40107
40108 /* Find the load. */
40109 insn_info = DF_INSN_INFO_GET (swap_insn);
40110 rtx_insn *load_insn = 0;
40111 FOR_EACH_INSN_INFO_USE (use, insn_info)
40112 {
40113 struct df_link *def_link = DF_REF_CHAIN (use);
40114 gcc_assert (def_link && !def_link->next);
40115 load_insn = DF_REF_INSN (def_link->ref);
40116 break;
40117 }
40118 gcc_assert (load_insn);
40119
40120 /* Find the TOC-relative symbol access. */
40121 insn_info = DF_INSN_INFO_GET (load_insn);
40122 rtx_insn *tocrel_insn = 0;
40123 FOR_EACH_INSN_INFO_USE (use, insn_info)
40124 {
40125 struct df_link *def_link = DF_REF_CHAIN (use);
40126 gcc_assert (def_link && !def_link->next);
40127 tocrel_insn = DF_REF_INSN (def_link->ref);
40128 break;
40129 }
40130 gcc_assert (tocrel_insn);
40131
40132 /* Find the embedded CONST_VECTOR. We must call toc_relative_expr_p
40133    here to initialize TOCREL_BASE; the call is otherwise redundant,
40134    since we have already established that it will return true. */
40135 rtx base, offset;
40136 const_rtx tocrel_base;
40137 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
40138 /* There is an extra level of indirection for small/large code models. */
40139 if (GET_CODE (tocrel_expr) == MEM)
40140 tocrel_expr = XEXP (tocrel_expr, 0);
40141 if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
40142 gcc_unreachable ();
40143 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
40144 rtx const_vector = get_pool_constant (base);
40145 /* With the extra indirection, get_pool_constant may return a
40146    SYMBOL_REF for the mask (via the reg_equal expression), so look
40147    it up once more to obtain the actual constant. */
40148 if (GET_CODE (const_vector) == SYMBOL_REF)
40149 const_vector = get_pool_constant (const_vector);
40150 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
40151
40152 /* Create an adjusted mask from the initial mask. */
40153 unsigned int new_mask[16], i, val;
40154 for (i = 0; i < 16; ++i) {
40155 val = INTVAL (XVECEXP (const_vector, 0, i));
40156 if (val < 16)
40157 new_mask[i] = (val + 8) % 16;
40158 else
40159 new_mask[i] = ((val + 8) % 16) + 16;
40160 }
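  /* For instance (hypothetical mask bytes): a mask byte selecting
     byte 0 of the first input now selects byte 8 ((0 + 8) % 16),
     while one selecting byte 24 of the second input now selects
     byte 16 (((24 + 8) % 16) + 16).  */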
40161
40162 /* Create a new CONST_VECTOR and a MEM that references it. */
40163 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
40164 for (i = 0; i < 16; ++i)
40165 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
40166 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
40167 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
40168 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
40169 can't recognize. Force the SYMBOL_REF into a register. */
40170 if (!REG_P (XEXP (new_mem, 0))) {
40171 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
40172 XEXP (new_mem, 0) = base_reg;
40173 /* Move the newly created insn ahead of the load insn. */
40174 rtx_insn *force_insn = get_last_insn ();
40175 remove_insn (force_insn);
40176 rtx_insn *before_load_insn = PREV_INSN (load_insn);
40177 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
40178 df_insn_rescan (before_load_insn);
40179 df_insn_rescan (force_insn);
40180 }
40181
40182 /* Replace the MEM in the load instruction and rescan it. */
40183 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
40184 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
40185 df_insn_rescan (load_insn);
40186
40187 if (dump_file)
40188 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
40189 }
40190
40191 /* The insn described by INSN_ENTRY[I] can be swapped, but only
40192 with special handling. Take care of that here. */
40193 static void
40194 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
40195 {
40196 rtx_insn *insn = insn_entry[i].insn;
40197 rtx body = PATTERN (insn);
40198
40199 switch (insn_entry[i].special_handling)
40200 {
40201 default:
40202 gcc_unreachable ();
40203 case SH_CONST_VECTOR:
40204 {
40205 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
40206 gcc_assert (GET_CODE (body) == SET);
40207 rtx rhs = SET_SRC (body);
40208 swap_const_vector_halves (rhs);
40209 if (dump_file)
40210 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
40211 break;
40212 }
40213 case SH_SUBREG:
40214 /* A subreg of the same size is already safe. For subregs that
40215 select a smaller portion of a reg, adjust the index for
40216 swapped doublewords. */
40217 adjust_subreg_index (body);
40218 if (dump_file)
40219 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
40220 break;
40221 case SH_NOSWAP_LD:
40222 /* Convert a non-permuting load to a permuting one. */
40223 permute_load (insn);
40224 break;
40225 case SH_NOSWAP_ST:
40226 /* Convert a non-permuting store to a permuting one. */
40227 permute_store (insn);
40228 break;
40229 case SH_EXTRACT:
40230 /* Change the lane on an extract operation. */
40231 adjust_extract (insn);
40232 break;
40233 case SH_SPLAT:
40234 /* Change the lane on a direct-splat operation. */
40235 adjust_splat (insn);
40236 break;
40237 case SH_XXPERMDI:
40238 /* Change the lanes on an XXPERMDI operation. */
40239 adjust_xxpermdi (insn);
40240 break;
40241 case SH_CONCAT:
40242 /* Reverse the order of a concatenation operation. */
40243 adjust_concat (insn);
40244 break;
40245 case SH_VPERM:
40246 /* Change the mask loaded from the constant pool for a VPERM. */
40247 adjust_vperm (insn);
40248 break;
40249 }
40250 }
40251
40252 /* Find the insn from the Ith table entry, which is known to be a
40253 register swap Y = SWAP(X). Replace it with a copy Y = X. */
40254 static void
40255 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
40256 {
40257 rtx_insn *insn = insn_entry[i].insn;
40258 rtx body = PATTERN (insn);
40259 rtx src_reg = XEXP (SET_SRC (body), 0);
40260 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
40261 rtx_insn *new_insn = emit_insn_before (copy, insn);
40262 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
40263 df_insn_rescan (new_insn);
40264
40265 if (dump_file)
40266 {
40267 unsigned int new_uid = INSN_UID (new_insn);
40268 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
40269 }
40270
40271 df_insn_delete (insn);
40272 remove_insn (insn);
40273 insn->set_deleted ();
40274 }
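
/* For example (a sketch): a doubleword swap of the form

     (set (reg:V4SI 124)
          (vec_select:V4SI (reg:V4SI 123)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   is replaced by the plain copy (set (reg:V4SI 124) (reg:V4SI 123)),
   which later passes can usually eliminate entirely.  */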
40275
40276 /* Dump the swap table to DUMP_FILE. */
40277 static void
40278 dump_swap_insn_table (swap_web_entry *insn_entry)
40279 {
40280 int e = get_max_uid ();
40281 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
40282
40283 for (int i = 0; i < e; ++i)
40284 if (insn_entry[i].is_relevant)
40285 {
40286 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
40287 fprintf (dump_file, "%6d %6d ", i,
40288 pred_entry && pred_entry->insn
40289 ? INSN_UID (pred_entry->insn) : 0);
40290 if (insn_entry[i].is_load)
40291 fputs ("load ", dump_file);
40292 if (insn_entry[i].is_store)
40293 fputs ("store ", dump_file);
40294 if (insn_entry[i].is_swap)
40295 fputs ("swap ", dump_file);
40296 if (insn_entry[i].is_live_in)
40297 fputs ("live-in ", dump_file);
40298 if (insn_entry[i].is_live_out)
40299 fputs ("live-out ", dump_file);
40300 if (insn_entry[i].contains_subreg)
40301 fputs ("subreg ", dump_file);
40302 if (insn_entry[i].is_128_int)
40303 fputs ("int128 ", dump_file);
40304 if (insn_entry[i].is_call)
40305 fputs ("call ", dump_file);
40306 if (insn_entry[i].is_swappable)
40307 {
40308 fputs ("swappable ", dump_file);
40309 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
40310 fputs ("special:constvec ", dump_file);
40311 else if (insn_entry[i].special_handling == SH_SUBREG)
40312 fputs ("special:subreg ", dump_file);
40313 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
40314 fputs ("special:load ", dump_file);
40315 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
40316 fputs ("special:store ", dump_file);
40317 else if (insn_entry[i].special_handling == SH_EXTRACT)
40318 fputs ("special:extract ", dump_file);
40319 else if (insn_entry[i].special_handling == SH_SPLAT)
40320 fputs ("special:splat ", dump_file);
40321 else if (insn_entry[i].special_handling == SH_XXPERMDI)
40322 fputs ("special:xxpermdi ", dump_file);
40323 else if (insn_entry[i].special_handling == SH_CONCAT)
40324 fputs ("special:concat ", dump_file);
40325 else if (insn_entry[i].special_handling == SH_VPERM)
40326 fputs ("special:vperm ", dump_file);
40327 }
40328 if (insn_entry[i].web_not_optimizable)
40329 fputs ("unoptimizable ", dump_file);
40330 if (insn_entry[i].will_delete)
40331 fputs ("delete ", dump_file);
40332 fputs ("\n", dump_file);
40333 }
40334 fputs ("\n", dump_file);
40335 }
40336
40337 /* Return a copy of ALIGN, an rtx of the form (and ADDR (const_int -16)),
40338    with ADDR canonicalized to (reg) or (plus reg reg). Always return
40339    a new copy to avoid problems with combine. */
40340 static rtx
40341 alignment_with_canonical_addr (rtx align)
40342 {
40343 rtx canon;
40344 rtx addr = XEXP (align, 0);
40345
40346 if (REG_P (addr))
40347 canon = addr;
40348
40349 else if (GET_CODE (addr) == PLUS)
40350 {
40351 rtx addrop0 = XEXP (addr, 0);
40352 rtx addrop1 = XEXP (addr, 1);
40353
40354 if (!REG_P (addrop0))
40355 addrop0 = force_reg (GET_MODE (addrop0), addrop0);
40356
40357 if (!REG_P (addrop1))
40358 addrop1 = force_reg (GET_MODE (addrop1), addrop1);
40359
40360 canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1);
40361 }
40362
40363 else
40364 canon = force_reg (GET_MODE (addr), addr);
40365
40366 return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
40367 }
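
/* For example (hypothetical RTL): (and:DI (plus:DI (reg:DI 3)
   (reg:DI 4)) (const_int -16)) already has the desired (reg + reg)
   address and is simply rebuilt as a fresh rtx of the same shape;
   a non-register operand in the address would first be forced into
   a register.  A new copy is returned in all cases so that combine
   cannot later modify rtl shared with the original insn.  */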
40368
40369 /* Check whether INSN computes an alignment mask, and if so, return
40370    a fully-expanded rtx for the masking operation. */
40371 static rtx
40372 alignment_mask (rtx_insn *insn)
40373 {
40374 rtx body = PATTERN (insn);
40375
40376 if (GET_CODE (body) != SET
40377 || GET_CODE (SET_SRC (body)) != AND
40378 || !REG_P (XEXP (SET_SRC (body), 0)))
40379 return 0;
40380
40381 rtx mask = XEXP (SET_SRC (body), 1);
40382
40383 if (GET_CODE (mask) == CONST_INT)
40384 {
40385 if (INTVAL (mask) == -16)
40386 return alignment_with_canonical_addr (SET_SRC (body));
40387 else
40388 return 0;
40389 }
40390
40391 if (!REG_P (mask))
40392 return 0;
40393
40394 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40395 df_ref use;
40396 rtx real_mask = 0;
40397
40398 FOR_EACH_INSN_INFO_USE (use, insn_info)
40399 {
40400 if (!rtx_equal_p (DF_REF_REG (use), mask))
40401 continue;
40402
40403 struct df_link *def_link = DF_REF_CHAIN (use);
40404 if (!def_link || def_link->next)
40405 return 0;
40406
40407 rtx_insn *const_insn = DF_REF_INSN (def_link->ref);
40408 rtx const_body = PATTERN (const_insn);
40409 if (GET_CODE (const_body) != SET)
40410 return 0;
40411
40412 real_mask = SET_SRC (const_body);
40413
40414 if (GET_CODE (real_mask) != CONST_INT
40415 || INTVAL (real_mask) != -16)
40416 return 0;
40417 }
40418
40419 if (real_mask == 0)
40420 return 0;
40421
40422 return alignment_with_canonical_addr (SET_SRC (body));
40423 }
40424
40425 /* Given INSN that's a load or store based at BASE_REG, look for a
40426 feeding computation that aligns its address on a 16-byte boundary. */
40427 static rtx
40428 find_alignment_op (rtx_insn *insn, rtx base_reg)
40429 {
40430 df_ref base_use;
40431 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40432 rtx and_operation = 0;
40433
40434 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
40435 {
40436 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
40437 continue;
40438
40439 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
40440 if (!base_def_link || base_def_link->next)
40441 break;
40442
40443 /* With stack-protector code enabled, and possibly in other
40444 circumstances, there may not be an associated insn for
40445 the def. */
40446 if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
40447 break;
40448
40449 rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
40450 and_operation = alignment_mask (and_insn);
40451 if (and_operation != 0)
40452 break;
40453 }
40454
40455 return and_operation;
40456 }
40457
40458 struct del_info { bool replace; rtx_insn *replace_insn; };
40459
40460 /* If INSN is the load for an lvx pattern, put it in canonical form. */
40461 static void
40462 recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
40463 {
40464 rtx body = PATTERN (insn);
40465 gcc_assert (GET_CODE (body) == SET
40466 && GET_CODE (SET_SRC (body)) == VEC_SELECT
40467 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM);
40468
40469 rtx mem = XEXP (SET_SRC (body), 0);
40470 rtx base_reg = XEXP (mem, 0);
40471
40472 rtx and_operation = find_alignment_op (insn, base_reg);
40473
40474 if (and_operation != 0)
40475 {
40476 df_ref def;
40477 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40478 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40479 {
40480 struct df_link *link = DF_REF_CHAIN (def);
40481 if (!link || link->next)
40482 break;
40483
40484 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
40485 if (!insn_is_swap_p (swap_insn)
40486 || insn_is_load_p (swap_insn)
40487 || insn_is_store_p (swap_insn))
40488 break;
40489
40490 /* Expected lvx pattern found. Change the swap to
40491 a copy, and propagate the AND operation into the
40492 load. */
40493 to_delete[INSN_UID (swap_insn)].replace = true;
40494 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
40495
40496 XEXP (mem, 0) = and_operation;
40497 SET_SRC (body) = mem;
40498 INSN_CODE (insn) = -1; /* Force re-recognition. */
40499 df_insn_rescan (insn);
40500
40501 if (dump_file)
40502 fprintf (dump_file, "lvx opportunity found at %d\n",
40503 INSN_UID (insn));
40504 }
40505 }
40506 }
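
/* Putting the pieces together, the lvx idiom recognized above looks
   roughly like this (modes elided; a sketch, not literal dump output):

     I1: (set (reg T) (and (reg A) (const_int -16)))    ;; alignment
     I2: (set (reg V) (vec_select (mem (reg T)) ...))   ;; swapping load
     I3: (set (reg W) (vec_select (reg V) ...))         ;; register swap

   After the rewrite, I2 loads directly from (mem (and (reg A)
   (const_int -16))) with no vec_select, and I3 is queued in TO_DELETE
   to be turned into a simple copy.  */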
40507
40508 /* If INSN is the store for an stvx pattern, put it in canonical form. */
40509 static void
40510 recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
40511 {
40512 rtx body = PATTERN (insn);
40513 gcc_assert (GET_CODE (body) == SET
40514 && GET_CODE (SET_DEST (body)) == MEM
40515 && GET_CODE (SET_SRC (body)) == VEC_SELECT);
40516 rtx mem = SET_DEST (body);
40517 rtx base_reg = XEXP (mem, 0);
40518
40519 rtx and_operation = find_alignment_op (insn, base_reg);
40520
40521 if (and_operation != 0)
40522 {
40523 rtx src_reg = XEXP (SET_SRC (body), 0);
40524 df_ref src_use;
40525 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40526 FOR_EACH_INSN_INFO_USE (src_use, insn_info)
40527 {
40528 if (!rtx_equal_p (DF_REF_REG (src_use), src_reg))
40529 continue;
40530
40531 struct df_link *link = DF_REF_CHAIN (src_use);
40532 if (!link || link->next)
40533 break;
40534
40535 rtx_insn *swap_insn = DF_REF_INSN (link->ref);
40536 if (!insn_is_swap_p (swap_insn)
40537 || insn_is_load_p (swap_insn)
40538 || insn_is_store_p (swap_insn))
40539 break;
40540
40541 /* Expected stvx pattern found. Change the swap to
40542 a copy, and propagate the AND operation into the
40543 store. */
40544 to_delete[INSN_UID (swap_insn)].replace = true;
40545 to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
40546
40547 XEXP (mem, 0) = and_operation;
40548 SET_SRC (body) = src_reg;
40549 INSN_CODE (insn) = -1; /* Force re-recognition. */
40550 df_insn_rescan (insn);
40551
40552 if (dump_file)
40553 fprintf (dump_file, "stvx opportunity found at %d\n",
40554 INSN_UID (insn));
40555 }
40556 }
40557 }
40558
40559 /* Look for patterns created from builtin lvx and stvx calls, and
40560 canonicalize them to be properly recognized as such. */
40561 static void
40562 recombine_lvx_stvx_patterns (function *fun)
40563 {
40564 int i;
40565 basic_block bb;
40566 rtx_insn *insn;
40567
40568 int num_insns = get_max_uid ();
40569 del_info *to_delete = XCNEWVEC (del_info, num_insns);
40570
40571 FOR_ALL_BB_FN (bb, fun)
40572 FOR_BB_INSNS (bb, insn)
40573 {
40574 if (!NONDEBUG_INSN_P (insn))
40575 continue;
40576
40577 if (insn_is_load_p (insn) && insn_is_swap_p (insn))
40578 recombine_lvx_pattern (insn, to_delete);
40579 else if (insn_is_store_p (insn) && insn_is_swap_p (insn))
40580 recombine_stvx_pattern (insn, to_delete);
40581 }
40582
40583 /* Turning swaps into copies is delayed until now, to avoid problems
40584 with deleting instructions during the insn walk. */
40585 for (i = 0; i < num_insns; i++)
40586 if (to_delete[i].replace)
40587 {
40588 rtx swap_body = PATTERN (to_delete[i].replace_insn);
40589 rtx src_reg = XEXP (SET_SRC (swap_body), 0);
40590 rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg);
40591 rtx_insn *new_insn = emit_insn_before (copy,
40592 to_delete[i].replace_insn);
40593 set_block_for_insn (new_insn,
40594 BLOCK_FOR_INSN (to_delete[i].replace_insn));
40595 df_insn_rescan (new_insn);
40596 df_insn_delete (to_delete[i].replace_insn);
40597 remove_insn (to_delete[i].replace_insn);
40598 to_delete[i].replace_insn->set_deleted ();
40599 }
40600
40601 free (to_delete);
40602 }
40603
40604 /* Main entry point for this pass. */
40605 unsigned int
40606 rs6000_analyze_swaps (function *fun)
40607 {
40608 swap_web_entry *insn_entry;
40609 basic_block bb;
40610 rtx_insn *insn, *curr_insn = 0;
40611
40612 /* Dataflow analysis for use-def chains. */
40613 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
40614 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
40615 df_analyze ();
40616 df_set_flags (DF_DEFER_INSN_RESCAN);
40617
40618 /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */
40619 recombine_lvx_stvx_patterns (fun);
40620
40621 /* Allocate structure to represent webs of insns. */
40622 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
40623
40624 /* Walk the insns to gather basic data. */
40625 FOR_ALL_BB_FN (bb, fun)
40626 FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
40627 {
40628 unsigned int uid = INSN_UID (insn);
40629 if (NONDEBUG_INSN_P (insn))
40630 {
40631 insn_entry[uid].insn = insn;
40632
40633 if (GET_CODE (insn) == CALL_INSN)
40634 insn_entry[uid].is_call = 1;
40635
40636 /* Walk the uses and defs to see if we mention vector regs.
40637 Record any constraints on optimization of such mentions. */
40638 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40639 df_ref mention;
40640 FOR_EACH_INSN_INFO_USE (mention, insn_info)
40641 {
40642 /* We use DF_REF_REAL_REG here to get inside any subregs. */
40643 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
40644
40645 /* If a use gets its value from a call insn, it will be
40646 a hard register and will look like (reg:V4SI 3 3).
40647 The df analysis creates two mentions for GPR3 and GPR4,
40648 both DImode. We must recognize this and treat it as a
40649 vector mention to ensure the call is unioned with this
40650 use. */
40651 if (mode == DImode && DF_REF_INSN_INFO (mention))
40652 {
40653 rtx feeder = DF_REF_INSN (mention);
40654 /* FIXME: It is pretty hard to get from the df mention
40655 to the mode of the use in the insn. We arbitrarily
40656 pick a vector mode here, even though the use might
40657 be a real DImode. We can be too conservative
40658 (create a web larger than necessary) because of
40659 this, so consider eventually fixing this. */
40660 if (GET_CODE (feeder) == CALL_INSN)
40661 mode = V4SImode;
40662 }
40663
40664 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
40665 {
40666 insn_entry[uid].is_relevant = 1;
40667 if (mode == TImode || mode == V1TImode
40668 || FLOAT128_VECTOR_P (mode))
40669 insn_entry[uid].is_128_int = 1;
40670 if (DF_REF_INSN_INFO (mention))
40671 insn_entry[uid].contains_subreg
40672 = !rtx_equal_p (DF_REF_REG (mention),
40673 DF_REF_REAL_REG (mention));
40674 union_defs (insn_entry, insn, mention);
40675 }
40676 }
40677 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
40678 {
40679 /* We use DF_REF_REAL_REG here to get inside any subregs. */
40680 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
40681
40682 /* If we're loading up a hard vector register for a call,
40683 it looks like (set (reg:V4SI 9 9) (...)). The df
40684 analysis creates two mentions for GPR9 and GPR10, both
40685 DImode. So relying on the mode from the mentions
40686 isn't sufficient to ensure we union the call into the
40687 web with the parameter setup code. */
40688 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
40689     && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
40690   mode = GET_MODE (SET_DEST (PATTERN (insn)));
40691
40692 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
40693 {
40694 insn_entry[uid].is_relevant = 1;
40695 if (mode == TImode || mode == V1TImode
40696 || FLOAT128_VECTOR_P (mode))
40697 insn_entry[uid].is_128_int = 1;
40698 if (DF_REF_INSN_INFO (mention))
40699 insn_entry[uid].contains_subreg
40700 = !rtx_equal_p (DF_REF_REG (mention),
40701 DF_REF_REAL_REG (mention));
40702 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
40703 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
40704 insn_entry[uid].is_live_out = 1;
40705 union_uses (insn_entry, insn, mention);
40706 }
40707 }
40708
40709 if (insn_entry[uid].is_relevant)
40710 {
40711 /* Determine if this is a load or store. */
40712 insn_entry[uid].is_load = insn_is_load_p (insn);
40713 insn_entry[uid].is_store = insn_is_store_p (insn);
40714
40715 /* Determine if this is a doubleword swap. If not,
40716 determine whether it can legally be swapped. */
40717 if (insn_is_swap_p (insn))
40718 insn_entry[uid].is_swap = 1;
40719 else
40720 {
40721 unsigned int special = SH_NONE;
40722 insn_entry[uid].is_swappable
40723 = insn_is_swappable_p (insn_entry, insn, &special);
40724 if (special != SH_NONE && insn_entry[uid].contains_subreg)
40725 insn_entry[uid].is_swappable = 0;
40726 else if (special != SH_NONE)
40727 insn_entry[uid].special_handling = special;
40728 else if (insn_entry[uid].contains_subreg)
40729 insn_entry[uid].special_handling = SH_SUBREG;
40730 }
40731 }
40732 }
40733 }
40734
40735 if (dump_file)
40736 {
40737 fprintf (dump_file, "\nSwap insn entry table when first built\n");
40738 dump_swap_insn_table (insn_entry);
40739 }
40740
40741 /* Record unoptimizable webs. */
40742 unsigned e = get_max_uid (), i;
40743 for (i = 0; i < e; ++i)
40744 {
40745 if (!insn_entry[i].is_relevant)
40746 continue;
40747
40748 swap_web_entry *root
40749 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
40750
40751 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
40752 || (insn_entry[i].contains_subreg
40753 && insn_entry[i].special_handling != SH_SUBREG)
40754 || insn_entry[i].is_128_int || insn_entry[i].is_call
40755 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
40756 root->web_not_optimizable = 1;
40757
40758 /* If we have loads or stores that aren't permuting then the
40759 optimization isn't appropriate. */
40760 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
40761 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
40762 root->web_not_optimizable = 1;
40763
40764 /* If we have permuting loads or stores that are not accompanied
40765 by a register swap, the optimization isn't appropriate. */
40766 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
40767 {
40768 rtx insn = insn_entry[i].insn;
40769 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40770 df_ref def;
40771
40772 FOR_EACH_INSN_INFO_DEF (def, insn_info)
40773 {
40774 struct df_link *link = DF_REF_CHAIN (def);
40775
40776 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
40777 {
40778 root->web_not_optimizable = 1;
40779 break;
40780 }
40781 }
40782 }
40783 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
40784 {
40785 rtx insn = insn_entry[i].insn;
40786 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
40787 df_ref use;
40788
40789 FOR_EACH_INSN_INFO_USE (use, insn_info)
40790 {
40791 struct df_link *link = DF_REF_CHAIN (use);
40792
40793 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
40794 {
40795 root->web_not_optimizable = 1;
40796 break;
40797 }
40798 }
40799 }
40800 }
40801
40802 if (dump_file)
40803 {
40804 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
40805 dump_swap_insn_table (insn_entry);
40806 }
40807
40808 /* For each load and store in an optimizable web (which implies
40809 the loads and stores are permuting), find the associated
40810 register swaps and mark them for removal. Due to various
40811 optimizations we may mark the same swap more than once. Also
40812 perform special handling for swappable insns that require it. */
40813 for (i = 0; i < e; ++i)
40814 if ((insn_entry[i].is_load || insn_entry[i].is_store)
40815 && insn_entry[i].is_swap)
40816 {
40817 swap_web_entry* root_entry
40818 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
40819 if (!root_entry->web_not_optimizable)
40820 mark_swaps_for_removal (insn_entry, i);
40821 }
40822 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
40823 {
40824 swap_web_entry* root_entry
40825 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
40826 if (!root_entry->web_not_optimizable)
40827 handle_special_swappables (insn_entry, i);
40828 }
40829
40830 /* Now delete the swaps marked for removal. */
40831 for (i = 0; i < e; ++i)
40832 if (insn_entry[i].will_delete)
40833 replace_swap_with_copy (insn_entry, i);
40834
40835 /* Clean up. */
40836 free (insn_entry);
40837 return 0;
40838 }
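
/* A minimal end-to-end sketch of the effect of this pass on a
   little-endian VSX target, with hypothetical insns:

     x = swap (permuting-load mem1)
     y = vadd x, x                     ;; lane-insensitive
     permuting-store mem2, swap (y)

   All three insns fall into a single optimizable web, so both swaps
   are marked for removal and become plain copies.  Values then flow
   through the web in doubleword-swapped order, which the
   lane-insensitive arithmetic cannot observe, and memory still sees
   the element order the program expects.  */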
40839
40840 const pass_data pass_data_analyze_swaps =
40841 {
40842 RTL_PASS, /* type */
40843 "swaps", /* name */
40844 OPTGROUP_NONE, /* optinfo_flags */
40845 TV_NONE, /* tv_id */
40846 0, /* properties_required */
40847 0, /* properties_provided */
40848 0, /* properties_destroyed */
40849 0, /* todo_flags_start */
40850 TODO_df_finish, /* todo_flags_finish */
40851 };
40852
40853 class pass_analyze_swaps : public rtl_opt_pass
40854 {
40855 public:
40856 pass_analyze_swaps(gcc::context *ctxt)
40857 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
40858 {}
40859
40860 /* opt_pass methods: */
40861 virtual bool gate (function *)
40862 {
40863 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
40864 && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
40865 }
40866
40867 virtual unsigned int execute (function *fun)
40868 {
40869 return rs6000_analyze_swaps (fun);
40870 }
40871
40872 opt_pass *clone ()
40873 {
40874 return new pass_analyze_swaps (m_ctxt);
40875 }
40876
40877 }; // class pass_analyze_swaps
40878
40879 rtl_opt_pass *
40880 make_pass_analyze_swaps (gcc::context *ctxt)
40881 {
40882 return new pass_analyze_swaps (ctxt);
40883 }
40884
40885 #ifdef RS6000_GLIBC_ATOMIC_FENV
40886 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
40887 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
40888 #endif
40889
40890 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
40891
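/* This hook is used when expanding C11 atomic compound assignments on
   floating-point operands, where the floating-point environment must
   be held, cleared, and updated around the compare-and-swap loop.  As
   a rough C-level sketch of what the trees built below compute (the
   libc names are for orientation only; the expansion calls
   __builtin_mffs and __builtin_mtfsf directly):

     double fenv;
     feholdexcept (&fenv);            // *hold
     ... compare-and-swap loop ...
     feclearexcept (FE_ALL_EXCEPT);   // *clear, on a failed iteration
     feupdateenv (&fenv);             // *update, on success  */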
40892 static void
40893 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
40894 {
40895 if (!TARGET_HARD_FLOAT)
40896 {
40897 #ifdef RS6000_GLIBC_ATOMIC_FENV
40898 if (atomic_hold_decl == NULL_TREE)
40899 {
40900 atomic_hold_decl
40901 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
40902 get_identifier ("__atomic_feholdexcept"),
40903 build_function_type_list (void_type_node,
40904 double_ptr_type_node,
40905 NULL_TREE));
40906 TREE_PUBLIC (atomic_hold_decl) = 1;
40907 DECL_EXTERNAL (atomic_hold_decl) = 1;
40908 }
40909
40910 if (atomic_clear_decl == NULL_TREE)
40911 {
40912 atomic_clear_decl
40913 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
40914 get_identifier ("__atomic_feclearexcept"),
40915 build_function_type_list (void_type_node,
40916 NULL_TREE));
40917 TREE_PUBLIC (atomic_clear_decl) = 1;
40918 DECL_EXTERNAL (atomic_clear_decl) = 1;
40919 }
40920
40921 tree const_double = build_qualified_type (double_type_node,
40922 TYPE_QUAL_CONST);
40923 tree const_double_ptr = build_pointer_type (const_double);
40924 if (atomic_update_decl == NULL_TREE)
40925 {
40926 atomic_update_decl
40927 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
40928 get_identifier ("__atomic_feupdateenv"),
40929 build_function_type_list (void_type_node,
40930 const_double_ptr,
40931 NULL_TREE));
40932 TREE_PUBLIC (atomic_update_decl) = 1;
40933 DECL_EXTERNAL (atomic_update_decl) = 1;
40934 }
40935
40936 tree fenv_var = create_tmp_var_raw (double_type_node);
40937 TREE_ADDRESSABLE (fenv_var) = 1;
40938 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
40939
40940 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
40941 *clear = build_call_expr (atomic_clear_decl, 0);
40942 *update = build_call_expr (atomic_update_decl, 1,
40943 fold_convert (const_double_ptr, fenv_addr));
40944 #endif
40945 return;
40946 }
40947
40948 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
40949 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
40950 tree call_mffs = build_call_expr (mffs, 0);
40951
40952 /* Generates the equivalent of feholdexcept (&fenv_var)
40953
40954 *fenv_var = __builtin_mffs ();
40955 double fenv_hold;
40956 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
40957 __builtin_mtfsf (0xff, fenv_hold); */
40958
40959 /* Mask to clear everything except for the rounding modes and non-IEEE
40960 arithmetic flag. */
40961 const unsigned HOST_WIDE_INT hold_exception_mask =
40962 HOST_WIDE_INT_C (0xffffffff00000007);
40963
40964 tree fenv_var = create_tmp_var_raw (double_type_node);
40965
40966 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
40967
40968 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
40969 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
40970 build_int_cst (uint64_type_node,
40971 hold_exception_mask));
40972
40973 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
40974 fenv_llu_and);
40975
40976 tree hold_mtfsf = build_call_expr (mtfsf, 2,
40977 build_int_cst (unsigned_type_node, 0xff),
40978 fenv_hold_mtfsf);
40979
40980 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
40981
40982 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
40983
40984 double fenv_clear = __builtin_mffs ();
40985 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
40986 __builtin_mtfsf (0xff, fenv_clear); */
40987
40988 /* Mask to clear the entire FPSCR image in the low-order word,
40989    including all the exception bits. */
40990 const unsigned HOST_WIDE_INT clear_exception_mask =
40991 HOST_WIDE_INT_C (0xffffffff00000000);
40992
40993 tree fenv_clear = create_tmp_var_raw (double_type_node);
40994
40995 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
40996
40997 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
40998 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
40999 fenv_clean_llu,
41000 build_int_cst (uint64_type_node,
41001 clear_exception_mask));
41002
41003 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41004 fenv_clear_llu_and);
41005
41006 tree clear_mtfsf = build_call_expr (mtfsf, 2,
41007 build_int_cst (unsigned_type_node, 0xff),
41008 fenv_clear_mtfsf);
41009
41010 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
41011
41012 /* Generates the equivalent of feupdateenv (&fenv_var)
41013
41014 double old_fenv = __builtin_mffs ();
41015 double fenv_update;
41016 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
41017                            (*(uint64_t*)fenv_var & 0x1ff80fff);
41018 __builtin_mtfsf (0xff, fenv_update); */
41019
41020 const unsigned HOST_WIDE_INT update_exception_mask =
41021 HOST_WIDE_INT_C (0xffffffff1fffff00);
41022 const unsigned HOST_WIDE_INT new_exception_mask =
41023 HOST_WIDE_INT_C (0x1ff80fff);
41024
41025 tree old_fenv = create_tmp_var_raw (double_type_node);
41026 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
41027
41028 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
41029 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
41030 build_int_cst (uint64_type_node,
41031 update_exception_mask));
41032
41033 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
41034 build_int_cst (uint64_type_node,
41035 new_exception_mask));
41036
41037 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
41038 old_llu_and, new_llu_and);
41039
41040 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
41041 new_llu_mask);
41042
41043 tree update_mtfsf = build_call_expr (mtfsf, 2,
41044 build_int_cst (unsigned_type_node, 0xff),
41045 fenv_update_mtfsf);
41046
41047 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
41048 }
41049
41050 void
41051 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
41052 {
41053 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
41054
41055 rtx_tmp0 = gen_reg_rtx (V2DImode);
41056 rtx_tmp1 = gen_reg_rtx (V2DImode);
41057
41058 /* The destination layout of the vmrgew instruction is:
41059    rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
41060    Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
41061    vmrgew instruction will be correct. */
41062 if (VECTOR_ELT_ORDER_BIG)
41063 {
41064 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
41065 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
41066 }
41067 else
41068 {
41069 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
41070 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
41071 }
41072
41073 rtx_tmp2 = gen_reg_rtx (V4SFmode);
41074 rtx_tmp3 = gen_reg_rtx (V4SFmode);
41075
41076 if (signed_convert)
41077 {
41078 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
41079 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
41080 }
41081 else
41082 {
41083 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
41084 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
41085 }
41086
41087 if (VECTOR_ELT_ORDER_BIG)
41088 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
41089 else
41090 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
41091 }
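
/* Illustration with hypothetical values: for src1 = {a0, a1} and
   src2 = {b0, b1} in V2DImode, the xxpermdi pair above produces
   rtx_tmp0 = {a0, b0} and rtx_tmp1 = {a1, b1}; after conversion to
   single precision, vmrgew interleaves the converted words so that
   dst holds {float(a0), float(a1), float(b0), float(b1)}, the layout
   the float2 built-ins advertise.  */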
41092
41093 void
41094 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
41095 rtx src2)
41096 {
41097 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
41098
41099 rtx_tmp0 = gen_reg_rtx (V2DFmode);
41100 rtx_tmp1 = gen_reg_rtx (V2DFmode);
41101
41102 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
41103 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
41104
41105 rtx_tmp2 = gen_reg_rtx (V4SImode);
41106 rtx_tmp3 = gen_reg_rtx (V4SImode);
41107
41108 if (signed_convert)
41109 {
41110 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
41111 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
41112 }
41113 else
41114 {
41115 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
41116 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
41117 }
41118
41119 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
41120 }
41121
41122 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
41123
41124 static bool
41125 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
41126 optimization_type opt_type)
41127 {
41128 switch (op)
41129 {
41130 case rsqrt_optab:
41131 return (opt_type == OPTIMIZE_FOR_SPEED
41132 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
41133
41134 default:
41135 return true;
41136 }
41137 }
41138 \f
41139 struct gcc_target targetm = TARGET_INITIALIZER;
41140
41141 #include "gt-rs6000.h"