/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2019 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
static bool rs6000_passes_ieee128;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

extern GTY(()) section *toc_section;
section *toc_section = 0;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

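/* As an illustrative sketch (the option names are real, the scenario made
   up): "-mrecip=divf,rsqrtd" would be looked up entry by entry in the table
   above and accumulate the mask

     (RECIP_SF_DIV | RECIP_V4SF_DIV) | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)

   i.e. single-precision divide estimates plus double-precision reciprocal
   square root estimates, scalar and vector alike.  The actual parsing of
   -mrecip= lives in the option-override code elsewhere in this file.  */
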
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

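/* A sketch of the user-level facility this table serves (the function names
   in the example are hypothetical):

     if (__builtin_cpu_is ("power9"))
       run_power9_path ();
     else
       run_generic_path ();

   The string is matched against the platform names above, and the builtin
   expands to a comparison against the AT_PLATFORM value the C library caches
   in the TCB.  */
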
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "htm-no-suspend",	PPC_FEATURE2_HTM_NO_SUSPEND,	1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 },
  { "darn",		PPC_FEATURE2_DARN,		1 },
  { "scv",		PPC_FEATURE2_SCV,		1 }
};

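/* Likewise, __builtin_cpu_supports tests a single HWCAP/HWCAP2 bit rather
   than a platform name.  A minimal illustrative use, with vsx_path and
   scalar_path as hypothetical stand-ins:

     if (__builtin_cpu_supports ("vsx"))
       vsx_path ();
     else
       scalar_path ();

   "vsx" maps to PPC_FEATURE_HAS_VSX in AT_HWCAP; entries with id == 1 in the
   table above are looked up in AT_HWCAP2 instead.  */
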
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },					/* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },		/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },		/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },	/* ISA 3.00 (power9).  */
};

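/* This table underlies the target_clones function attribute.  A hedged
   user-level example (the function itself is made up):

     __attribute__ ((target_clones ("cpu=power9", "cpu=power8", "default")))
     long mod_func (long a, long b) { return (a % b) + 1; }

   Each clone is compiled against one entry here, and the runtime resolver
   dispatches to the highest-ISA clone whose HWCAP name (arch_2_05 through
   arch_3_00) the machine reports via __builtin_cpu_supports.  */
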
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

/* Register classes we care about for secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

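/* To make the encoding above concrete, a sketch of how these helpers are
   meant to be used (DFmode is just an example):

     if (mode_supports_pre_incdec_p (DFmode))
       ... PRE_INC/PRE_DEC addresses are legitimate for DFmode ...

   which reduces to the single bit test

     (reg_addr[DFmode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) != 0

   The reg_addr/addr_mask tables themselves are filled in once the target
   switches are known, at option-override time.  */
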
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}

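/* This predicate is meant to be used as a guard in the machine description;
   an illustrative (not literal) define_bypass from a power*.md scheduling
   description would look like

     (define_bypass 2 "some_load_insn" "some_store_insn"
		    "!rs6000_store_data_bypass_p")

   with placeholder insn names.  The wrapper keeps the generic
   store_data_bypass_p from asserting on PARALLEL patterns it cannot
   handle.  */
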
\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

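/* All of the cost entries below are expressed with COSTS_N_INSNS, which
   scales a count of instructions into the cost units used by the middle end
   (as of this writing, COSTS_N_INSNS (N) is simply (N) * 4).  So an entry
   such as COSTS_N_INSNS (18) for divsi models a 32-bit integer divide as
   roughly eighteen times the cost of an integer add.  */
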
/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

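/* The table above is an instance of the "X macro" technique:
   rs6000-builtin.def invokes one of the RS6000_BUILTIN_* macros per builtin,
   and the definitions immediately above turn each invocation into one
   initializer.  Schematically, with a made-up builtin for illustration:

     RS6000_BUILTIN_2 (FOO, "__builtin_foo", MASK, ATTR, CODE_FOR_foo)

   expands to

     { "__builtin_foo", CODE_FOR_foo, MASK, ATTR },

   Redefining the same macros differently before re-including the .def file
   produces the other builtin tables (e.g. the enumeration of builtin
   codes).  */
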
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void htm_init_builtins (void);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static rtx rs6000_internal_arg_pointer (void);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

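/* When symbolic register names are requested (the -mregnames option), the
   alternate table above is substituted for the default one, so GPR 3 prints
   as "%r3" rather than "3" in the generated assembly.  */
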
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",	  1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",	  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
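
/* A brief illustration of how one of these attributes appears in user code:

     void far_away (void) __attribute__ ((longcall));

   makes calls to far_away go through a register, so the callee may be beyond
   the reach of a direct branch.  The "altivec" attribute is normally not
   written by hand; it is what the AltiVec "vector" keyword is implemented
   with.  The declaration above is illustrative only.  */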
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1688 #undef TARGET_FUNCTION_ARG_ADVANCE
1689 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1690 #undef TARGET_FUNCTION_ARG
1691 #define TARGET_FUNCTION_ARG rs6000_function_arg
1692 #undef TARGET_FUNCTION_ARG_PADDING
1693 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1694 #undef TARGET_FUNCTION_ARG_BOUNDARY
1695 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1696
1697 #undef TARGET_BUILD_BUILTIN_VA_LIST
1698 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1699
1700 #undef TARGET_EXPAND_BUILTIN_VA_START
1701 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1702
1703 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1704 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1705
1706 #undef TARGET_EH_RETURN_FILTER_MODE
1707 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1708
1709 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1710 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1711
1712 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1713 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1714
1715 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1716 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1717
1718 #undef TARGET_FLOATN_MODE
1719 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1720
1721 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1722 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1723
1724 #undef TARGET_MD_ASM_ADJUST
1725 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1726
1727 #undef TARGET_OPTION_OVERRIDE
1728 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1729
1730 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1731 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1732 rs6000_builtin_vectorized_function
1733
1734 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1735 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1736 rs6000_builtin_md_vectorized_function
1737
1738 #undef TARGET_STACK_PROTECT_GUARD
1739 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1740
1741 #if !TARGET_MACHO
1742 #undef TARGET_STACK_PROTECT_FAIL
1743 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1744 #endif
1745
1746 #ifdef HAVE_AS_TLS
1747 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1748 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1749 #endif
1750
1751 /* Use a 32-bit anchor range. This leads to sequences like:
1752
1753 addis tmp,anchor,high
1754 add dest,tmp,low
1755
1756 where tmp itself acts as an anchor, and can be shared between
1757 accesses to the same 64k page. */
1758 #undef TARGET_MIN_ANCHOR_OFFSET
1759 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1760 #undef TARGET_MAX_ANCHOR_OFFSET
1761 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1762 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1763 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1764 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1765 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1766
1767 #undef TARGET_BUILTIN_RECIPROCAL
1768 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1769
1770 #undef TARGET_SECONDARY_RELOAD
1771 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1772 #undef TARGET_SECONDARY_MEMORY_NEEDED
1773 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1774 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1775 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1776
1777 #undef TARGET_LEGITIMATE_ADDRESS_P
1778 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1779
1780 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1781 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1782
1783 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1784 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1785
1786 #undef TARGET_CAN_ELIMINATE
1787 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1788
1789 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1790 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1791
1792 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1793 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1794
1795 #undef TARGET_TRAMPOLINE_INIT
1796 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1797
1798 #undef TARGET_FUNCTION_VALUE
1799 #define TARGET_FUNCTION_VALUE rs6000_function_value
1800
1801 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1802 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1803
1804 #undef TARGET_OPTION_SAVE
1805 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1806
1807 #undef TARGET_OPTION_RESTORE
1808 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1809
1810 #undef TARGET_OPTION_PRINT
1811 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1812
1813 #undef TARGET_CAN_INLINE_P
1814 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1815
1816 #undef TARGET_SET_CURRENT_FUNCTION
1817 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1818
1819 #undef TARGET_LEGITIMATE_CONSTANT_P
1820 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1821
1822 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1823 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1824
1825 #undef TARGET_CAN_USE_DOLOOP_P
1826 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1827
1828 #undef TARGET_PREDICT_DOLOOP_P
1829 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1830
1831 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1832 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1833
1834 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1835 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1836 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1837 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1838 #undef TARGET_UNWIND_WORD_MODE
1839 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1840
1841 #undef TARGET_OFFLOAD_OPTIONS
1842 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1843
1844 #undef TARGET_C_MODE_FOR_SUFFIX
1845 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1846
1847 #undef TARGET_INVALID_BINARY_OP
1848 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1849
1850 #undef TARGET_OPTAB_SUPPORTED_P
1851 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1852
1853 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1854 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1855
1856 #undef TARGET_COMPARE_VERSION_PRIORITY
1857 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1858
1859 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1860 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1861 rs6000_generate_version_dispatcher_body
1862
1863 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1864 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1865 rs6000_get_function_versions_dispatcher
1866
1867 #undef TARGET_OPTION_FUNCTION_VERSIONS
1868 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1869
1870 #undef TARGET_HARD_REGNO_NREGS
1871 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1872 #undef TARGET_HARD_REGNO_MODE_OK
1873 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1874
1875 #undef TARGET_MODES_TIEABLE_P
1876 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1877
1878 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1879 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1880 rs6000_hard_regno_call_part_clobbered
1881
1882 #undef TARGET_SLOW_UNALIGNED_ACCESS
1883 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1884
1885 #undef TARGET_CAN_CHANGE_MODE_CLASS
1886 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1887
1888 #undef TARGET_CONSTANT_ALIGNMENT
1889 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1890
1891 #undef TARGET_STARTING_FRAME_OFFSET
1892 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1893
1894 #if TARGET_ELF && RS6000_WEAK
1895 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1896 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1897 #endif
1898
1899 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1900 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1901
1902 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1903 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1904 \f
1905
1906 /* Processor table. */
1907 struct rs6000_ptt
1908 {
1909 const char *const name; /* Canonical processor name. */
1910 const enum processor_type processor; /* Processor type enum value. */
1911 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1912 };
1913
1914 static struct rs6000_ptt const processor_target_table[] =
1915 {
1916 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1917 #include "rs6000-cpus.def"
1918 #undef RS6000_CPU
1919 };
1920
1921 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1922 name is invalid. */
1923
1924 static int
1925 rs6000_cpu_name_lookup (const char *name)
1926 {
1927 size_t i;
1928
1929 if (name != NULL)
1930 {
1931 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1932 if (! strcmp (name, processor_target_table[i].name))
1933 return (int)i;
1934 }
1935
1936 return -1;
1937 }
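
/* Illustrative usage (not part of the build): assuming rs6000-cpus.def
   defines a "power9" entry, rs6000_cpu_name_lookup ("power9") returns
   that entry's index in processor_target_table, while an unknown name or
   a NULL pointer yields -1.  The match is an exact, case-sensitive
   strcmp against the canonical names, so "POWER9" would not match.  */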
1938
1939 \f
1940 /* Return number of consecutive hard regs needed starting at reg REGNO
1941 to hold something of mode MODE.
1942 This is ordinarily the length in words of a value of mode MODE
1943 but can be less for certain modes in special long registers.
1944
1945 POWER and PowerPC GPRs hold 32 bits worth;
1946 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1947
1948 static int
1949 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1950 {
1951 unsigned HOST_WIDE_INT reg_size;
1952
1953 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1954 128-bit floating point that can go in vector registers, which has VSX
1955 memory addressing. */
1956 if (FP_REGNO_P (regno))
1957 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1958 ? UNITS_PER_VSX_WORD
1959 : UNITS_PER_FP_WORD);
1960
1961 else if (ALTIVEC_REGNO_P (regno))
1962 reg_size = UNITS_PER_ALTIVEC_WORD;
1963
1964 else
1965 reg_size = UNITS_PER_WORD;
1966
1967 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1968 }
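
/* Worked example (illustrative): for DFmode (8 bytes) on a 32-bit target,
   a GPR has reg_size = UNITS_PER_WORD = 4, so the value needs
   (8 + 4 - 1) / 4 = 2 consecutive GPRs, while an FPR with
   UNITS_PER_FP_WORD = 8 holds it in (8 + 8 - 1) / 8 = 1 register.  */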
1969
1970 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1971 MODE. */
1972 static int
1973 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1974 {
1975 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1976
1977 if (COMPLEX_MODE_P (mode))
1978 mode = GET_MODE_INNER (mode);
1979
1980 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1981 register pairs, and we use PTImode where we need to deal with quad
1982 word memory operations. Don't allow quad words in the argument or frame
1983 pointer registers, just registers 0..31. */
1984 if (mode == PTImode)
1985 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1986 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1987 && ((regno & 1) == 0));
1988
1989 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1990 implementations. Don't allow an item to be split between a FP register
1991 and an Altivec register. Allow TImode in all VSX registers if the user
1992 asked for it. */
1993 if (TARGET_VSX && VSX_REGNO_P (regno)
1994 && (VECTOR_MEM_VSX_P (mode)
1995 || FLOAT128_VECTOR_P (mode)
1996 || reg_addr[mode].scalar_in_vmx_p
1997 || mode == TImode
1998 || (TARGET_VADDUQM && mode == V1TImode)))
1999 {
2000 if (FP_REGNO_P (regno))
2001 return FP_REGNO_P (last_regno);
2002
2003 if (ALTIVEC_REGNO_P (regno))
2004 {
2005 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2006 return 0;
2007
2008 return ALTIVEC_REGNO_P (last_regno);
2009 }
2010 }
2011
2012 /* The GPRs can hold any mode, but values bigger than one register
2013 cannot go past R31. */
2014 if (INT_REGNO_P (regno))
2015 return INT_REGNO_P (last_regno);
2016
2017 /* The float registers (except for VSX vector modes) can only hold floating
2018 modes and DImode. */
2019 if (FP_REGNO_P (regno))
2020 {
2021 if (FLOAT128_VECTOR_P (mode))
2022 return false;
2023
2024 if (SCALAR_FLOAT_MODE_P (mode)
2025 && (mode != TDmode || (regno % 2) == 0)
2026 && FP_REGNO_P (last_regno))
2027 return 1;
2028
2029 if (GET_MODE_CLASS (mode) == MODE_INT)
2030 {
2031 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2032 return 1;
2033
2034 if (TARGET_P8_VECTOR && (mode == SImode))
2035 return 1;
2036
2037 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2038 return 1;
2039 }
2040
2041 return 0;
2042 }
2043
2044 /* The CR register can only hold CC modes. */
2045 if (CR_REGNO_P (regno))
2046 return GET_MODE_CLASS (mode) == MODE_CC;
2047
2048 if (CA_REGNO_P (regno))
2049 return mode == Pmode || mode == SImode;
2050
2051 /* AltiVec modes can go only in AltiVec registers. */
2052 if (ALTIVEC_REGNO_P (regno))
2053 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2054 || mode == V1TImode);
2055
2056 /* We cannot put non-VSX TImode or PTImode anywhere except the general
2057 registers, and the value must fit within the register set. */
2058
2059 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
2060 }
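
/* Illustrative consequences of the tests above: PTImode is accepted only
   when it starts on an even GPR (the (regno & 1) == 0 test), so registers
   4/5 are fine but 5/6 are not; similarly TDmode in the FPRs must start
   on an even register number because of the (regno % 2) == 0 test.  */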
2061
2062 /* Implement TARGET_HARD_REGNO_NREGS. */
2063
2064 static unsigned int
2065 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2066 {
2067 return rs6000_hard_regno_nregs[mode][regno];
2068 }
2069
2070 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2071
2072 static bool
2073 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2074 {
2075 return rs6000_hard_regno_mode_ok_p[mode][regno];
2076 }
2077
2078 /* Implement TARGET_MODES_TIEABLE_P.
2079
2080 PTImode cannot tie with other modes because PTImode is restricted to even
2081 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2082 57744).
2083
2084 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2085 128-bit floating point on VSX systems ties with other vectors. */
2086
2087 static bool
2088 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2089 {
2090 if (mode1 == PTImode)
2091 return mode2 == PTImode;
2092 if (mode2 == PTImode)
2093 return false;
2094
2095 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2096 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2097 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2098 return false;
2099
2100 if (SCALAR_FLOAT_MODE_P (mode1))
2101 return SCALAR_FLOAT_MODE_P (mode2);
2102 if (SCALAR_FLOAT_MODE_P (mode2))
2103 return false;
2104
2105 if (GET_MODE_CLASS (mode1) == MODE_CC)
2106 return GET_MODE_CLASS (mode2) == MODE_CC;
2107 if (GET_MODE_CLASS (mode2) == MODE_CC)
2108 return false;
2109
2110 return true;
2111 }
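
/* Illustrative results of the ordering above: SFmode ties with DFmode
   (both scalar float), V2DFmode ties with V4SImode (both Altivec/VSX
   vector modes), a CC mode ties only with another CC mode, and TImode
   never ties with PTImode.  */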
2112
2113 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2114
2115 static bool
2116 rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
2117 unsigned int regno, machine_mode mode)
2118 {
2119 if (TARGET_32BIT
2120 && TARGET_POWERPC64
2121 && GET_MODE_SIZE (mode) > 4
2122 && INT_REGNO_P (regno))
2123 return true;
2124
2125 if (TARGET_VSX
2126 && FP_REGNO_P (regno)
2127 && GET_MODE_SIZE (mode) > 8
2128 && !FLOAT128_2REG_P (mode))
2129 return true;
2130
2131 return false;
2132 }
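
/* Example (illustrative): with -m32 -mpowerpc64, a DImode value occupies
   one 64-bit GPR, but the 32-bit ABI only preserves the low 32 bits of
   the GPRs across calls, so the first test reports it part-clobbered.
   Likewise with VSX, a 16-byte vector in a traditional FPR only has its
   low 64 bits preserved, which the second test catches.  */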
2133
2134 /* Print interesting facts about registers. */
2135 static void
2136 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2137 {
2138 int r, m;
2139
2140 for (r = first_regno; r <= last_regno; ++r)
2141 {
2142 const char *comma = "";
2143 int len;
2144
2145 if (first_regno == last_regno)
2146 fprintf (stderr, "%s:\t", reg_name);
2147 else
2148 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2149
2150 len = 8;
2151 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2152 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2153 {
2154 if (len > 70)
2155 {
2156 fprintf (stderr, ",\n\t");
2157 len = 8;
2158 comma = "";
2159 }
2160
2161 if (rs6000_hard_regno_nregs[m][r] > 1)
2162 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2163 rs6000_hard_regno_nregs[m][r]);
2164 else
2165 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2166
2167 comma = ", ";
2168 }
2169
2170 if (call_used_regs[r])
2171 {
2172 if (len > 70)
2173 {
2174 fprintf (stderr, ",\n\t");
2175 len = 8;
2176 comma = "";
2177 }
2178
2179 len += fprintf (stderr, "%s%s", comma, "call-used");
2180 comma = ", ";
2181 }
2182
2183 if (fixed_regs[r])
2184 {
2185 if (len > 70)
2186 {
2187 fprintf (stderr, ",\n\t");
2188 len = 8;
2189 comma = "";
2190 }
2191
2192 len += fprintf (stderr, "%s%s", comma, "fixed");
2193 comma = ", ";
2194 }
2195
2196 if (len > 70)
2197 {
2198 fprintf (stderr, ",\n\t");
2199 comma = "";
2200 }
2201
2202 len += fprintf (stderr, "%sreg-class = %s", comma,
2203 reg_class_names[(int)rs6000_regno_regclass[r]]);
2204 comma = ", ";
2205
2206 if (len > 70)
2207 {
2208 fprintf (stderr, ",\n\t");
2209 comma = "";
2210 }
2211
2212 fprintf (stderr, "%sregno = %d\n", comma, r);
2213 }
2214 }
2215
2216 static const char *
2217 rs6000_debug_vector_unit (enum rs6000_vector v)
2218 {
2219 const char *ret;
2220
2221 switch (v)
2222 {
2223 case VECTOR_NONE: ret = "none"; break;
2224 case VECTOR_ALTIVEC: ret = "altivec"; break;
2225 case VECTOR_VSX: ret = "vsx"; break;
2226 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2227 default: ret = "unknown"; break;
2228 }
2229
2230 return ret;
2231 }
2232
2233 /* Inner function printing just the address mask for a particular reload
2234 register class. */
2235 DEBUG_FUNCTION char *
2236 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2237 {
2238 static char ret[8];
2239 char *p = ret;
2240
2241 if ((mask & RELOAD_REG_VALID) != 0)
2242 *p++ = 'v';
2243 else if (keep_spaces)
2244 *p++ = ' ';
2245
2246 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2247 *p++ = 'm';
2248 else if (keep_spaces)
2249 *p++ = ' ';
2250
2251 if ((mask & RELOAD_REG_INDEXED) != 0)
2252 *p++ = 'i';
2253 else if (keep_spaces)
2254 *p++ = ' ';
2255
2256 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2257 *p++ = 'O';
2258 else if ((mask & RELOAD_REG_OFFSET) != 0)
2259 *p++ = 'o';
2260 else if (keep_spaces)
2261 *p++ = ' ';
2262
2263 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2264 *p++ = '+';
2265 else if (keep_spaces)
2266 *p++ = ' ';
2267
2268 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2269 *p++ = '+';
2270 else if (keep_spaces)
2271 *p++ = ' ';
2272
2273 if ((mask & RELOAD_REG_AND_M16) != 0)
2274 *p++ = '&';
2275 else if (keep_spaces)
2276 *p++ = ' ';
2277
2278 *p = '\0';
2279
2280 return ret;
2281 }
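
/* Sample output (illustrative): a mask with RELOAD_REG_VALID,
   RELOAD_REG_INDEXED, RELOAD_REG_OFFSET and both pre-increment/modify
   bits prints as "v io++ " when keep_spaces is true (so the per-class
   columns stay aligned), or as the compact "vio++" when it is false.  */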
2282
2283 /* Print the address masks in a human readable fashion. */
2284 DEBUG_FUNCTION void
2285 rs6000_debug_print_mode (ssize_t m)
2286 {
2287 ssize_t rc;
2288 int spaces = 0;
2289
2290 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2291 for (rc = 0; rc < N_RELOAD_REG; rc++)
2292 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2293 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2294
2295 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2296 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2297 {
2298 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2299 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2300 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2301 spaces = 0;
2302 }
2303 else
2304 spaces += sizeof (" Reload=sl") - 1;
2305
2306 if (reg_addr[m].scalar_in_vmx_p)
2307 {
2308 fprintf (stderr, "%*s Upper=y", spaces, "");
2309 spaces = 0;
2310 }
2311 else
2312 spaces += sizeof (" Upper=y") - 1;
2313
2314 if (rs6000_vector_unit[m] != VECTOR_NONE
2315 || rs6000_vector_mem[m] != VECTOR_NONE)
2316 {
2317 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2318 spaces, "",
2319 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2320 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2321 }
2322
2323 fputs ("\n", stderr);
2324 }
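
/* Under -mdebug=reg each mode therefore gets one line, roughly of the
   form (spacing and flags approximate, illustrative only):
     Mode: DF    GPR: v io    FPR: v io    VMX: v o    Reload=sl  Upper=y
   followed by the vector arith/mem unit names when the mode has any.  */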
2325
2326 #define DEBUG_FMT_ID "%-32s= "
2327 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2328 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2329 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2330
2331 /* Print various interesting information with -mdebug=reg. */
2332 static void
2333 rs6000_debug_reg_global (void)
2334 {
2335 static const char *const tf[2] = { "false", "true" };
2336 const char *nl = (const char *)0;
2337 int m;
2338 size_t m1, m2, v;
2339 char costly_num[20];
2340 char nop_num[20];
2341 char flags_buffer[40];
2342 const char *costly_str;
2343 const char *nop_str;
2344 const char *trace_str;
2345 const char *abi_str;
2346 const char *cmodel_str;
2347 struct cl_target_option cl_opts;
2348
2349 /* Modes we want tieable information on. */
2350 static const machine_mode print_tieable_modes[] = {
2351 QImode,
2352 HImode,
2353 SImode,
2354 DImode,
2355 TImode,
2356 PTImode,
2357 SFmode,
2358 DFmode,
2359 TFmode,
2360 IFmode,
2361 KFmode,
2362 SDmode,
2363 DDmode,
2364 TDmode,
2365 V16QImode,
2366 V8HImode,
2367 V4SImode,
2368 V2DImode,
2369 V1TImode,
2370 V32QImode,
2371 V16HImode,
2372 V8SImode,
2373 V4DImode,
2374 V2TImode,
2375 V4SFmode,
2376 V2DFmode,
2377 V8SFmode,
2378 V4DFmode,
2379 CCmode,
2380 CCUNSmode,
2381 CCEQmode,
2382 };
2383
2384 /* Virtual regs we are interested in. */
2385 const static struct {
2386 int regno; /* register number. */
2387 const char *name; /* register name. */
2388 } virtual_regs[] = {
2389 { STACK_POINTER_REGNUM, "stack pointer:" },
2390 { TOC_REGNUM, "toc: " },
2391 { STATIC_CHAIN_REGNUM, "static chain: " },
2392 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2393 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2394 { ARG_POINTER_REGNUM, "arg pointer: " },
2395 { FRAME_POINTER_REGNUM, "frame pointer:" },
2396 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2397 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2398 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2399 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2400 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2401 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2402 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2403 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2404 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2405 };
2406
2407 fputs ("\nHard register information:\n", stderr);
2408 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2409 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2410 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2411 LAST_ALTIVEC_REGNO,
2412 "vs");
2413 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2414 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2415 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2416 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2417 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2418 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2419
2420 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2421 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2422 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2423
2424 fprintf (stderr,
2425 "\n"
2426 "d reg_class = %s\n"
2427 "f reg_class = %s\n"
2428 "v reg_class = %s\n"
2429 "wa reg_class = %s\n"
2430 "we reg_class = %s\n"
2431 "wr reg_class = %s\n"
2432 "wx reg_class = %s\n"
2433 "wA reg_class = %s\n"
2434 "\n",
2435 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2436 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2437 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2438 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2439 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2440 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2441 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2442 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2443
2444 nl = "\n";
2445 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2446 rs6000_debug_print_mode (m);
2447
2448 fputs ("\n", stderr);
2449
2450 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2451 {
2452 machine_mode mode1 = print_tieable_modes[m1];
2453 bool first_time = true;
2454
2455 nl = (const char *)0;
2456 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2457 {
2458 machine_mode mode2 = print_tieable_modes[m2];
2459 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2460 {
2461 if (first_time)
2462 {
2463 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2464 nl = "\n";
2465 first_time = false;
2466 }
2467
2468 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2469 }
2470 }
2471
2472 if (!first_time)
2473 fputs ("\n", stderr);
2474 }
2475
2476 if (nl)
2477 fputs (nl, stderr);
2478
2479 if (rs6000_recip_control)
2480 {
2481 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2482
2483 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2484 if (rs6000_recip_bits[m])
2485 {
2486 fprintf (stderr,
2487 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2488 GET_MODE_NAME (m),
2489 (RS6000_RECIP_AUTO_RE_P (m)
2490 ? "auto"
2491 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2492 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2493 ? "auto"
2494 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2495 }
2496
2497 fputs ("\n", stderr);
2498 }
2499
2500 if (rs6000_cpu_index >= 0)
2501 {
2502 const char *name = processor_target_table[rs6000_cpu_index].name;
2503 HOST_WIDE_INT flags
2504 = processor_target_table[rs6000_cpu_index].target_enable;
2505
2506 sprintf (flags_buffer, "-mcpu=%s flags", name);
2507 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2508 }
2509 else
2510 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2511
2512 if (rs6000_tune_index >= 0)
2513 {
2514 const char *name = processor_target_table[rs6000_tune_index].name;
2515 HOST_WIDE_INT flags
2516 = processor_target_table[rs6000_tune_index].target_enable;
2517
2518 sprintf (flags_buffer, "-mtune=%s flags", name);
2519 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2520 }
2521 else
2522 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2523
2524 cl_target_option_save (&cl_opts, &global_options);
2525 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2526 rs6000_isa_flags);
2527
2528 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2529 rs6000_isa_flags_explicit);
2530
2531 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2532 rs6000_builtin_mask);
2533
2534 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2535
2536 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2537 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2538
2539 switch (rs6000_sched_costly_dep)
2540 {
2541 case max_dep_latency:
2542 costly_str = "max_dep_latency";
2543 break;
2544
2545 case no_dep_costly:
2546 costly_str = "no_dep_costly";
2547 break;
2548
2549 case all_deps_costly:
2550 costly_str = "all_deps_costly";
2551 break;
2552
2553 case true_store_to_load_dep_costly:
2554 costly_str = "true_store_to_load_dep_costly";
2555 break;
2556
2557 case store_to_load_dep_costly:
2558 costly_str = "store_to_load_dep_costly";
2559 break;
2560
2561 default:
2562 costly_str = costly_num;
2563 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2564 break;
2565 }
2566
2567 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2568
2569 switch (rs6000_sched_insert_nops)
2570 {
2571 case sched_finish_regroup_exact:
2572 nop_str = "sched_finish_regroup_exact";
2573 break;
2574
2575 case sched_finish_pad_groups:
2576 nop_str = "sched_finish_pad_groups";
2577 break;
2578
2579 case sched_finish_none:
2580 nop_str = "sched_finish_none";
2581 break;
2582
2583 default:
2584 nop_str = nop_num;
2585 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2586 break;
2587 }
2588
2589 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2590
2591 switch (rs6000_sdata)
2592 {
2593 default:
2594 case SDATA_NONE:
2595 break;
2596
2597 case SDATA_DATA:
2598 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2599 break;
2600
2601 case SDATA_SYSV:
2602 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2603 break;
2604
2605 case SDATA_EABI:
2606 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2607 break;
2608
2609 }
2610
2611 switch (rs6000_traceback)
2612 {
2613 case traceback_default: trace_str = "default"; break;
2614 case traceback_none: trace_str = "none"; break;
2615 case traceback_part: trace_str = "part"; break;
2616 case traceback_full: trace_str = "full"; break;
2617 default: trace_str = "unknown"; break;
2618 }
2619
2620 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2621
2622 switch (rs6000_current_cmodel)
2623 {
2624 case CMODEL_SMALL: cmodel_str = "small"; break;
2625 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2626 case CMODEL_LARGE: cmodel_str = "large"; break;
2627 default: cmodel_str = "unknown"; break;
2628 }
2629
2630 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2631
2632 switch (rs6000_current_abi)
2633 {
2634 case ABI_NONE: abi_str = "none"; break;
2635 case ABI_AIX: abi_str = "aix"; break;
2636 case ABI_ELFv2: abi_str = "ELFv2"; break;
2637 case ABI_V4: abi_str = "V4"; break;
2638 case ABI_DARWIN: abi_str = "darwin"; break;
2639 default: abi_str = "unknown"; break;
2640 }
2641
2642 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2643
2644 if (rs6000_altivec_abi)
2645 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2646
2647 if (rs6000_darwin64_abi)
2648 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2649
2650 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2651 (TARGET_SOFT_FLOAT ? "true" : "false"));
2652
2653 if (TARGET_LINK_STACK)
2654 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2655
2656 if (TARGET_P8_FUSION)
2657 {
2658 char options[80];
2659
2660 strcpy (options, "power8");
2661 if (TARGET_P8_FUSION_SIGN)
2662 strcat (options, ", sign");
2663
2664 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2665 }
2666
2667 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2668 TARGET_SECURE_PLT ? "secure" : "bss");
2669 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2670 aix_struct_return ? "aix" : "sysv");
2671 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2672 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2673 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2674 tf[!!rs6000_align_branch_targets]);
2675 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2676 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2677 rs6000_long_double_type_size);
2678 if (rs6000_long_double_type_size > 64)
2679 {
2680 fprintf (stderr, DEBUG_FMT_S, "long double type",
2681 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2682 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2683 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2684 }
2685 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2686 (int)rs6000_sched_restricted_insns_priority);
2687 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2688 (int)END_BUILTINS);
2689 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2690 (int)RS6000_BUILTIN_COUNT);
2691
2692 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2693 (int)TARGET_FLOAT128_ENABLE_TYPE);
2694
2695 if (TARGET_VSX)
2696 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2697 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2698
2699 if (TARGET_DIRECT_MOVE_128)
2700 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2701 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2702 }
2703
2704 \f
2705 /* Update the addr mask bits in reg_addr to help the secondary reload and
2706 the legitimate address support figure out the appropriate addressing to
2707 use. */
2708
2709 static void
2710 rs6000_setup_reg_addr_masks (void)
2711 {
2712 ssize_t rc, reg, m, nregs;
2713 addr_mask_type any_addr_mask, addr_mask;
2714
2715 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2716 {
2717 machine_mode m2 = (machine_mode) m;
2718 bool complex_p = false;
2719 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2720 size_t msize;
2721
2722 if (COMPLEX_MODE_P (m2))
2723 {
2724 complex_p = true;
2725 m2 = GET_MODE_INNER (m2);
2726 }
2727
2728 msize = GET_MODE_SIZE (m2);
2729
2730 /* SDmode is special in that we want to access it only via REG+REG
2731 addressing on power7 and above, since we want to use the LFIWZX and
2732 STFIWX instructions to load and store it. */
2733 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2734
2735 any_addr_mask = 0;
2736 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2737 {
2738 addr_mask = 0;
2739 reg = reload_reg_map[rc].reg;
2740
2741 /* Can mode values go in the GPR/FPR/Altivec registers? */
2742 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2743 {
2744 bool small_int_vsx_p = (small_int_p
2745 && (rc == RELOAD_REG_FPR
2746 || rc == RELOAD_REG_VMX));
2747
2748 nregs = rs6000_hard_regno_nregs[m][reg];
2749 addr_mask |= RELOAD_REG_VALID;
2750
2751 /* Indicate if the mode takes more than 1 physical register. If
2752 it takes a single register, indicate it can do REG+REG
2753 addressing. Small integers in VSX registers can only do
2754 REG+REG addressing. */
2755 if (small_int_vsx_p)
2756 addr_mask |= RELOAD_REG_INDEXED;
2757 else if (nregs > 1 || m == BLKmode || complex_p)
2758 addr_mask |= RELOAD_REG_MULTIPLE;
2759 else
2760 addr_mask |= RELOAD_REG_INDEXED;
2761
2762 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2763 addressing. If we allow scalars into Altivec registers,
2764 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2765
2766 For VSX systems, we don't allow update addressing for
2767 DFmode/SFmode if those registers can go in both the
2768 traditional floating point registers and Altivec registers.
2769 The load/store instructions for the Altivec registers do not
2770 have update forms. If we allowed update addressing, it seems
2771 to break IV-OPT code using floating point if the index type is
2772 int instead of long (PR target/81550 and target/84042). */
2773
2774 if (TARGET_UPDATE
2775 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2776 && msize <= 8
2777 && !VECTOR_MODE_P (m2)
2778 && !FLOAT128_VECTOR_P (m2)
2779 && !complex_p
2780 && (m != E_DFmode || !TARGET_VSX)
2781 && (m != E_SFmode || !TARGET_P8_VECTOR)
2782 && !small_int_vsx_p)
2783 {
2784 addr_mask |= RELOAD_REG_PRE_INCDEC;
2785
2786 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2787 we don't allow PRE_MODIFY for some multi-register
2788 operations. */
2789 switch (m)
2790 {
2791 default:
2792 addr_mask |= RELOAD_REG_PRE_MODIFY;
2793 break;
2794
2795 case E_DImode:
2796 if (TARGET_POWERPC64)
2797 addr_mask |= RELOAD_REG_PRE_MODIFY;
2798 break;
2799
2800 case E_DFmode:
2801 case E_DDmode:
2802 if (TARGET_HARD_FLOAT)
2803 addr_mask |= RELOAD_REG_PRE_MODIFY;
2804 break;
2805 }
2806 }
2807 }
2808
2809 /* GPR and FPR registers can do REG+OFFSET addressing, except
2810 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2811 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
2812 if ((addr_mask != 0) && !indexed_only_p
2813 && msize <= 8
2814 && (rc == RELOAD_REG_GPR
2815 || ((msize == 8 || m2 == SFmode)
2816 && (rc == RELOAD_REG_FPR
2817 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2818 addr_mask |= RELOAD_REG_OFFSET;
2819
2820 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2821 instructions are enabled. The offset for 128-bit VSX registers is
2822 only 12 bits. While GPRs can handle the full offset range, VSX
2823 registers can only handle the restricted range. */
2824 else if ((addr_mask != 0) && !indexed_only_p
2825 && msize == 16 && TARGET_P9_VECTOR
2826 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2827 || (m2 == TImode && TARGET_VSX)))
2828 {
2829 addr_mask |= RELOAD_REG_OFFSET;
2830 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2831 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2832 }
2833
2834 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2835 addressing on 128-bit types. */
2836 if (rc == RELOAD_REG_VMX && msize == 16
2837 && (addr_mask & RELOAD_REG_VALID) != 0)
2838 addr_mask |= RELOAD_REG_AND_M16;
2839
2840 reg_addr[m].addr_mask[rc] = addr_mask;
2841 any_addr_mask |= addr_mask;
2842 }
2843
2844 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2845 }
2846 }
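
/* Net effect (illustrative): with power9-style VSX enabled, DFmode ends up
   valid, indexed and offsettable in the GPR, FPR and VMX reload classes,
   but gets no RELOAD_REG_PRE_INCDEC or RELOAD_REG_PRE_MODIFY bits at all,
   because the (m != E_DFmode || !TARGET_VSX) test above disables update
   addressing once DFmode scalars may live in Altivec registers.  */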
2847
2848 \f
2849 /* Initialize the various global tables that are based on register size. */
2850 static void
2851 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2852 {
2853 ssize_t r, m, c;
2854 int align64;
2855 int align32;
2856
2857 /* Precalculate REGNO_REG_CLASS. */
2858 rs6000_regno_regclass[0] = GENERAL_REGS;
2859 for (r = 1; r < 32; ++r)
2860 rs6000_regno_regclass[r] = BASE_REGS;
2861
2862 for (r = 32; r < 64; ++r)
2863 rs6000_regno_regclass[r] = FLOAT_REGS;
2864
2865 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2866 rs6000_regno_regclass[r] = NO_REGS;
2867
2868 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2869 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2870
2871 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2872 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2873 rs6000_regno_regclass[r] = CR_REGS;
2874
2875 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2876 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2877 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2878 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2879 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2880 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2881 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2882
2883 /* Precalculate register class to simpler reload register class. We don't
2884 need all of the register classes that are combinations of different
2885 classes, just the simple ones that have constraint letters. */
2886 for (c = 0; c < N_REG_CLASSES; c++)
2887 reg_class_to_reg_type[c] = NO_REG_TYPE;
2888
2889 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2890 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2891 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2892 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2893 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2894 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2895 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2896 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2897 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2898 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2899
2900 if (TARGET_VSX)
2901 {
2902 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2903 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2904 }
2905 else
2906 {
2907 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2908 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2909 }
2910
2911 /* Precalculate the valid memory formats as well as the vector information;
2912 this must be set up before the rs6000_hard_regno_nregs_internal calls
2913 below. */
2914 gcc_assert ((int)VECTOR_NONE == 0);
2915 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2916 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2917
2918 gcc_assert ((int)CODE_FOR_nothing == 0);
2919 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2920
2921 gcc_assert ((int)NO_REGS == 0);
2922 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2923
2924 /* The VSX hardware allows native alignment for vectors, but we control whether
2925 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2926 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2927 {
2928 align64 = 64;
2929 align32 = 32;
2930 }
2931 else
2932 {
2933 align64 = 128;
2934 align32 = 128;
2935 }
2936
2937 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2938 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2939 if (TARGET_FLOAT128_TYPE)
2940 {
2941 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2942 rs6000_vector_align[KFmode] = 128;
2943
2944 if (FLOAT128_IEEE_P (TFmode))
2945 {
2946 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2947 rs6000_vector_align[TFmode] = 128;
2948 }
2949 }
2950
2951 /* V2DF mode, VSX only. */
2952 if (TARGET_VSX)
2953 {
2954 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2955 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2956 rs6000_vector_align[V2DFmode] = align64;
2957 }
2958
2959 /* V4SF mode, either VSX or Altivec. */
2960 if (TARGET_VSX)
2961 {
2962 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2963 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2964 rs6000_vector_align[V4SFmode] = align32;
2965 }
2966 else if (TARGET_ALTIVEC)
2967 {
2968 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2969 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2970 rs6000_vector_align[V4SFmode] = align32;
2971 }
2972
2973 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2974 and stores. */
2975 if (TARGET_ALTIVEC)
2976 {
2977 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2978 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2979 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2980 rs6000_vector_align[V4SImode] = align32;
2981 rs6000_vector_align[V8HImode] = align32;
2982 rs6000_vector_align[V16QImode] = align32;
2983
2984 if (TARGET_VSX)
2985 {
2986 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2987 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2988 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2989 }
2990 else
2991 {
2992 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2993 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2994 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2995 }
2996 }
2997
2998 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2999 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3000 if (TARGET_VSX)
3001 {
3002 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3003 rs6000_vector_unit[V2DImode]
3004 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3005 rs6000_vector_align[V2DImode] = align64;
3006
3007 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3008 rs6000_vector_unit[V1TImode]
3009 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3010 rs6000_vector_align[V1TImode] = 128;
3011 }
3012
3013 /* DFmode, see if we want to use the VSX unit. Memory is handled
3014 differently, so don't set rs6000_vector_mem. */
3015 if (TARGET_VSX)
3016 {
3017 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3018 rs6000_vector_align[DFmode] = 64;
3019 }
3020
3021 /* SFmode, see if we want to use the VSX unit. */
3022 if (TARGET_P8_VECTOR)
3023 {
3024 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3025 rs6000_vector_align[SFmode] = 32;
3026 }
3027
3028 /* Allow TImode in VSX register and set the VSX memory macros. */
3029 if (TARGET_VSX)
3030 {
3031 rs6000_vector_mem[TImode] = VECTOR_VSX;
3032 rs6000_vector_align[TImode] = align64;
3033 }
3034
3035 /* Register class constraints for the constraints that depend on compile
3036 switches. When the VSX code was added, different constraints were added
3037 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3038 of the VSX registers are used. The register classes for scalar floating
3039 point types are set based on whether we allow that type into the upper
3040 (Altivec) registers. GCC has register classes to target the Altivec
3041 registers for load/store operations, to select using a VSX memory
3042 operation instead of the traditional floating point operation. The
3043 constraints are:
3044
3045 d - Register class to use with traditional DFmode instructions.
3046 f - Register class to use with traditional SFmode instructions.
3047 v - Altivec register.
3048 wa - Any VSX register.
3049 wc - Reserved to represent individual CR bits (used in LLVM).
3050 wn - always NO_REGS.
3051 wr - GPR if 64-bit mode is permitted.
3052 wx - Float register if we can do 32-bit int stores. */
3053
3054 if (TARGET_HARD_FLOAT)
3055 {
3056 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3057 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3058 }
3059
3060 if (TARGET_VSX)
3061 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3062
3063 /* Add conditional constraints based on various options, to allow us to
3064 collapse multiple insn patterns. */
3065 if (TARGET_ALTIVEC)
3066 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3067
3068 if (TARGET_POWERPC64)
3069 {
3070 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3071 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3072 }
3073
3074 if (TARGET_STFIWX)
3075 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3076
3077 /* Support for new direct moves (ISA 3.0 + 64-bit). */
3078 if (TARGET_DIRECT_MOVE_128)
3079 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3080
3081 /* Set up the reload helper and direct move functions. */
3082 if (TARGET_VSX || TARGET_ALTIVEC)
3083 {
3084 if (TARGET_64BIT)
3085 {
3086 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3087 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3088 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3089 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3090 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3091 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3092 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3093 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3094 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3095 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3096 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3097 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3098 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3099 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3100 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3101 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3102 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3103 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3104 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3105 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3106
3107 if (FLOAT128_VECTOR_P (KFmode))
3108 {
3109 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3110 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3111 }
3112
3113 if (FLOAT128_VECTOR_P (TFmode))
3114 {
3115 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3116 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3117 }
3118
3119 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3120 available. */
3121 if (TARGET_NO_SDMODE_STACK)
3122 {
3123 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3124 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3125 }
3126
3127 if (TARGET_VSX)
3128 {
3129 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3130 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3131 }
3132
3133 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3134 {
3135 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3136 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3137 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3138 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3139 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3140 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3141 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3142 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3143 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3144
3145 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3146 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3147 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3148 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3149 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3150 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3151 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3152 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3153 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3154
3155 if (FLOAT128_VECTOR_P (KFmode))
3156 {
3157 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3158 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3159 }
3160
3161 if (FLOAT128_VECTOR_P (TFmode))
3162 {
3163 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3164 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3165 }
3166 }
3167 }
3168 else
3169 {
3170 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3171 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3172 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3173 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3174 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3175 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3176 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3177 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3178 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3179 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3180 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3181 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3182 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3183 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3184 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3185 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3186 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3187 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3188 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3189 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3190
3191 if (FLOAT128_VECTOR_P (KFmode))
3192 {
3193 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3194 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3195 }
3196
3197 if (FLOAT128_IEEE_P (TFmode))
3198 {
3199 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3200 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3201 }
3202
3203 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3204 available. */
3205 if (TARGET_NO_SDMODE_STACK)
3206 {
3207 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3208 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3209 }
3210
3211 if (TARGET_VSX)
3212 {
3213 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3214 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3215 }
3216
3217 if (TARGET_DIRECT_MOVE)
3218 {
3219 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3220 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3221 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3222 }
3223 }
3224
3225 reg_addr[DFmode].scalar_in_vmx_p = true;
3226 reg_addr[DImode].scalar_in_vmx_p = true;
3227
3228 if (TARGET_P8_VECTOR)
3229 {
3230 reg_addr[SFmode].scalar_in_vmx_p = true;
3231 reg_addr[SImode].scalar_in_vmx_p = true;
3232
3233 if (TARGET_P9_VECTOR)
3234 {
3235 reg_addr[HImode].scalar_in_vmx_p = true;
3236 reg_addr[QImode].scalar_in_vmx_p = true;
3237 }
3238 }
3239 }
3240
3241 /* Precalculate HARD_REGNO_NREGS. */
3242 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3243 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3244 rs6000_hard_regno_nregs[m][r]
3245 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3246
3247 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3248 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3249 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3250 rs6000_hard_regno_mode_ok_p[m][r]
3251 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3252
3253 /* Precalculate CLASS_MAX_NREGS sizes. */
3254 for (c = 0; c < LIM_REG_CLASSES; ++c)
3255 {
3256 int reg_size;
3257
3258 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3259 reg_size = UNITS_PER_VSX_WORD;
3260
3261 else if (c == ALTIVEC_REGS)
3262 reg_size = UNITS_PER_ALTIVEC_WORD;
3263
3264 else if (c == FLOAT_REGS)
3265 reg_size = UNITS_PER_FP_WORD;
3266
3267 else
3268 reg_size = UNITS_PER_WORD;
3269
3270 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3271 {
3272 machine_mode m2 = (machine_mode)m;
3273 int reg_size2 = reg_size;
3274
3275 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3276 in VSX. */
3277 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3278 reg_size2 = UNITS_PER_FP_WORD;
3279
3280 rs6000_class_max_nregs[m][c]
3281 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3282 }
3283 }
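
  /* Worked example (illustrative): IFmode (IBM double-double, 16 bytes) in
     VSX_REGS is FLOAT128_2REG_P, so reg_size2 = UNITS_PER_FP_WORD = 8 and
     it needs (16 + 8 - 1) / 8 = 2 registers, whereas V2DFmode in the same
     class needs only (16 + 16 - 1) / 16 = 1 full VSX register.  */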
3284
3285 /* Calculate which modes to automatically generate code to use the
3286 reciprocal divide and square root instructions. In the future, possibly
3287 automatically generate the instructions even if the user did not specify
3288 -mrecip. The older machines' double-precision reciprocal square root
3289 estimate is not accurate enough. */
3290 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3291 if (TARGET_FRES)
3292 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3293 if (TARGET_FRE)
3294 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3295 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3296 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3297 if (VECTOR_UNIT_VSX_P (V2DFmode))
3298 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3299
3300 if (TARGET_FRSQRTES)
3301 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3302 if (TARGET_FRSQRTE)
3303 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3304 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3305 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3306 if (VECTOR_UNIT_VSX_P (V2DFmode))
3307 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3308
3309 if (rs6000_recip_control)
3310 {
3311 if (!flag_finite_math_only)
3312 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3313 "-ffast-math");
3314 if (flag_trapping_math)
3315 warning (0, "%qs requires %qs or %qs", "-mrecip",
3316 "-fno-trapping-math", "-ffast-math");
3317 if (!flag_reciprocal_math)
3318 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3319 "-ffast-math");
3320 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3321 {
3322 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3323 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3324 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3325
3326 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3327 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3328 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3329
3330 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3331 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3332 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3333
3334 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3335 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3336 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3337
3338 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3339 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3340 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3341
3342 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3343 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3344 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3345
3346 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3347 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3348 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3349
3350 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3351 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3352 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3353 }
3354 }
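/* Illustrative sketch (not part of the original source): on a VSX machine
   compiled with -mrecip=all and -ffast-math, the code above leaves

     rs6000_recip_bits[V2DFmode] == (RS6000_RECIP_MASK_HAVE_RE
                                     | RS6000_RECIP_MASK_AUTO_RE
                                     | RS6000_RECIP_MASK_HAVE_RSQRTE
                                     | RS6000_RECIP_MASK_AUTO_RSQRTE)

   i.e. both estimate instructions exist and both are enabled for
   automatic use.  */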
3355
3356 /* Update the addr mask bits in reg_addr to help the secondary reload and
3357 legitimate-address (GO_IF_LEGITIMATE_ADDRESS) support figure out the
3358 appropriate addressing to use. */
3359 rs6000_setup_reg_addr_masks ();
3360
3361 if (global_init_p || TARGET_DEBUG_TARGET)
3362 {
3363 if (TARGET_DEBUG_REG)
3364 rs6000_debug_reg_global ();
3365
3366 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3367 fprintf (stderr,
3368 "SImode variable mult cost = %d\n"
3369 "SImode constant mult cost = %d\n"
3370 "SImode short constant mult cost = %d\n"
3371 "DImode multipliciation cost = %d\n"
3372 "SImode division cost = %d\n"
3373 "DImode division cost = %d\n"
3374 "Simple fp operation cost = %d\n"
3375 "DFmode multiplication cost = %d\n"
3376 "SFmode division cost = %d\n"
3377 "DFmode division cost = %d\n"
3378 "cache line size = %d\n"
3379 "l1 cache size = %d\n"
3380 "l2 cache size = %d\n"
3381 "simultaneous prefetches = %d\n"
3382 "\n",
3383 rs6000_cost->mulsi,
3384 rs6000_cost->mulsi_const,
3385 rs6000_cost->mulsi_const9,
3386 rs6000_cost->muldi,
3387 rs6000_cost->divsi,
3388 rs6000_cost->divdi,
3389 rs6000_cost->fp,
3390 rs6000_cost->dmul,
3391 rs6000_cost->sdiv,
3392 rs6000_cost->ddiv,
3393 rs6000_cost->cache_line_size,
3394 rs6000_cost->l1_cache_size,
3395 rs6000_cost->l2_cache_size,
3396 rs6000_cost->simultaneous_prefetches);
3397 }
3398 }
3399
3400 #if TARGET_MACHO
3401 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3402
3403 static void
3404 darwin_rs6000_override_options (void)
3405 {
3406 /* The Darwin ABI always includes AltiVec; it cannot be (validly) turned
3407 off. */
3408 rs6000_altivec_abi = 1;
3409 TARGET_ALTIVEC_VRSAVE = 1;
3410 rs6000_current_abi = ABI_DARWIN;
3411
3412 if (DEFAULT_ABI == ABI_DARWIN
3413 && TARGET_64BIT)
3414 darwin_one_byte_bool = 1;
3415
3416 if (TARGET_64BIT && ! TARGET_POWERPC64)
3417 {
3418 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3419 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3420 }
3421 if (flag_mkernel)
3422 {
3423 rs6000_default_long_calls = 1;
3424 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3425 }
3426
3427 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3428 Altivec. */
3429 if (!flag_mkernel && !flag_apple_kext
3430 && TARGET_64BIT
3431 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3432 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3433
3434 /* Unless the user (not the configurer) has explicitly overridden
3435 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to the
3436 G4 (i.e. enable AltiVec) unless targeting the kernel. */
3437 if (!flag_mkernel
3438 && !flag_apple_kext
3439 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3440 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3441 && ! global_options_set.x_rs6000_cpu_index)
3442 {
3443 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3444 }
3445 }
3446 #endif
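/* Illustrative effects of the Darwin overrides above (a sketch, not part
   of the original source):

     -m64      -> -mpowerpc64 is forced on (with a warning) and -maltivec
                  is enabled unless explicitly disabled
     -mkernel  -> long calls by default and soft float
     kernel and kext builds never get the 10.5+ AltiVec-on default.  */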
3447
3448 /* If not otherwise specified by a target, make 'long double' equivalent to
3449 'double'. */
3450
3451 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3452 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3453 #endif
3454
3455 /* Return the builtin mask of the various options in use that could affect
3456 which builtins are available. In the past we used target_flags, but we've
3457 run out of bits, and some options are no longer in target_flags. */
3458
3459 HOST_WIDE_INT
3460 rs6000_builtin_mask_calculate (void)
3461 {
3462 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3463 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3464 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3465 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3466 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3467 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3468 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3469 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3470 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3471 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3472 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3473 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3474 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3475 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3476 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3477 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3478 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3479 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3480 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3481 | ((TARGET_LONG_DOUBLE_128
3482 && TARGET_HARD_FLOAT
3483 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3484 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3485 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3486 }
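/* Illustrative example (an assumption about a typical configuration, not
   part of the original source): for a 64-bit -mcpu=power8 compilation the
   mask computed above includes at least

     RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR
     | RS6000_BTM_CRYPTO | RS6000_BTM_HTM | RS6000_BTM_64BIT
     | RS6000_BTM_POWERPC64 | RS6000_BTM_HARD_FLOAT

   and only built-ins whose requirements are a subset of the mask are made
   available.  */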
3487
3488 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3489 to clobber the XER[CA] bit because clobbering that bit without telling
3490 the compiler worked just fine with versions of GCC before GCC 5, and
3491 breaking a lot of older code in ways that are hard to track down is
3492 not such a great idea. */
3493
3494 static rtx_insn *
3495 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3496 vec<const char *> &/*constraints*/,
3497 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3498 {
3499 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3500 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3501 return NULL;
3502 }
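/* For example (illustrative only, not part of the original source): with
   the hook above, a statement such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   is compiled as if it also clobbered the carry bit (CA_REGNO), because
   addic modifies XER[CA] and pre-GCC-5 code rarely declared that.  */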
3503
3504 /* Override command line options.
3505
3506 Combine build-specific configuration information with options
3507 specified on the command line to set various state variables which
3508 influence code generation, optimization, and expansion of built-in
3509 functions. Assure that command-line configuration preferences are
3510 compatible with each other and with the build configuration; issue
3511 warnings while adjusting configuration or error messages while
3512 rejecting configuration.
3513
3514 Upon entry to this function:
3515
3516 This function is called once at the beginning of
3517 compilation, and then again at the start and end of compiling
3518 each section of code that has a different configuration, as
3519 indicated, for example, by adding the
3520
3521 __attribute__((__target__("cpu=power9")))
3522
3523 qualifier to a function definition or, for example, by bracketing
3524 code between
3525
3526 #pragma GCC target("altivec")
3527
3528 and
3529
3530 #pragma GCC reset_options
3531
3532 directives. Parameter global_init_p is true for the initial
3533 invocation, which initializes global variables, and false for all
3534 subsequent invocations.
3535
3536
3537 Various global state information is assumed to be valid. This
3538 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3539 default CPU specified at build configure time, TARGET_DEFAULT,
3540 representing the default set of option flags for the default
3541 target, and global_options_set.x_rs6000_isa_flags, representing
3542 which options were requested on the command line.
3543
3544 Upon return from this function:
3545
3546 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3547 was set by name on the command line. Additionally, if certain
3548 attributes are automatically enabled or disabled by this function
3549 in order to assure compatibility between options and
3550 configuration, the flags associated with those attributes are
3551 also set. By setting these "explicit bits", we avoid the risk
3552 that other code might accidentally overwrite these particular
3553 attributes with "default values".
3554
3555 The various bits of rs6000_isa_flags are set to indicate the
3556 target options that have been selected for the most current
3557 compilation efforts. This has the effect of also turning on the
3558 associated TARGET_XXX values since these are macros which are
3559 generally defined to test the corresponding bit of the
3560 rs6000_isa_flags variable.
3561
3562 The variable rs6000_builtin_mask is set to represent the target
3563 options for the most current compilation efforts, consistent with
3564 the current contents of rs6000_isa_flags. This variable controls
3565 expansion of built-in functions.
3566
3567 Various other global variables and fields of global structures
3568 (over 50 in all) are initialized to reflect the desired options
3569 for the most current compilation efforts. */
3570
3571 static bool
3572 rs6000_option_override_internal (bool global_init_p)
3573 {
3574 bool ret = true;
3575
3576 HOST_WIDE_INT set_masks;
3577 HOST_WIDE_INT ignore_masks;
3578 int cpu_index = -1;
3579 int tune_index;
3580 struct cl_target_option *main_target_opt
3581 = ((global_init_p || target_option_default_node == NULL)
3582 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3583
3584 /* Print defaults. */
3585 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3586 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3587
3588 /* Remember the explicit arguments. */
3589 if (global_init_p)
3590 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3591
3592 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3593 library functions, so warn about it. The flag may be useful for
3594 performance studies from time to time though, so don't disable it
3595 entirely. */
3596 if (global_options_set.x_rs6000_alignment_flags
3597 && rs6000_alignment_flags == MASK_ALIGN_POWER
3598 && DEFAULT_ABI == ABI_DARWIN
3599 && TARGET_64BIT)
3600 warning (0, "%qs is not supported for 64-bit Darwin;"
3601 " it is incompatible with the installed C and C++ libraries",
3602 "-malign-power");
3603
3604 /* Numerous experiments show that IRA-based loop pressure
3605 calculation works better for RTL loop invariant motion on targets
3606 with enough (>= 32) registers. It is an expensive optimization,
3607 so it is enabled only when optimizing for peak performance. */
3608 if (optimize >= 3 && global_init_p
3609 && !global_options_set.x_flag_ira_loop_pressure)
3610 flag_ira_loop_pressure = 1;
3611
3612 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3613 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3614 options were already specified. */
3615 if (flag_sanitize & SANITIZE_USER_ADDRESS
3616 && !global_options_set.x_flag_asynchronous_unwind_tables)
3617 flag_asynchronous_unwind_tables = 1;
3618
3619 /* Set the pointer size. */
3620 if (TARGET_64BIT)
3621 {
3622 rs6000_pmode = DImode;
3623 rs6000_pointer_size = 64;
3624 }
3625 else
3626 {
3627 rs6000_pmode = SImode;
3628 rs6000_pointer_size = 32;
3629 }
3630
3631 /* Some OSs don't support saving the high part of 64-bit registers on context
3632 switch. Other OSs don't support saving Altivec registers. On those OSs,
3633 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3634 if the user wants either, the user must explicitly specify them and we
3635 won't interfere with the user's specification. */
3636
3637 set_masks = POWERPC_MASKS;
3638 #ifdef OS_MISSING_POWERPC64
3639 if (OS_MISSING_POWERPC64)
3640 set_masks &= ~OPTION_MASK_POWERPC64;
3641 #endif
3642 #ifdef OS_MISSING_ALTIVEC
3643 if (OS_MISSING_ALTIVEC)
3644 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3645 | OTHER_VSX_VECTOR_MASKS);
3646 #endif
3647
3648 /* Don't override by the processor default if given explicitly. */
3649 set_masks &= ~rs6000_isa_flags_explicit;
3650
3651 if (global_init_p && rs6000_dejagnu_cpu_index >= 0)
3652 rs6000_cpu_index = rs6000_dejagnu_cpu_index;
3653
3654 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3655 the cpu in a target attribute or pragma, but did not specify a tuning
3656 option, use the cpu for the tuning option rather than the option specified
3657 with -mtune on the command line. Process a '--with-cpu' configuration
3658 request as an implicit -mcpu. */
3659 if (rs6000_cpu_index >= 0)
3660 cpu_index = rs6000_cpu_index;
3661 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3662 cpu_index = main_target_opt->x_rs6000_cpu_index;
3663 else if (OPTION_TARGET_CPU_DEFAULT)
3664 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3665
3666 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3667 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3668 with those from the cpu, except for options that were explicitly set. If
3669 we don't have a cpu, do not override the target bits set in
3670 TARGET_DEFAULT. */
3671 if (cpu_index >= 0)
3672 {
3673 rs6000_cpu_index = cpu_index;
3674 rs6000_isa_flags &= ~set_masks;
3675 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3676 & set_masks);
3677 }
3678 else
3679 {
3680 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3681 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3682 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3683 to using rs6000_isa_flags, we need to do the initialization here.
3684
3685 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3686 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3687 HOST_WIDE_INT flags;
3688 if (TARGET_DEFAULT)
3689 flags = TARGET_DEFAULT;
3690 else
3691 {
3692 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3693 const char *default_cpu = (!TARGET_POWERPC64
3694 ? "powerpc"
3695 : (BYTES_BIG_ENDIAN
3696 ? "powerpc64"
3697 : "powerpc64le"));
3698 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3699 flags = processor_target_table[default_cpu_index].target_enable;
3700 }
3701 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3702 }
3703
3704 if (rs6000_tune_index >= 0)
3705 tune_index = rs6000_tune_index;
3706 else if (cpu_index >= 0)
3707 rs6000_tune_index = tune_index = cpu_index;
3708 else
3709 {
3710 size_t i;
3711 enum processor_type tune_proc
3712 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3713
3714 tune_index = -1;
3715 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3716 if (processor_target_table[i].processor == tune_proc)
3717 {
3718 tune_index = i;
3719 break;
3720 }
3721 }
3722
3723 if (cpu_index >= 0)
3724 rs6000_cpu = processor_target_table[cpu_index].processor;
3725 else
3726 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3727
3728 gcc_assert (tune_index >= 0);
3729 rs6000_tune = processor_target_table[tune_index].processor;
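/* Illustrative example (not part of the original source): with
   "-mcpu=power8 -mtune=power9" the code above uses the power8 entry of
   processor_target_table for the ISA bits (rs6000_cpu) and the power9
   entry for scheduling and cost decisions (rs6000_tune); with only
   "-mcpu=power8" the tuning defaults to the same power8 entry.  */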
3730
3731 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3732 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3733 || rs6000_cpu == PROCESSOR_PPCE5500)
3734 {
3735 if (TARGET_ALTIVEC)
3736 error ("AltiVec not supported in this target");
3737 }
3738
3739 /* If we are optimizing big-endian systems for space, use the load/store
3740 multiple instructions. */
3741 if (BYTES_BIG_ENDIAN && optimize_size)
3742 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3743
3744 /* Don't allow -mmultiple on little-endian systems unless the cpu is a 750,
3745 because the hardware doesn't support the instructions used in
3746 little-endian mode, and they cause an alignment trap. The 750 does not
3747 cause an alignment trap (except when the target address is unaligned). */
3748
3749 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3750 {
3751 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3752 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3753 warning (0, "%qs is not supported on little endian systems",
3754 "-mmultiple");
3755 }
3756
3757 /* If little-endian, default to -mstrict-align on older processors.
3758 Testing for htm matches power8 and later. */
3759 if (!BYTES_BIG_ENDIAN
3760 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3761 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3762
3763 if (!rs6000_fold_gimple)
3764 fprintf (stderr,
3765 "gimple folding of rs6000 builtins has been disabled.\n");
3766
3767 /* Add some warnings for VSX. */
3768 if (TARGET_VSX)
3769 {
3770 const char *msg = NULL;
3771 if (!TARGET_HARD_FLOAT)
3772 {
3773 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3774 msg = N_("%<-mvsx%> requires hardware floating point");
3775 else
3776 {
3777 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3778 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3779 }
3780 }
3781 else if (TARGET_AVOID_XFORM > 0)
3782 msg = N_("%<-mvsx%> needs indexed addressing");
3783 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3784 & OPTION_MASK_ALTIVEC))
3785 {
3786 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3787 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3788 else
3789 msg = N_("%<-mno-altivec%> disables vsx");
3790 }
3791
3792 if (msg)
3793 {
3794 warning (0, msg);
3795 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3796 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3797 }
3798 }
3799
3800 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3801 the -mcpu setting to enable options that conflict. */
3802 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3803 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3804 | OPTION_MASK_ALTIVEC
3805 | OPTION_MASK_VSX)) != 0)
3806 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3807 | OPTION_MASK_DIRECT_MOVE)
3808 & ~rs6000_isa_flags_explicit);
3809
3810 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3811 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3812
3813 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3814 off all of the options that depend on those flags. */
3815 ignore_masks = rs6000_disable_incompatible_switches ();
3816
3817 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3818 unless the user explicitly used the -mno-<option> to disable the code. */
3819 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3820 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3821 else if (TARGET_P9_MINMAX)
3822 {
3823 if (cpu_index >= 0)
3824 {
3825 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3826 {
3827 /* Legacy behavior: allow -mcpu=power9 with certain
3828 capabilities explicitly disabled. */
3829 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3830 }
3831 else
3832 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3833 "for <xxx> less than power9", "-mcpu");
3834 }
3835 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3836 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3837 & rs6000_isa_flags_explicit))
3838 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3839 were explicitly cleared. */
3840 error ("%qs incompatible with explicitly disabled options",
3841 "-mpower9-minmax");
3842 else
3843 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3844 }
3845 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3846 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3847 else if (TARGET_VSX)
3848 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3849 else if (TARGET_POPCNTD)
3850 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3851 else if (TARGET_DFP)
3852 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3853 else if (TARGET_CMPB)
3854 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3855 else if (TARGET_FPRND)
3856 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3857 else if (TARGET_POPCNTB)
3858 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3859 else if (TARGET_ALTIVEC)
3860 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3861
3862 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3863 {
3864 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3865 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3866 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3867 }
3868
3869 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3870 {
3871 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3872 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3873 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3874 }
3875
3876 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3877 {
3878 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3879 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3880 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3881 }
3882
3883 if (TARGET_P8_VECTOR && !TARGET_VSX)
3884 {
3885 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3886 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3887 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3888 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3889 {
3890 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3891 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3892 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3893 }
3894 else
3895 {
3896 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3897 not explicit. */
3898 rs6000_isa_flags |= OPTION_MASK_VSX;
3899 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3900 }
3901 }
3902
3903 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3904 {
3905 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3906 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3907 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3908 }
3909
3910 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3911 silently turn off quad memory mode. */
3912 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3913 {
3914 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3915 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3916
3917 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3918 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3919
3920 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3921 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3922 }
3923
3924 /* Non-atomic quad memory load/store are disabled for little endian, since
3925 the words are reversed, but atomic operations can still be done by
3926 swapping the words. */
3927 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3928 {
3929 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3930 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3931 "mode"));
3932
3933 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3934 }
3935
3936 /* Assume that if the user asked for normal quad memory instructions, they
3937 want the atomic versions as well, unless they explicitly told us not to
3938 use quad word atomic instructions. */
3939 if (TARGET_QUAD_MEMORY
3940 && !TARGET_QUAD_MEMORY_ATOMIC
3941 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3942 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
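/* Illustrative summary of the three quad-memory checks above (a sketch,
   not part of the original source):

     -m32 with -mquad-memory[-atomic]  -> both masks cleared (warning only
                                          if the option was explicit)
     little endian with -mquad-memory  -> non-atomic mask cleared likewise
     -mquad-memory otherwise           -> -mquad-memory-atomic is enabled
                                          too, unless explicitly disabled. */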
3943
3944 /* If we can shrink-wrap the TOC register save separately, then use
3945 -msave-toc-indirect unless explicitly disabled. */
3946 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3947 && flag_shrink_wrap_separate
3948 && optimize_function_for_speed_p (cfun))
3949 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3950
3951 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3952 generating power8 instructions. Power9 does not optimize power8 fusion
3953 cases. */
3954 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3955 {
3956 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3957 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3958 else
3959 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3960 }
3961
3962 /* Setting additional fusion flags turns on base fusion. */
3963 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3964 {
3965 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3966 {
3967 if (TARGET_P8_FUSION_SIGN)
3968 error ("%qs requires %qs", "-mpower8-fusion-sign",
3969 "-mpower8-fusion");
3970
3971 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3972 }
3973 else
3974 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3975 }
3976
3977 /* Power8 does not fuse sign extended loads with the addis. If we are
3978 optimizing at high levels for speed, convert a sign extended load into a
3979 zero extending load, and an explicit sign extension. */
3980 if (TARGET_P8_FUSION
3981 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3982 && optimize_function_for_speed_p (cfun)
3983 && optimize >= 3)
3984 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3985
3986 /* ISA 3.0 vector instructions include ISA 2.07. */
3987 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3988 {
3989 /* We prefer to not mention undocumented options in
3990 error messages. However, if users have managed to select
3991 power9-vector without selecting power8-vector, they
3992 already know about undocumented flags. */
3993 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3994 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3995 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3996 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3997 {
3998 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3999 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4000 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4001 }
4002 else
4003 {
4004 /* OPTION_MASK_P9_VECTOR is explicit and
4005 OPTION_MASK_P8_VECTOR is not explicit. */
4006 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4007 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4008 }
4009 }
4010
4011 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
4012 support. If we only have ISA 2.06 support, and the user did not specify
4013 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4014 but we don't enable the full vectorization support. */
4015 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4016 TARGET_ALLOW_MOVMISALIGN = 1;
4017
4018 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4019 {
4020 if (TARGET_ALLOW_MOVMISALIGN > 0
4021 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4022 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4023
4024 TARGET_ALLOW_MOVMISALIGN = 0;
4025 }
4026
4027 /* Determine when unaligned vector accesses are permitted, and when
4028 they are preferred over masked Altivec loads. Note that if
4029 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4030 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4031 not true. */
4032 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4033 {
4034 if (!TARGET_VSX)
4035 {
4036 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4037 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4038
4039 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4040 }
4041
4042 else if (!TARGET_ALLOW_MOVMISALIGN)
4043 {
4044 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4045 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4046 "-mallow-movmisalign");
4047
4048 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4049 }
4050 }
4051
4052 /* Use long double size to select the appropriate long double. We use
4053 TYPE_PRECISION to differentiate the 3 different long double types. We map
4054 128 into the precision used for TFmode. */
4055 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4056 ? 64
4057 : FLOAT_PRECISION_TFmode);
4058
4059 /* Set long double size before the IEEE 128-bit tests. */
4060 if (!global_options_set.x_rs6000_long_double_type_size)
4061 {
4062 if (main_target_opt != NULL
4063 && (main_target_opt->x_rs6000_long_double_type_size
4064 != default_long_double_size))
4065 error ("target attribute or pragma changes %<long double%> size");
4066 else
4067 rs6000_long_double_type_size = default_long_double_size;
4068 }
4069 else if (rs6000_long_double_type_size == 128)
4070 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4071 else if (global_options_set.x_rs6000_ieeequad)
4072 {
4073 if (global_options.x_rs6000_ieeequad)
4074 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4075 else
4076 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4077 }
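/* Illustrative outcomes of the size handling above (a sketch, assuming
   RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64):

     no size option given  -> rs6000_long_double_type_size = 64
     -mlong-double-128     -> the size is remapped to FLOAT_PRECISION_TFmode
     -mlong-double-64 with -mabi=ieeelongdouble
                           -> rejected with the error above.  */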
4078
4079 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4080 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4081 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4082 those systems will not pick up this default. Warn if the user changes the
4083 default unless -Wno-psabi. */
4084 if (!global_options_set.x_rs6000_ieeequad)
4085 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4086
4087 else
4088 {
4089 if (global_options.x_rs6000_ieeequad
4090 && (!TARGET_POPCNTD || !TARGET_VSX))
4091 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4092
4093 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4094 {
4095 static bool warned_change_long_double;
4096 if (!warned_change_long_double)
4097 {
4098 warned_change_long_double = true;
4099 if (TARGET_IEEEQUAD)
4100 warning (OPT_Wpsabi, "Using IEEE extended precision "
4101 "%<long double%>");
4102 else
4103 warning (OPT_Wpsabi, "Using IBM extended precision "
4104 "%<long double%>");
4105 }
4106 }
4107 }
4108
4109 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4110 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4111 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4112 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4113 the keyword as well as the type. */
4114 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4115
4116 /* IEEE 128-bit floating point requires VSX support. */
4117 if (TARGET_FLOAT128_KEYWORD)
4118 {
4119 if (!TARGET_VSX)
4120 {
4121 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4122 error ("%qs requires VSX support", "%<-mfloat128%>");
4123
4124 TARGET_FLOAT128_TYPE = 0;
4125 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4126 | OPTION_MASK_FLOAT128_HW);
4127 }
4128 else if (!TARGET_FLOAT128_TYPE)
4129 {
4130 TARGET_FLOAT128_TYPE = 1;
4131 warning (0, "the %<-mfloat128%> option may not be fully supported");
4132 }
4133 }
4134
4135 /* Enable the __float128 keyword under Linux by default. */
4136 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4137 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4138 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4139
4140 /* If we are supporting the float128 type and have full ISA 3.0 support,
4141 enable -mfloat128-hardware by default. However, don't enable the
4142 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4143 because sometimes the compiler wants to put things in an integer
4144 container, and if we don't have __int128 support, that is impossible. */
4145 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4146 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4147 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4148 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4149
4150 if (TARGET_FLOAT128_HW
4151 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4152 {
4153 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4154 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4155
4156 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4157 }
4158
4159 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4160 {
4161 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4162 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4163
4164 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4165 }
4166
4167 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4168 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4169 {
4170 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4171 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4172 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4173 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4174
4175 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4176 }
4177
4178 /* -mpcrel requires prefixed load/store addressing. */
4179 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4180 {
4181 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4182 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4183
4184 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4185 }
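/* Illustrative dependency chain for the two checks above (a sketch, not
   part of the original source):

     -mpcrel  requires  -mprefixed-addr,  which requires  -mcpu=future

   so, for example, "-mcpu=power9 -mpcrel" is rejected with the first
   error above.  */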
4186
4187 /* Print the options after updating the defaults. */
4188 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4189 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4190
4191 /* E500mc does "better" if we inline more aggressively. Respect the
4192 user's opinion, though. */
4193 if (rs6000_block_move_inline_limit == 0
4194 && (rs6000_tune == PROCESSOR_PPCE500MC
4195 || rs6000_tune == PROCESSOR_PPCE500MC64
4196 || rs6000_tune == PROCESSOR_PPCE5500
4197 || rs6000_tune == PROCESSOR_PPCE6500))
4198 rs6000_block_move_inline_limit = 128;
4199
4200 /* store_one_arg depends on expand_block_move to handle at least the
4201 size of reg_parm_stack_space. */
4202 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4203 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4204
4205 if (global_init_p)
4206 {
4207 /* If the appropriate debug option is enabled, replace the target hooks
4208 with debug versions that call the real version and then print
4209 debugging information. */
4210 if (TARGET_DEBUG_COST)
4211 {
4212 targetm.rtx_costs = rs6000_debug_rtx_costs;
4213 targetm.address_cost = rs6000_debug_address_cost;
4214 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4215 }
4216
4217 if (TARGET_DEBUG_ADDR)
4218 {
4219 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4220 targetm.legitimize_address = rs6000_debug_legitimize_address;
4221 rs6000_secondary_reload_class_ptr
4222 = rs6000_debug_secondary_reload_class;
4223 targetm.secondary_memory_needed
4224 = rs6000_debug_secondary_memory_needed;
4225 targetm.can_change_mode_class
4226 = rs6000_debug_can_change_mode_class;
4227 rs6000_preferred_reload_class_ptr
4228 = rs6000_debug_preferred_reload_class;
4229 rs6000_mode_dependent_address_ptr
4230 = rs6000_debug_mode_dependent_address;
4231 }
4232
4233 if (rs6000_veclibabi_name)
4234 {
4235 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4236 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4237 else
4238 {
4239 error ("unknown vectorization library ABI type (%qs) for "
4240 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4241 ret = false;
4242 }
4243 }
4244 }
4245
4246 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4247 target attribute or pragma which automatically enables both options,
4248 unless the altivec ABI was set. This is set by default for 64-bit, but
4249 not for 32-bit. */
4250 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4251 {
4252 TARGET_FLOAT128_TYPE = 0;
4253 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4254 | OPTION_MASK_FLOAT128_KEYWORD)
4255 & ~rs6000_isa_flags_explicit);
4256 }
4257
4258 /* Enable Altivec ABI for AIX -maltivec. */
4259 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4260 {
4261 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4262 error ("target attribute or pragma changes AltiVec ABI");
4263 else
4264 rs6000_altivec_abi = 1;
4265 }
4266
4267 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4268 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4269 be explicitly overridden in either case. */
4270 if (TARGET_ELF)
4271 {
4272 if (!global_options_set.x_rs6000_altivec_abi
4273 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4274 {
4275 if (main_target_opt != NULL
4276 && !main_target_opt->x_rs6000_altivec_abi)
4277 error ("target attribute or pragma changes AltiVec ABI");
4278 else
4279 rs6000_altivec_abi = 1;
4280 }
4281 }
4282
4283 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4284 So far, the only darwin64 targets are also Mach-O. */
4285 if (TARGET_MACHO
4286 && DEFAULT_ABI == ABI_DARWIN
4287 && TARGET_64BIT)
4288 {
4289 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4290 error ("target attribute or pragma changes darwin64 ABI");
4291 else
4292 {
4293 rs6000_darwin64_abi = 1;
4294 /* Default to natural alignment, for better performance. */
4295 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4296 }
4297 }
4298
4299 /* Place FP constants in the constant pool instead of TOC
4300 if section anchors enabled. */
4301 if (flag_section_anchors
4302 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4303 TARGET_NO_FP_IN_TOC = 1;
4304
4305 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4306 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4307
4308 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4309 SUBTARGET_OVERRIDE_OPTIONS;
4310 #endif
4311 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4312 SUBSUBTARGET_OVERRIDE_OPTIONS;
4313 #endif
4314 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4315 SUB3TARGET_OVERRIDE_OPTIONS;
4316 #endif
4317
4318 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4319 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4320
4321 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4322 && rs6000_tune != PROCESSOR_POWER5
4323 && rs6000_tune != PROCESSOR_POWER6
4324 && rs6000_tune != PROCESSOR_POWER7
4325 && rs6000_tune != PROCESSOR_POWER8
4326 && rs6000_tune != PROCESSOR_POWER9
4327 && rs6000_tune != PROCESSOR_FUTURE
4328 && rs6000_tune != PROCESSOR_PPCA2
4329 && rs6000_tune != PROCESSOR_CELL
4330 && rs6000_tune != PROCESSOR_PPC476);
4331 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4332 || rs6000_tune == PROCESSOR_POWER5
4333 || rs6000_tune == PROCESSOR_POWER7
4334 || rs6000_tune == PROCESSOR_POWER8);
4335 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4336 || rs6000_tune == PROCESSOR_POWER5
4337 || rs6000_tune == PROCESSOR_POWER6
4338 || rs6000_tune == PROCESSOR_POWER7
4339 || rs6000_tune == PROCESSOR_POWER8
4340 || rs6000_tune == PROCESSOR_POWER9
4341 || rs6000_tune == PROCESSOR_FUTURE
4342 || rs6000_tune == PROCESSOR_PPCE500MC
4343 || rs6000_tune == PROCESSOR_PPCE500MC64
4344 || rs6000_tune == PROCESSOR_PPCE5500
4345 || rs6000_tune == PROCESSOR_PPCE6500);
4346
4347 /* Allow debug switches to override the above settings. These are set to -1
4348 in rs6000.opt to indicate the user hasn't directly set the switch. */
4349 if (TARGET_ALWAYS_HINT >= 0)
4350 rs6000_always_hint = TARGET_ALWAYS_HINT;
4351
4352 if (TARGET_SCHED_GROUPS >= 0)
4353 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4354
4355 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4356 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4357
4358 rs6000_sched_restricted_insns_priority
4359 = (rs6000_sched_groups ? 1 : 0);
4360
4361 /* Handle -msched-costly-dep option. */
4362 rs6000_sched_costly_dep
4363 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4364
4365 if (rs6000_sched_costly_dep_str)
4366 {
4367 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4368 rs6000_sched_costly_dep = no_dep_costly;
4369 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4370 rs6000_sched_costly_dep = all_deps_costly;
4371 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4372 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4373 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4374 rs6000_sched_costly_dep = store_to_load_dep_costly;
4375 else
4376 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4377 atoi (rs6000_sched_costly_dep_str));
4378 }
4379
4380 /* Handle -minsert-sched-nops option. */
4381 rs6000_sched_insert_nops
4382 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4383
4384 if (rs6000_sched_insert_nops_str)
4385 {
4386 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4387 rs6000_sched_insert_nops = sched_finish_none;
4388 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4389 rs6000_sched_insert_nops = sched_finish_pad_groups;
4390 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4391 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4392 else
4393 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4394 atoi (rs6000_sched_insert_nops_str));
4395 }
4396
4397 /* Handle the stack protector. */
4398 if (!global_options_set.x_rs6000_stack_protector_guard)
4399 #ifdef TARGET_THREAD_SSP_OFFSET
4400 rs6000_stack_protector_guard = SSP_TLS;
4401 #else
4402 rs6000_stack_protector_guard = SSP_GLOBAL;
4403 #endif
4404
4405 #ifdef TARGET_THREAD_SSP_OFFSET
4406 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4407 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4408 #endif
4409
4410 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4411 {
4412 char *endp;
4413 const char *str = rs6000_stack_protector_guard_offset_str;
4414
4415 errno = 0;
4416 long offset = strtol (str, &endp, 0);
4417 if (!*str || *endp || errno)
4418 error ("%qs is not a valid number in %qs", str,
4419 "-mstack-protector-guard-offset=");
4420
4421 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4422 || (TARGET_64BIT && (offset & 3)))
4423 error ("%qs is not a valid offset in %qs", str,
4424 "-mstack-protector-guard-offset=");
4425
4426 rs6000_stack_protector_guard_offset = offset;
4427 }
4428
4429 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4430 {
4431 const char *str = rs6000_stack_protector_guard_reg_str;
4432 int reg = decode_reg_name (str);
4433
4434 if (!IN_RANGE (reg, 1, 31))
4435 error ("%qs is not a valid base register in %qs", str,
4436 "-mstack-protector-guard-reg=");
4437
4438 rs6000_stack_protector_guard_reg = reg;
4439 }
4440
4441 if (rs6000_stack_protector_guard == SSP_TLS
4442 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4443 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
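/* Illustrative usage of the guard options validated above (an assumed
   example, not part of the original source):

     gcc -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
         -mstack-protector-guard-offset=0x7010 ...

   loads the stack-protector canary from offset 0x7010 relative to r13
   instead of from the global __stack_chk_guard symbol.  */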
4444
4445 if (global_init_p)
4446 {
4447 #ifdef TARGET_REGNAMES
4448 /* If the user desires alternate register names, copy in the
4449 alternate names now. */
4450 if (TARGET_REGNAMES)
4451 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4452 #endif
4453
4454 /* Set aix_struct_return last, after the ABI is determined.
4455 If -maix-struct-return or -msvr4-struct-return was explicitly
4456 used, don't override with the ABI default. */
4457 if (!global_options_set.x_aix_struct_return)
4458 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4459
4460 #if 0
4461 /* IBM XL compiler defaults to unsigned bitfields. */
4462 if (TARGET_XL_COMPAT)
4463 flag_signed_bitfields = 0;
4464 #endif
4465
4466 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4467 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4468
4469 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4470
4471 /* We can only guarantee the availability of DI pseudo-ops when
4472 assembling for 64-bit targets. */
4473 if (!TARGET_64BIT)
4474 {
4475 targetm.asm_out.aligned_op.di = NULL;
4476 targetm.asm_out.unaligned_op.di = NULL;
4477 }
4478
4479
4480 /* Set branch target alignment, if not optimizing for size. */
4481 if (!optimize_size)
4482 {
4483 /* Cell wants to be aligned to 8 bytes for dual issue. Titan wants to be
4484 aligned to 8 bytes to avoid misprediction by the branch predictor. */
4485 if (rs6000_tune == PROCESSOR_TITAN
4486 || rs6000_tune == PROCESSOR_CELL)
4487 {
4488 if (flag_align_functions && !str_align_functions)
4489 str_align_functions = "8";
4490 if (flag_align_jumps && !str_align_jumps)
4491 str_align_jumps = "8";
4492 if (flag_align_loops && !str_align_loops)
4493 str_align_loops = "8";
4494 }
4495 if (rs6000_align_branch_targets)
4496 {
4497 if (flag_align_functions && !str_align_functions)
4498 str_align_functions = "16";
4499 if (flag_align_jumps && !str_align_jumps)
4500 str_align_jumps = "16";
4501 if (flag_align_loops && !str_align_loops)
4502 {
4503 can_override_loop_align = 1;
4504 str_align_loops = "16";
4505 }
4506 }
4507
4508 if (flag_align_jumps && !str_align_jumps)
4509 str_align_jumps = "16";
4510 if (flag_align_loops && !str_align_loops)
4511 str_align_loops = "16";
4512 }
4513
4514 /* Arrange to save and restore machine status around nested functions. */
4515 init_machine_status = rs6000_init_machine_status;
4516
4517 /* We should always be splitting complex arguments, but we can't break
4518 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4519 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4520 targetm.calls.split_complex_arg = NULL;
4521
4522 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4523 if (DEFAULT_ABI == ABI_AIX)
4524 targetm.calls.custom_function_descriptors = 0;
4525 }
4526
4527 /* Initialize rs6000_cost with the appropriate target costs. */
4528 if (optimize_size)
4529 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4530 else
4531 switch (rs6000_tune)
4532 {
4533 case PROCESSOR_RS64A:
4534 rs6000_cost = &rs64a_cost;
4535 break;
4536
4537 case PROCESSOR_MPCCORE:
4538 rs6000_cost = &mpccore_cost;
4539 break;
4540
4541 case PROCESSOR_PPC403:
4542 rs6000_cost = &ppc403_cost;
4543 break;
4544
4545 case PROCESSOR_PPC405:
4546 rs6000_cost = &ppc405_cost;
4547 break;
4548
4549 case PROCESSOR_PPC440:
4550 rs6000_cost = &ppc440_cost;
4551 break;
4552
4553 case PROCESSOR_PPC476:
4554 rs6000_cost = &ppc476_cost;
4555 break;
4556
4557 case PROCESSOR_PPC601:
4558 rs6000_cost = &ppc601_cost;
4559 break;
4560
4561 case PROCESSOR_PPC603:
4562 rs6000_cost = &ppc603_cost;
4563 break;
4564
4565 case PROCESSOR_PPC604:
4566 rs6000_cost = &ppc604_cost;
4567 break;
4568
4569 case PROCESSOR_PPC604e:
4570 rs6000_cost = &ppc604e_cost;
4571 break;
4572
4573 case PROCESSOR_PPC620:
4574 rs6000_cost = &ppc620_cost;
4575 break;
4576
4577 case PROCESSOR_PPC630:
4578 rs6000_cost = &ppc630_cost;
4579 break;
4580
4581 case PROCESSOR_CELL:
4582 rs6000_cost = &ppccell_cost;
4583 break;
4584
4585 case PROCESSOR_PPC750:
4586 case PROCESSOR_PPC7400:
4587 rs6000_cost = &ppc750_cost;
4588 break;
4589
4590 case PROCESSOR_PPC7450:
4591 rs6000_cost = &ppc7450_cost;
4592 break;
4593
4594 case PROCESSOR_PPC8540:
4595 case PROCESSOR_PPC8548:
4596 rs6000_cost = &ppc8540_cost;
4597 break;
4598
4599 case PROCESSOR_PPCE300C2:
4600 case PROCESSOR_PPCE300C3:
4601 rs6000_cost = &ppce300c2c3_cost;
4602 break;
4603
4604 case PROCESSOR_PPCE500MC:
4605 rs6000_cost = &ppce500mc_cost;
4606 break;
4607
4608 case PROCESSOR_PPCE500MC64:
4609 rs6000_cost = &ppce500mc64_cost;
4610 break;
4611
4612 case PROCESSOR_PPCE5500:
4613 rs6000_cost = &ppce5500_cost;
4614 break;
4615
4616 case PROCESSOR_PPCE6500:
4617 rs6000_cost = &ppce6500_cost;
4618 break;
4619
4620 case PROCESSOR_TITAN:
4621 rs6000_cost = &titan_cost;
4622 break;
4623
4624 case PROCESSOR_POWER4:
4625 case PROCESSOR_POWER5:
4626 rs6000_cost = &power4_cost;
4627 break;
4628
4629 case PROCESSOR_POWER6:
4630 rs6000_cost = &power6_cost;
4631 break;
4632
4633 case PROCESSOR_POWER7:
4634 rs6000_cost = &power7_cost;
4635 break;
4636
4637 case PROCESSOR_POWER8:
4638 rs6000_cost = &power8_cost;
4639 break;
4640
4641 case PROCESSOR_POWER9:
4642 case PROCESSOR_FUTURE:
4643 rs6000_cost = &power9_cost;
4644 break;
4645
4646 case PROCESSOR_PPCA2:
4647 rs6000_cost = &ppca2_cost;
4648 break;
4649
4650 default:
4651 gcc_unreachable ();
4652 }
4653
4654 if (global_init_p)
4655 {
4656 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4657 rs6000_cost->simultaneous_prefetches,
4658 global_options.x_param_values,
4659 global_options_set.x_param_values);
4660 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4661 global_options.x_param_values,
4662 global_options_set.x_param_values);
4663 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4664 rs6000_cost->cache_line_size,
4665 global_options.x_param_values,
4666 global_options_set.x_param_values);
4667 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4668 global_options.x_param_values,
4669 global_options_set.x_param_values);
4670
4671 /* Increase loop peeling limits based on performance analysis. */
4672 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4673 global_options.x_param_values,
4674 global_options_set.x_param_values);
4675 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4676 global_options.x_param_values,
4677 global_options_set.x_param_values);
4678
4679 /* Use the 'model' -fsched-pressure algorithm by default. */
4680 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4681 SCHED_PRESSURE_MODEL,
4682 global_options.x_param_values,
4683 global_options_set.x_param_values);
4684
4685 /* If using typedef char *va_list, signal that
4686 __builtin_va_start (&ap, 0) can be optimized to
4687 ap = __builtin_next_arg (0). */
4688 if (DEFAULT_ABI != ABI_V4)
4689 targetm.expand_builtin_va_start = NULL;
4690 }
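/* The params set above are only defaults; an explicit command-line
   setting still wins, e.g. (illustrative):

     gcc --param l1-cache-size=64 --param simultaneous-prefetches=8 ...

   because maybe_set_param_value leaves parameters alone when they are
   recorded in global_options_set.x_param_values.  */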
4691
4692 /* If not explicitly specified via option, decide whether to generate indexed
4693 load/store instructions. A value of -1 indicates that the
4694 initial value of this variable has not been overwritten. During
4695 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4696 if (TARGET_AVOID_XFORM == -1)
4697 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4698 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4699 need indexed accesses and the type used is the scalar type of the element
4700 being loaded or stored. */
4701 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4702 && !TARGET_ALTIVEC);
4703
4704 /* Set the -mrecip options. */
4705 if (rs6000_recip_name)
4706 {
4707 char *p = ASTRDUP (rs6000_recip_name);
4708 char *q;
4709 unsigned int mask, i;
4710 bool invert;
4711
4712 while ((q = strtok (p, ",")) != NULL)
4713 {
4714 p = NULL;
4715 if (*q == '!')
4716 {
4717 invert = true;
4718 q++;
4719 }
4720 else
4721 invert = false;
4722
4723 if (!strcmp (q, "default"))
4724 mask = ((TARGET_RECIP_PRECISION)
4725 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4726 else
4727 {
4728 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4729 if (!strcmp (q, recip_options[i].string))
4730 {
4731 mask = recip_options[i].mask;
4732 break;
4733 }
4734
4735 if (i == ARRAY_SIZE (recip_options))
4736 {
4737 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4738 invert = false;
4739 mask = 0;
4740 ret = false;
4741 }
4742 }
4743
4744 if (invert)
4745 rs6000_recip_control &= ~mask;
4746 else
4747 rs6000_recip_control |= mask;
4748 }
4749 }
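/* Illustrative -mrecip strings accepted by the parser above (examples,
   not an exhaustive list):

     -mrecip=default       the default estimate set for this precision
     -mrecip=all           every supported estimate enabled
     -mrecip=div,rsqrt     only the named classes enabled
     -mrecip=all,!rsqrtd   everything except double-precision rsqrt

   where a leading '!' on an item clears the corresponding mask bits.  */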
4750
4751 /* Set the builtin mask of the various options in use that could affect
4752 which builtins are available. In the past we used target_flags, but we've
4753 run out of bits, and some options are no longer in target_flags. */
4754 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4755 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4756 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4757 rs6000_builtin_mask);
4758
4759 /* Initialize all of the registers. */
4760 rs6000_init_hard_regno_mode_ok (global_init_p);
4761
4762 /* Save the initial options in case the user uses function-specific options. */
4763 if (global_init_p)
4764 target_option_default_node = target_option_current_node
4765 = build_target_option_node (&global_options);
4766
4767 /* If not explicitly specified via option, decide whether to generate the
4768 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4769 if (TARGET_LINK_STACK == -1)
4770 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4771
4772 /* Deprecate use of -mno-speculate-indirect-jumps. */
4773 if (!rs6000_speculate_indirect_jumps)
4774 warning (0, "%qs is deprecated and not recommended in any circumstances",
4775 "-mno-speculate-indirect-jumps");
4776
4777 return ret;
4778 }
4779
4780 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4781 define the target cpu type. */
4782
4783 static void
4784 rs6000_option_override (void)
4785 {
4786 (void) rs6000_option_override_internal (true);
4787 }
4788
4789 \f
4790 /* Implement targetm.vectorize.builtin_mask_for_load. */
4791 static tree
4792 rs6000_builtin_mask_for_load (void)
4793 {
4794 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4795 if ((TARGET_ALTIVEC && !TARGET_VSX)
4796 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4797 return altivec_builtin_mask_for_load;
4798 else
4799 return 0;
4800 }
4801
4802 /* Implement LOOP_ALIGN. */
4803 align_flags
4804 rs6000_loop_align (rtx label)
4805 {
4806 basic_block bb;
4807 int ninsns;
4808
4809 /* Don't override loop alignment if -falign-loops was specified. */
4810 if (!can_override_loop_align)
4811 return align_loops;
4812
4813 bb = BLOCK_FOR_INSN (label);
4814 ninsns = num_loop_insns (bb->loop_father);
4815
4816 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4817 if (ninsns > 4 && ninsns <= 8
4818 && (rs6000_tune == PROCESSOR_POWER4
4819 || rs6000_tune == PROCESSOR_POWER5
4820 || rs6000_tune == PROCESSOR_POWER6
4821 || rs6000_tune == PROCESSOR_POWER7
4822 || rs6000_tune == PROCESSOR_POWER8))
4823 return align_flags (5);
4824 else
4825 return align_loops;
4826 }
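/* Note on the return value above (editorial): align_flags (5) requests an
   alignment of 2**5 = 32 bytes, so a small loop of 5-8 insns starts on a
   32-byte icache-sector boundary on the processors listed.  */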
4827
4828 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4829 after applying some number of iterations. This routine does not determine
4830 how many iterations are required to reach the desired alignment. */
4831
4832 static bool
4833 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4834 {
4835 if (is_packed)
4836 return false;
4837
4838 if (TARGET_32BIT)
4839 {
4840 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4841 return true;
4842
4843 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4844 return true;
4845
4846 return false;
4847 }
4848 else
4849 {
4850 if (TARGET_MACHO)
4851 return false;
4852
4853 /* Assume that all other types are naturally aligned. CHECKME! */
4854 return true;
4855 }
4856 }
4857
4858 /* Return true if the vector misalignment factor is supported by the
4859 target. */
4860 static bool
4861 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4862 const_tree type,
4863 int misalignment,
4864 bool is_packed)
4865 {
4866 if (TARGET_VSX)
4867 {
4868 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4869 return true;
4870
4871 /* Return false if the movmisalign pattern is not supported for this mode. */
4872 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4873 return false;
4874
4875 if (misalignment == -1)
4876 {
4877 /* Misalignment factor is unknown at compile time but we know
4878 it's word aligned. */
4879 if (rs6000_vector_alignment_reachable (type, is_packed))
4880 {
4881 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4882
4883 if (element_size == 64 || element_size == 32)
4884 return true;
4885 }
4886
4887 return false;
4888 }
4889
4890 /* VSX supports word-aligned vectors.  */
4891 if (misalignment % 4 == 0)
4892 return true;
4893 }
4894 return false;
4895 }
4896
4897 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4898 static int
4899 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4900 tree vectype, int misalign)
4901 {
4902 unsigned elements;
4903 tree elem_type;
4904
4905 switch (type_of_cost)
4906 {
4907 case scalar_stmt:
4908 case scalar_load:
4909 case scalar_store:
4910 case vector_stmt:
4911 case vector_load:
4912 case vector_store:
4913 case vec_to_scalar:
4914 case scalar_to_vec:
4915 case cond_branch_not_taken:
4916 return 1;
4917
4918 case vec_perm:
4919 if (TARGET_VSX)
4920 return 3;
4921 else
4922 return 1;
4923
4924 case vec_promote_demote:
4925 if (TARGET_VSX)
4926 return 4;
4927 else
4928 return 1;
4929
4930 case cond_branch_taken:
4931 return 3;
4932
4933 case unaligned_load:
4934 case vector_gather_load:
4935 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4936 return 1;
4937
4938 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4939 {
4940 elements = TYPE_VECTOR_SUBPARTS (vectype);
4941 if (elements == 2)
4942 /* Double word aligned. */
4943 return 2;
4944
4945 if (elements == 4)
4946 {
4947 switch (misalign)
4948 {
4949 case 8:
4950 /* Double word aligned. */
4951 return 2;
4952
4953 case -1:
4954 /* Unknown misalignment. */
4955 case 4:
4956 case 12:
4957 /* Word aligned. */
4958 return 22;
4959
4960 default:
4961 gcc_unreachable ();
4962 }
4963 }
4964 }
4965
4966 if (TARGET_ALTIVEC)
4967 /* Misaligned loads are not supported. */
4968 gcc_unreachable ();
4969
4970 return 2;
4971
4972 case unaligned_store:
4973 case vector_scatter_store:
4974 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4975 return 1;
4976
4977 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4978 {
4979 elements = TYPE_VECTOR_SUBPARTS (vectype);
4980 if (elements == 2)
4981 /* Double word aligned. */
4982 return 2;
4983
4984 if (elements == 4)
4985 {
4986 switch (misalign)
4987 {
4988 case 8:
4989 /* Double word aligned. */
4990 return 2;
4991
4992 case -1:
4993 /* Unknown misalignment. */
4994 case 4:
4995 case 12:
4996 /* Word aligned. */
4997 return 23;
4998
4999 default:
5000 gcc_unreachable ();
5001 }
5002 }
5003 }
5004
5005 if (TARGET_ALTIVEC)
5006 /* Misaligned stores are not supported. */
5007 gcc_unreachable ();
5008
5009 return 2;
5010
5011 case vec_construct:
5012 /* This is a rough approximation assuming non-constant elements
5013 constructed into a vector via element insertion. FIXME:
5014 vec_construct is not granular enough for uniformly good
5015 decisions. If the initialization is a splat, this is
5016 cheaper than we estimate. Improve this someday. */
5017 elem_type = TREE_TYPE (vectype);
5018 /* 32-bit vectors loaded into registers are stored as double
5019 precision, so we need 2 permutes, 2 converts, and 1 merge
5020 to construct a vector of short floats from them. */
5021 if (SCALAR_FLOAT_TYPE_P (elem_type)
5022 && TYPE_PRECISION (elem_type) == 32)
5023 return 5;
5024 /* On POWER9, integer vector types are built up in GPRs and then
5025 use a direct move (2 cycles). For POWER8 this is even worse,
5026 as we need two direct moves and a merge, and the direct moves
5027 are five cycles. */
5028 else if (INTEGRAL_TYPE_P (elem_type))
5029 {
5030 if (TARGET_P9_VECTOR)
5031 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5032 else
5033 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5034 }
5035 else
5036 /* V2DFmode doesn't need a direct move. */
5037 return 2;
5038
5039 default:
5040 gcc_unreachable ();
5041 }
5042 }
5043
5044 /* Implement targetm.vectorize.preferred_simd_mode. */
5045
5046 static machine_mode
5047 rs6000_preferred_simd_mode (scalar_mode mode)
5048 {
5049 if (TARGET_VSX)
5050 switch (mode)
5051 {
5052 case E_DFmode:
5053 return V2DFmode;
5054 default:;
5055 }
5056 if (TARGET_ALTIVEC || TARGET_VSX)
5057 switch (mode)
5058 {
5059 case E_SFmode:
5060 return V4SFmode;
5061 case E_TImode:
5062 return V1TImode;
5063 case E_DImode:
5064 return V2DImode;
5065 case E_SImode:
5066 return V4SImode;
5067 case E_HImode:
5068 return V8HImode;
5069 case E_QImode:
5070 return V16QImode;
5071 default:;
5072 }
5073 return word_mode;
5074 }
5075
5076 typedef struct _rs6000_cost_data
5077 {
5078 struct loop *loop_info;
5079 unsigned cost[3];
5080 } rs6000_cost_data;
5081
5082 /* Test for likely overcommitment of vector hardware resources. If a
5083 loop iteration is relatively large, and too large a percentage of
5084 instructions in the loop are vectorized, the cost model may not
5085 adequately reflect delays from unavailable vector resources.
5086 Penalize the loop body cost for this case. */
5087
5088 static void
5089 rs6000_density_test (rs6000_cost_data *data)
5090 {
5091 const int DENSITY_PCT_THRESHOLD = 85;
5092 const int DENSITY_SIZE_THRESHOLD = 70;
5093 const int DENSITY_PENALTY = 10;
5094 struct loop *loop = data->loop_info;
5095 basic_block *bbs = get_loop_body (loop);
5096 int nbbs = loop->num_nodes;
5097 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5098 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5099 int i, density_pct;
5100
5101 for (i = 0; i < nbbs; i++)
5102 {
5103 basic_block bb = bbs[i];
5104 gimple_stmt_iterator gsi;
5105
5106 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5107 {
5108 gimple *stmt = gsi_stmt (gsi);
5109 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5110
5111 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5112 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5113 not_vec_cost++;
5114 }
5115 }
5116
5117 free (bbs);
5118 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
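/* DENSITY_PCT is the vectorized share of the combined total computed
above; e.g. vec_cost 90 against not_vec_cost 10 yields 90%.  */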
5119
5120 if (density_pct > DENSITY_PCT_THRESHOLD
5121 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5122 {
5123 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5124 if (dump_enabled_p ())
5125 dump_printf_loc (MSG_NOTE, vect_location,
5126 "density %d%%, cost %d exceeds threshold, penalizing "
5127 "loop body cost by %d%%", density_pct,
5128 vec_cost + not_vec_cost, DENSITY_PENALTY);
5129 }
5130 }
5131
5132 /* Implement targetm.vectorize.init_cost. */
5133
5134 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5135 instruction is needed by the vectorization. */
5136 static bool rs6000_vect_nonmem;
5137
5138 static void *
5139 rs6000_init_cost (struct loop *loop_info)
5140 {
5141 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5142 data->loop_info = loop_info;
5143 data->cost[vect_prologue] = 0;
5144 data->cost[vect_body] = 0;
5145 data->cost[vect_epilogue] = 0;
5146 rs6000_vect_nonmem = false;
5147 return data;
5148 }
5149
5150 /* Implement targetm.vectorize.add_stmt_cost. */
5151
5152 static unsigned
5153 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5154 struct _stmt_vec_info *stmt_info, int misalign,
5155 enum vect_cost_model_location where)
5156 {
5157 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5158 unsigned retval = 0;
5159
5160 if (flag_vect_cost_model)
5161 {
5162 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5163 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5164 misalign);
5165 /* Statements in an inner loop relative to the loop being
5166 vectorized are weighted more heavily. The value here is
5167 arbitrary and could potentially be improved with analysis. */
5168 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5169 count *= 50; /* FIXME. */
5170
5171 retval = (unsigned) (count * stmt_cost);
5172 cost_data->cost[where] += retval;
5173
5174 /* Check whether we're doing something other than just a copy loop.
5175 Not all such loops may be profitably vectorized; see
5176 rs6000_finish_cost. */
5177 if ((kind == vec_to_scalar || kind == vec_perm
5178 || kind == vec_promote_demote || kind == vec_construct
5179 || kind == scalar_to_vec)
5180 || (where == vect_body && kind == vector_stmt))
5181 rs6000_vect_nonmem = true;
5182 }
5183
5184 return retval;
5185 }
5186
5187 /* Implement targetm.vectorize.finish_cost. */
5188
5189 static void
5190 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5191 unsigned *body_cost, unsigned *epilogue_cost)
5192 {
5193 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5194
5195 if (cost_data->loop_info)
5196 rs6000_density_test (cost_data);
5197
5198 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5199 that require versioning for any reason. The vectorization is at
5200 best a wash inside the loop, and the versioning checks make
5201 profitability highly unlikely and potentially quite harmful. */
5202 if (cost_data->loop_info)
5203 {
5204 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5205 if (!rs6000_vect_nonmem
5206 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5207 && LOOP_REQUIRES_VERSIONING (vec_info))
5208 cost_data->cost[vect_body] += 10000;
5209 }
5210
5211 *prologue_cost = cost_data->cost[vect_prologue];
5212 *body_cost = cost_data->cost[vect_body];
5213 *epilogue_cost = cost_data->cost[vect_epilogue];
5214 }
5215
5216 /* Implement targetm.vectorize.destroy_cost_data. */
5217
5218 static void
5219 rs6000_destroy_cost_data (void *data)
5220 {
5221 free (data);
5222 }
5223
5224 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5225 library with vectorized intrinsics. */
5226
5227 static tree
5228 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5229 tree type_in)
5230 {
5231 char name[32];
5232 const char *suffix = NULL;
5233 tree fntype, new_fndecl, bdecl = NULL_TREE;
5234 int n_args = 1;
5235 const char *bname;
5236 machine_mode el_mode, in_mode;
5237 int n, in_n;
5238
5239 /* Libmass is suitable for unsafe math only as it does not correctly support
5240 parts of IEEE with the required precision such as denormals. Only support
5241 it if we have VSX to use the simd d2 or f4 functions.
5242 XXX: Add variable length support. */
5243 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5244 return NULL_TREE;
5245
5246 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5247 n = TYPE_VECTOR_SUBPARTS (type_out);
5248 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5249 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5250 if (el_mode != in_mode
5251 || n != in_n)
5252 return NULL_TREE;
5253
5254 switch (fn)
5255 {
5256 CASE_CFN_ATAN2:
5257 CASE_CFN_HYPOT:
5258 CASE_CFN_POW:
5259 n_args = 2;
5260 gcc_fallthrough ();
5261
5262 CASE_CFN_ACOS:
5263 CASE_CFN_ACOSH:
5264 CASE_CFN_ASIN:
5265 CASE_CFN_ASINH:
5266 CASE_CFN_ATAN:
5267 CASE_CFN_ATANH:
5268 CASE_CFN_CBRT:
5269 CASE_CFN_COS:
5270 CASE_CFN_COSH:
5271 CASE_CFN_ERF:
5272 CASE_CFN_ERFC:
5273 CASE_CFN_EXP2:
5274 CASE_CFN_EXP:
5275 CASE_CFN_EXPM1:
5276 CASE_CFN_LGAMMA:
5277 CASE_CFN_LOG10:
5278 CASE_CFN_LOG1P:
5279 CASE_CFN_LOG2:
5280 CASE_CFN_LOG:
5281 CASE_CFN_SIN:
5282 CASE_CFN_SINH:
5283 CASE_CFN_SQRT:
5284 CASE_CFN_TAN:
5285 CASE_CFN_TANH:
5286 if (el_mode == DFmode && n == 2)
5287 {
5288 bdecl = mathfn_built_in (double_type_node, fn);
5289 suffix = "d2"; /* pow -> powd2 */
5290 }
5291 else if (el_mode == SFmode && n == 4)
5292 {
5293 bdecl = mathfn_built_in (float_type_node, fn);
5294 suffix = "4"; /* powf -> powf4 */
5295 }
5296 else
5297 return NULL_TREE;
5298 if (!bdecl)
5299 return NULL_TREE;
5300 break;
5301
5302 default:
5303 return NULL_TREE;
5304 }
5305
5306 gcc_assert (suffix != NULL);
5307 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5308 if (!bname)
5309 return NULL_TREE;
5310
5311 strcpy (name, bname + sizeof ("__builtin_") - 1);
5312 strcat (name, suffix);
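/* For example, "__builtin_pow" is stripped to "pow" and then becomes
"powd2" (V2DF) or, via the powf builtin, "powf4" (V4SF), per the
suffixes chosen above.  */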
5313
5314 if (n_args == 1)
5315 fntype = build_function_type_list (type_out, type_in, NULL);
5316 else if (n_args == 2)
5317 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5318 else
5319 gcc_unreachable ();
5320
5321 /* Build a function declaration for the vectorized function. */
5322 new_fndecl = build_decl (BUILTINS_LOCATION,
5323 FUNCTION_DECL, get_identifier (name), fntype);
5324 TREE_PUBLIC (new_fndecl) = 1;
5325 DECL_EXTERNAL (new_fndecl) = 1;
5326 DECL_IS_NOVOPS (new_fndecl) = 1;
5327 TREE_READONLY (new_fndecl) = 1;
5328
5329 return new_fndecl;
5330 }
5331
5332 /* Returns a function decl for a vectorized version of the builtin function
5333 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5334 if it is not available. */
5335
5336 static tree
5337 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5338 tree type_in)
5339 {
5340 machine_mode in_mode, out_mode;
5341 int in_n, out_n;
5342
5343 if (TARGET_DEBUG_BUILTIN)
5344 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5345 combined_fn_name (combined_fn (fn)),
5346 GET_MODE_NAME (TYPE_MODE (type_out)),
5347 GET_MODE_NAME (TYPE_MODE (type_in)));
5348
5349 if (TREE_CODE (type_out) != VECTOR_TYPE
5350 || TREE_CODE (type_in) != VECTOR_TYPE)
5351 return NULL_TREE;
5352
5353 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5354 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5355 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5356 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5357
5358 switch (fn)
5359 {
5360 CASE_CFN_COPYSIGN:
5361 if (VECTOR_UNIT_VSX_P (V2DFmode)
5362 && out_mode == DFmode && out_n == 2
5363 && in_mode == DFmode && in_n == 2)
5364 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5365 if (VECTOR_UNIT_VSX_P (V4SFmode)
5366 && out_mode == SFmode && out_n == 4
5367 && in_mode == SFmode && in_n == 4)
5368 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5369 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5370 && out_mode == SFmode && out_n == 4
5371 && in_mode == SFmode && in_n == 4)
5372 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5373 break;
5374 CASE_CFN_CEIL:
5375 if (VECTOR_UNIT_VSX_P (V2DFmode)
5376 && out_mode == DFmode && out_n == 2
5377 && in_mode == DFmode && in_n == 2)
5378 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5379 if (VECTOR_UNIT_VSX_P (V4SFmode)
5380 && out_mode == SFmode && out_n == 4
5381 && in_mode == SFmode && in_n == 4)
5382 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5383 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5384 && out_mode == SFmode && out_n == 4
5385 && in_mode == SFmode && in_n == 4)
5386 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5387 break;
5388 CASE_CFN_FLOOR:
5389 if (VECTOR_UNIT_VSX_P (V2DFmode)
5390 && out_mode == DFmode && out_n == 2
5391 && in_mode == DFmode && in_n == 2)
5392 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5393 if (VECTOR_UNIT_VSX_P (V4SFmode)
5394 && out_mode == SFmode && out_n == 4
5395 && in_mode == SFmode && in_n == 4)
5396 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5397 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5398 && out_mode == SFmode && out_n == 4
5399 && in_mode == SFmode && in_n == 4)
5400 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5401 break;
5402 CASE_CFN_FMA:
5403 if (VECTOR_UNIT_VSX_P (V2DFmode)
5404 && out_mode == DFmode && out_n == 2
5405 && in_mode == DFmode && in_n == 2)
5406 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5407 if (VECTOR_UNIT_VSX_P (V4SFmode)
5408 && out_mode == SFmode && out_n == 4
5409 && in_mode == SFmode && in_n == 4)
5410 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5411 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5412 && out_mode == SFmode && out_n == 4
5413 && in_mode == SFmode && in_n == 4)
5414 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5415 break;
5416 CASE_CFN_TRUNC:
5417 if (VECTOR_UNIT_VSX_P (V2DFmode)
5418 && out_mode == DFmode && out_n == 2
5419 && in_mode == DFmode && in_n == 2)
5420 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5421 if (VECTOR_UNIT_VSX_P (V4SFmode)
5422 && out_mode == SFmode && out_n == 4
5423 && in_mode == SFmode && in_n == 4)
5424 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5425 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5426 && out_mode == SFmode && out_n == 4
5427 && in_mode == SFmode && in_n == 4)
5428 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5429 break;
5430 CASE_CFN_NEARBYINT:
5431 if (VECTOR_UNIT_VSX_P (V2DFmode)
5432 && flag_unsafe_math_optimizations
5433 && out_mode == DFmode && out_n == 2
5434 && in_mode == DFmode && in_n == 2)
5435 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5436 if (VECTOR_UNIT_VSX_P (V4SFmode)
5437 && flag_unsafe_math_optimizations
5438 && out_mode == SFmode && out_n == 4
5439 && in_mode == SFmode && in_n == 4)
5440 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5441 break;
5442 CASE_CFN_RINT:
5443 if (VECTOR_UNIT_VSX_P (V2DFmode)
5444 && !flag_trapping_math
5445 && out_mode == DFmode && out_n == 2
5446 && in_mode == DFmode && in_n == 2)
5447 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5448 if (VECTOR_UNIT_VSX_P (V4SFmode)
5449 && !flag_trapping_math
5450 && out_mode == SFmode && out_n == 4
5451 && in_mode == SFmode && in_n == 4)
5452 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5453 break;
5454 default:
5455 break;
5456 }
5457
5458 /* Generate calls to libmass if appropriate. */
5459 if (rs6000_veclib_handler)
5460 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5461
5462 return NULL_TREE;
5463 }
5464
5465 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5466
5467 static tree
5468 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5469 tree type_in)
5470 {
5471 machine_mode in_mode, out_mode;
5472 int in_n, out_n;
5473
5474 if (TARGET_DEBUG_BUILTIN)
5475 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5476 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5477 GET_MODE_NAME (TYPE_MODE (type_out)),
5478 GET_MODE_NAME (TYPE_MODE (type_in)));
5479
5480 if (TREE_CODE (type_out) != VECTOR_TYPE
5481 || TREE_CODE (type_in) != VECTOR_TYPE)
5482 return NULL_TREE;
5483
5484 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5485 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5486 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5487 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5488
5489 enum rs6000_builtins fn
5490 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5491 switch (fn)
5492 {
5493 case RS6000_BUILTIN_RSQRTF:
5494 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5495 && out_mode == SFmode && out_n == 4
5496 && in_mode == SFmode && in_n == 4)
5497 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5498 break;
5499 case RS6000_BUILTIN_RSQRT:
5500 if (VECTOR_UNIT_VSX_P (V2DFmode)
5501 && out_mode == DFmode && out_n == 2
5502 && in_mode == DFmode && in_n == 2)
5503 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5504 break;
5505 case RS6000_BUILTIN_RECIPF:
5506 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5507 && out_mode == SFmode && out_n == 4
5508 && in_mode == SFmode && in_n == 4)
5509 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5510 break;
5511 case RS6000_BUILTIN_RECIP:
5512 if (VECTOR_UNIT_VSX_P (V2DFmode)
5513 && out_mode == DFmode && out_n == 2
5514 && in_mode == DFmode && in_n == 2)
5515 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5516 break;
5517 default:
5518 break;
5519 }
5520 return NULL_TREE;
5521 }
5522 \f
5523 /* Default CPU string for rs6000*_file_start functions. */
5524 static const char *rs6000_default_cpu;
5525
5526 #ifdef USING_ELFOS_H
5527 const char *rs6000_machine;
5528
5529 const char *
5530 rs6000_machine_from_flags (void)
5531 {
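/* Each test below isolates the flags that are new at a given ISA level
(the newer mask with the older one cleared); if any of those bits is
set, the assembler must accept at least that .machine directive.  */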
5532 if ((rs6000_isa_flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER))
5533 != 0)
5534 return "future";
5535 if ((rs6000_isa_flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5536 return "power9";
5537 if ((rs6000_isa_flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5538 return "power8";
5539 if ((rs6000_isa_flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5540 return "power7";
5541 if ((rs6000_isa_flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5542 return "power6";
5543 if ((rs6000_isa_flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5544 return "power5";
5545 if ((rs6000_isa_flags & ISA_2_1_MASKS) != 0)
5546 return "power4";
5547 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5548 return "ppc64";
5549 return "ppc";
5550 }
5551
5552 void
5553 emit_asm_machine (void)
5554 {
5555 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5556 }
5557 #endif
5558
5559 /* Do anything needed at the start of the asm file. */
5560
5561 static void
5562 rs6000_file_start (void)
5563 {
5564 char buffer[80];
5565 const char *start = buffer;
5566 FILE *file = asm_out_file;
5567
5568 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5569
5570 default_file_start ();
5571
5572 if (flag_verbose_asm)
5573 {
5574 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5575
5576 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5577 {
5578 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5579 start = "";
5580 }
5581
5582 if (global_options_set.x_rs6000_cpu_index)
5583 {
5584 fprintf (file, "%s -mcpu=%s", start,
5585 processor_target_table[rs6000_cpu_index].name);
5586 start = "";
5587 }
5588
5589 if (global_options_set.x_rs6000_tune_index)
5590 {
5591 fprintf (file, "%s -mtune=%s", start,
5592 processor_target_table[rs6000_tune_index].name);
5593 start = "";
5594 }
5595
5596 if (PPC405_ERRATUM77)
5597 {
5598 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5599 start = "";
5600 }
5601
5602 #ifdef USING_ELFOS_H
5603 switch (rs6000_sdata)
5604 {
5605 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5606 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5607 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5608 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5609 }
5610
5611 if (rs6000_sdata && g_switch_value)
5612 {
5613 fprintf (file, "%s -G %d", start,
5614 g_switch_value);
5615 start = "";
5616 }
5617 #endif
5618
5619 if (*start == '\0')
5620 putc ('\n', file);
5621 }
5622
5623 #ifdef USING_ELFOS_H
5624 rs6000_machine = rs6000_machine_from_flags ();
5625 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
5626 && !global_options_set.x_rs6000_cpu_index)
5627 emit_asm_machine ();
5628 #endif
5629
5630 if (DEFAULT_ABI == ABI_ELFv2)
5631 fprintf (file, "\t.abiversion 2\n");
5632 }
5633
5634 \f
5635 /* Return nonzero if this function is known to have a null epilogue. */
5636
5637 int
5638 direct_return (void)
5639 {
5640 if (reload_completed)
5641 {
5642 rs6000_stack_t *info = rs6000_stack_info ();
5643
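/* The epilogue is null iff nothing was saved: first_gp_reg_save == 32
and first_fp_reg_save == 64 are one past the last GPR/FPR, so no
GPRs, FPRs, Altivec regs, LR or CR were saved, no VRSAVE space is
used, and no frame was pushed.  */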
5644 if (info->first_gp_reg_save == 32
5645 && info->first_fp_reg_save == 64
5646 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5647 && ! info->lr_save_p
5648 && ! info->cr_save_p
5649 && info->vrsave_size == 0
5650 && ! info->push_p)
5651 return 1;
5652 }
5653
5654 return 0;
5655 }
5656
5657 /* Helper for num_insns_constant. Calculate number of instructions to
5658 load VALUE to a single gpr using combinations of addi, addis, ori,
5659 oris and sldi instructions. */
5660
5661 static int
5662 num_insns_constant_gpr (HOST_WIDE_INT value)
5663 {
5664 /* signed constant loadable with addi */
5665 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5666 return 1;
5667
5668 /* constant loadable with addis */
5669 else if ((value & 0xffff) == 0
5670 && (value >> 31 == -1 || value >> 31 == 0))
5671 return 1;
5672
5673 else if (TARGET_POWERPC64)
5674 {
5675 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5676 HOST_WIDE_INT high = value >> 31;
5677
5678 if (high == 0 || high == -1)
5679 return 2;
5680
5681 high >>= 1;
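/* HIGH now holds bits 32..63 of VALUE; bit 31 is already covered by
the sign extension of LOW.  */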
5682
5683 if (low == 0)
5684 return num_insns_constant_gpr (high) + 1;
5685 else if (high == 0)
5686 return num_insns_constant_gpr (low) + 1;
5687 else
5688 return (num_insns_constant_gpr (high)
5689 + num_insns_constant_gpr (low) + 1);
5690 }
5691
5692 else
5693 return 2;
5694 }
5695
5696 /* Helper for num_insns_constant. Allow constants formed by the
5697 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5698 and handle modes that require multiple gprs. */
5699
5700 static int
5701 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5702 {
5703 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5704 int total = 0;
5705 while (nregs-- > 0)
5706 {
5707 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5708 int insns = num_insns_constant_gpr (low);
5709 if (insns > 2
5710 /* We won't get more than 2 from num_insns_constant_gpr
5711 except when TARGET_POWERPC64 and mode is DImode or
5712 wider, so the register mode must be DImode. */
5713 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5714 insns = 2;
5715 total += insns;
5716 value >>= BITS_PER_WORD;
5717 }
5718 return total;
5719 }
5720
5721 /* Return the number of instructions it takes to form a constant in as
5722 many gprs as are needed for MODE.  */
5723
5724 int
5725 num_insns_constant (rtx op, machine_mode mode)
5726 {
5727 HOST_WIDE_INT val;
5728
5729 switch (GET_CODE (op))
5730 {
5731 case CONST_INT:
5732 val = INTVAL (op);
5733 break;
5734
5735 case CONST_WIDE_INT:
5736 {
5737 int insns = 0;
5738 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5739 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5740 DImode);
5741 return insns;
5742 }
5743
5744 case CONST_DOUBLE:
5745 {
5746 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5747
5748 if (mode == SFmode || mode == SDmode)
5749 {
5750 long l;
5751
5752 if (mode == SDmode)
5753 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5754 else
5755 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5756 /* See the first define_split in rs6000.md handling a
5757 const_double_operand. */
5758 val = l;
5759 mode = SImode;
5760 }
5761 else if (mode == DFmode || mode == DDmode)
5762 {
5763 long l[2];
5764
5765 if (mode == DDmode)
5766 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5767 else
5768 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5769
5770 /* See the second (32-bit) and third (64-bit) define_split
5771 in rs6000.md handling a const_double_operand. */
5772 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5773 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5774 mode = DImode;
5775 }
5776 else if (mode == TFmode || mode == TDmode
5777 || mode == KFmode || mode == IFmode)
5778 {
5779 long l[4];
5780 int insns;
5781
5782 if (mode == TDmode)
5783 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5784 else
5785 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5786
5787 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5788 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5789 insns = num_insns_constant_multi (val, DImode);
5790 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5791 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5792 insns += num_insns_constant_multi (val, DImode);
5793 return insns;
5794 }
5795 else
5796 gcc_unreachable ();
5797 }
5798 break;
5799
5800 default:
5801 gcc_unreachable ();
5802 }
5803
5804 return num_insns_constant_multi (val, mode);
5805 }
5806
5807 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5808 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5809 corresponding element of the vector, but for V4SFmode, the
5810 corresponding "float" is interpreted as an SImode integer. */
5811
5812 HOST_WIDE_INT
5813 const_vector_elt_as_int (rtx op, unsigned int elt)
5814 {
5815 rtx tmp;
5816
5817 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5818 gcc_assert (GET_MODE (op) != V2DImode
5819 && GET_MODE (op) != V2DFmode);
5820
5821 tmp = CONST_VECTOR_ELT (op, elt);
5822 if (GET_MODE (op) == V4SFmode)
5823 tmp = gen_lowpart (SImode, tmp);
5824 return INTVAL (tmp);
5825 }
5826
5827 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5828 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5829 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5830 all items are set to the same value and contain COPIES replicas of the
5831 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5832 operand and the others are set to the value of the operand's msb. */
5833
5834 static bool
5835 vspltis_constant (rtx op, unsigned step, unsigned copies)
5836 {
5837 machine_mode mode = GET_MODE (op);
5838 machine_mode inner = GET_MODE_INNER (mode);
5839
5840 unsigned i;
5841 unsigned nunits;
5842 unsigned bitsize;
5843 unsigned mask;
5844
5845 HOST_WIDE_INT val;
5846 HOST_WIDE_INT splat_val;
5847 HOST_WIDE_INT msb_val;
5848
5849 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5850 return false;
5851
5852 nunits = GET_MODE_NUNITS (mode);
5853 bitsize = GET_MODE_BITSIZE (inner);
5854 mask = GET_MODE_MASK (inner);
5855
5856 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5857 splat_val = val;
5858 msb_val = val >= 0 ? 0 : -1;
5859
5860 /* Construct the value to be splatted, if possible. If not, return 0. */
5861 for (i = 2; i <= copies; i *= 2)
5862 {
5863 HOST_WIDE_INT small_val;
5864 bitsize /= 2;
5865 small_val = splat_val >> bitsize;
5866 mask >>= bitsize;
5867 if (splat_val != ((HOST_WIDE_INT)
5868 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5869 | (small_val & mask)))
5870 return false;
5871 splat_val = small_val;
5872 }
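/* For example, with COPIES == 4 and a V4SI element of 0x05050505, the
loop folds 0x05050505 to 0x0505 and then to 0x05, checking at each
step that both halves agree; 0x05 becomes the vspltisb candidate.  */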
5873
5874 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5875 if (EASY_VECTOR_15 (splat_val))
5876 ;
5877
5878 /* Also check if we can splat, and then add the result to itself. Do so if
5879 the value is positive, or if the splat instruction is using OP's mode;
5880 for splat_val < 0, the splat and the add should use the same mode. */
5881 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5882 && (splat_val >= 0 || (step == 1 && copies == 1)))
5883 ;
5884
5885 /* Also check if we are loading up the most significant bit, which can be
5886 done by loading up -1 and shifting the value left by -1.  */
5887 else if (EASY_VECTOR_MSB (splat_val, inner))
5888 ;
5889
5890 else
5891 return false;
5892
5893 /* Check if VAL is present in every STEP-th element, and the
5894 other elements are filled with its most significant bit. */
5895 for (i = 1; i < nunits; ++i)
5896 {
5897 HOST_WIDE_INT desired_val;
5898 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5899 if ((i & (step - 1)) == 0)
5900 desired_val = val;
5901 else
5902 desired_val = msb_val;
5903
5904 if (desired_val != const_vector_elt_as_int (op, elt))
5905 return false;
5906 }
5907
5908 return true;
5909 }
5910
5911 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5912 instruction, filling in the bottom elements with 0 or -1.
5913
5914 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5915 for the number of zeroes to shift in, or negative for the number of 0xff
5916 bytes to shift in.
5917
5918 OP is a CONST_VECTOR. */
5919
5920 int
5921 vspltis_shifted (rtx op)
5922 {
5923 machine_mode mode = GET_MODE (op);
5924 machine_mode inner = GET_MODE_INNER (mode);
5925
5926 unsigned i, j;
5927 unsigned nunits;
5928 unsigned mask;
5929
5930 HOST_WIDE_INT val;
5931
5932 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5933 return 0;
5934
5935 /* We need to create pseudo registers to do the shift, so don't recognize
5936 shift vector constants after reload. */
5937 if (!can_create_pseudo_p ())
5938 return 0;
5939
5940 nunits = GET_MODE_NUNITS (mode);
5941 mask = GET_MODE_MASK (inner);
5942
5943 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5944
5945 /* Check if the value can really be the operand of a vspltis[bhw]. */
5946 if (EASY_VECTOR_15 (val))
5947 ;
5948
5949 /* Also check if we are loading up the most significant bit which can be done
5950 by loading up -1 and shifting the value left by -1. */
5951 else if (EASY_VECTOR_MSB (val, inner))
5952 ;
5953
5954 else
5955 return 0;
5956
5957 /* Check that the leading elements all contain VAL until we reach trailing
5958 elements that are 0 or all 1 bits.  */
5959 for (i = 1; i < nunits; ++i)
5960 {
5961 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5962 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5963
5964 /* If the value isn't the splat value, check for the remaining elements
5965 being 0/-1. */
5966 if (val != elt_val)
5967 {
5968 if (elt_val == 0)
5969 {
5970 for (j = i+1; j < nunits; ++j)
5971 {
5972 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5973 if (const_vector_elt_as_int (op, elt2) != 0)
5974 return 0;
5975 }
5976
5977 return (nunits - i) * GET_MODE_SIZE (inner);
5978 }
5979
5980 else if ((elt_val & mask) == mask)
5981 {
5982 for (j = i+1; j < nunits; ++j)
5983 {
5984 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5985 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5986 return 0;
5987 }
5988
5989 return -((nunits - i) * GET_MODE_SIZE (inner));
5990 }
5991
5992 else
5993 return 0;
5994 }
5995 }
5996
5997 /* If all elements are equal, we don't need to do VSLDOI.  */
5998 return 0;
5999 }
6000
6001
6002 /* Return true if OP is of the given MODE and can be synthesized
6003 with a vspltisb, vspltish or vspltisw. */
6004
6005 bool
6006 easy_altivec_constant (rtx op, machine_mode mode)
6007 {
6008 unsigned step, copies;
6009
6010 if (mode == VOIDmode)
6011 mode = GET_MODE (op);
6012 else if (mode != GET_MODE (op))
6013 return false;
6014
6015 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6016 constants. */
6017 if (mode == V2DFmode)
6018 return zero_constant (op, mode);
6019
6020 else if (mode == V2DImode)
6021 {
6022 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6023 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6024 return false;
6025
6026 if (zero_constant (op, mode))
6027 return true;
6028
6029 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6030 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6031 return true;
6032
6033 return false;
6034 }
6035
6036 /* V1TImode is a special container for TImode. Ignore for now. */
6037 else if (mode == V1TImode)
6038 return false;
6039
6040 /* Start with a vspltisw. */
6041 step = GET_MODE_NUNITS (mode) / 4;
6042 copies = 1;
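/* Each failed attempt below halves STEP (or, once STEP reaches 1,
doubles COPIES) to retry with the next narrower element size.  */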
6043
6044 if (vspltis_constant (op, step, copies))
6045 return true;
6046
6047 /* Then try with a vspltish. */
6048 if (step == 1)
6049 copies <<= 1;
6050 else
6051 step >>= 1;
6052
6053 if (vspltis_constant (op, step, copies))
6054 return true;
6055
6056 /* And finally a vspltisb. */
6057 if (step == 1)
6058 copies <<= 1;
6059 else
6060 step >>= 1;
6061
6062 if (vspltis_constant (op, step, copies))
6063 return true;
6064
6065 if (vspltis_shifted (op) != 0)
6066 return true;
6067
6068 return false;
6069 }
6070
6071 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6072 result is OP. Abort if it is not possible. */
6073
6074 rtx
6075 gen_easy_altivec_constant (rtx op)
6076 {
6077 machine_mode mode = GET_MODE (op);
6078 int nunits = GET_MODE_NUNITS (mode);
6079 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6080 unsigned step = nunits / 4;
6081 unsigned copies = 1;
6082
6083 /* Start with a vspltisw. */
6084 if (vspltis_constant (op, step, copies))
6085 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6086
6087 /* Then try with a vspltish. */
6088 if (step == 1)
6089 copies <<= 1;
6090 else
6091 step >>= 1;
6092
6093 if (vspltis_constant (op, step, copies))
6094 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6095
6096 /* And finally a vspltisb. */
6097 if (step == 1)
6098 copies <<= 1;
6099 else
6100 step >>= 1;
6101
6102 if (vspltis_constant (op, step, copies))
6103 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6104
6105 gcc_unreachable ();
6106 }
6107
6108 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6109 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6110
6111 Store the number of instructions needed (1 or 2) at the address pointed
6112 to by NUM_INSNS_PTR.
6113
6114 Store the constant being split at the address pointed to by CONSTANT_PTR.  */
6115
6116 bool
6117 xxspltib_constant_p (rtx op,
6118 machine_mode mode,
6119 int *num_insns_ptr,
6120 int *constant_ptr)
6121 {
6122 size_t nunits = GET_MODE_NUNITS (mode);
6123 size_t i;
6124 HOST_WIDE_INT value;
6125 rtx element;
6126
6127 /* Set the returned values to out of bound values. */
6128 *num_insns_ptr = -1;
6129 *constant_ptr = 256;
6130
6131 if (!TARGET_P9_VECTOR)
6132 return false;
6133
6134 if (mode == VOIDmode)
6135 mode = GET_MODE (op);
6136
6137 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6138 return false;
6139
6140 /* Handle (vec_duplicate <constant>). */
6141 if (GET_CODE (op) == VEC_DUPLICATE)
6142 {
6143 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6144 && mode != V2DImode)
6145 return false;
6146
6147 element = XEXP (op, 0);
6148 if (!CONST_INT_P (element))
6149 return false;
6150
6151 value = INTVAL (element);
6152 if (!IN_RANGE (value, -128, 127))
6153 return false;
6154 }
6155
6156 /* Handle (const_vector [...]). */
6157 else if (GET_CODE (op) == CONST_VECTOR)
6158 {
6159 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6160 && mode != V2DImode)
6161 return false;
6162
6163 element = CONST_VECTOR_ELT (op, 0);
6164 if (!CONST_INT_P (element))
6165 return false;
6166
6167 value = INTVAL (element);
6168 if (!IN_RANGE (value, -128, 127))
6169 return false;
6170
6171 for (i = 1; i < nunits; i++)
6172 {
6173 element = CONST_VECTOR_ELT (op, i);
6174 if (!CONST_INT_P (element))
6175 return false;
6176
6177 if (value != INTVAL (element))
6178 return false;
6179 }
6180 }
6181
6182 /* Handle integer constants being loaded into the upper part of the VSX
6183 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6184 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6185 else if (CONST_INT_P (op))
6186 {
6187 if (!SCALAR_INT_MODE_P (mode))
6188 return false;
6189
6190 value = INTVAL (op);
6191 if (!IN_RANGE (value, -128, 127))
6192 return false;
6193
6194 if (!IN_RANGE (value, -1, 0))
6195 {
6196 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6197 return false;
6198
6199 if (EASY_VECTOR_15 (value))
6200 return false;
6201 }
6202 }
6203
6204 else
6205 return false;
6206
6207 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6208 sign extend. Special case 0/-1 to allow getting any VSX register instead
6209 of an Altivec register. */
6210 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6211 && EASY_VECTOR_15 (value))
6212 return false;
6213
6214 /* Return # of instructions and the constant byte for XXSPLTIB. */
6215 if (mode == V16QImode)
6216 *num_insns_ptr = 1;
6217
6218 else if (IN_RANGE (value, -1, 0))
6219 *num_insns_ptr = 1;
6220
6221 else
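/* The second instruction is the sign extension (vupkhsb, vextsb2w or
vextsb2d) that widens the splatted bytes to the element size.  */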
6222 *num_insns_ptr = 2;
6223
6224 *constant_ptr = (int) value;
6225 return true;
6226 }
6227
6228 const char *
6229 output_vec_const_move (rtx *operands)
6230 {
6231 int shift;
6232 machine_mode mode;
6233 rtx dest, vec;
6234
6235 dest = operands[0];
6236 vec = operands[1];
6237 mode = GET_MODE (dest);
6238
6239 if (TARGET_VSX)
6240 {
6241 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6242 int xxspltib_value = 256;
6243 int num_insns = -1;
6244
6245 if (zero_constant (vec, mode))
6246 {
6247 if (TARGET_P9_VECTOR)
6248 return "xxspltib %x0,0";
6249
6250 else if (dest_vmx_p)
6251 return "vspltisw %0,0";
6252
6253 else
6254 return "xxlxor %x0,%x0,%x0";
6255 }
6256
6257 if (all_ones_constant (vec, mode))
6258 {
6259 if (TARGET_P9_VECTOR)
6260 return "xxspltib %x0,255";
6261
6262 else if (dest_vmx_p)
6263 return "vspltisw %0,-1";
6264
6265 else if (TARGET_P8_VECTOR)
6266 return "xxlorc %x0,%x0,%x0";
6267
6268 else
6269 gcc_unreachable ();
6270 }
6271
6272 if (TARGET_P9_VECTOR
6273 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6274 {
6275 if (num_insns == 1)
6276 {
6277 operands[2] = GEN_INT (xxspltib_value & 0xff);
6278 return "xxspltib %x0,%2";
6279 }
6280
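/* Two instructions are needed; return "#" so that the insn is
split into the xxspltib and the following sign extension.  */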
6281 return "#";
6282 }
6283 }
6284
6285 if (TARGET_ALTIVEC)
6286 {
6287 rtx splat_vec;
6288
6289 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6290 if (zero_constant (vec, mode))
6291 return "vspltisw %0,0";
6292
6293 if (all_ones_constant (vec, mode))
6294 return "vspltisw %0,-1";
6295
6296 /* Do we need to construct a value using VSLDOI? */
6297 shift = vspltis_shifted (vec);
6298 if (shift != 0)
6299 return "#";
6300
6301 splat_vec = gen_easy_altivec_constant (vec);
6302 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6303 operands[1] = XEXP (splat_vec, 0);
6304 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6305 return "#";
6306
6307 switch (GET_MODE (splat_vec))
6308 {
6309 case E_V4SImode:
6310 return "vspltisw %0,%1";
6311
6312 case E_V8HImode:
6313 return "vspltish %0,%1";
6314
6315 case E_V16QImode:
6316 return "vspltisb %0,%1";
6317
6318 default:
6319 gcc_unreachable ();
6320 }
6321 }
6322
6323 gcc_unreachable ();
6324 }
6325
6326 /* Initialize vector TARGET to VALS. */
6327
6328 void
6329 rs6000_expand_vector_init (rtx target, rtx vals)
6330 {
6331 machine_mode mode = GET_MODE (target);
6332 machine_mode inner_mode = GET_MODE_INNER (mode);
6333 int n_elts = GET_MODE_NUNITS (mode);
6334 int n_var = 0, one_var = -1;
6335 bool all_same = true, all_const_zero = true;
6336 rtx x, mem;
6337 int i;
6338
6339 for (i = 0; i < n_elts; ++i)
6340 {
6341 x = XVECEXP (vals, 0, i);
6342 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6343 ++n_var, one_var = i;
6344 else if (x != CONST0_RTX (inner_mode))
6345 all_const_zero = false;
6346
6347 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6348 all_same = false;
6349 }
6350
6351 if (n_var == 0)
6352 {
6353 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6354 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6355 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6356 {
6357 /* Zero register. */
6358 emit_move_insn (target, CONST0_RTX (mode));
6359 return;
6360 }
6361 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6362 {
6363 /* Splat immediate. */
6364 emit_insn (gen_rtx_SET (target, const_vec));
6365 return;
6366 }
6367 else
6368 {
6369 /* Load from constant pool. */
6370 emit_move_insn (target, const_vec);
6371 return;
6372 }
6373 }
6374
6375 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6376 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6377 {
6378 rtx op[2];
6379 size_t i;
6380 size_t num_elements = all_same ? 1 : 2;
6381 for (i = 0; i < num_elements; i++)
6382 {
6383 op[i] = XVECEXP (vals, 0, i);
6384 /* Just in case there is a SUBREG with a smaller mode, do a
6385 conversion. */
6386 if (GET_MODE (op[i]) != inner_mode)
6387 {
6388 rtx tmp = gen_reg_rtx (inner_mode);
6389 convert_move (tmp, op[i], 0);
6390 op[i] = tmp;
6391 }
6392 /* Allow load with splat double word. */
6393 else if (MEM_P (op[i]))
6394 {
6395 if (!all_same)
6396 op[i] = force_reg (inner_mode, op[i]);
6397 }
6398 else if (!REG_P (op[i]))
6399 op[i] = force_reg (inner_mode, op[i]);
6400 }
6401
6402 if (all_same)
6403 {
6404 if (mode == V2DFmode)
6405 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6406 else
6407 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6408 }
6409 else
6410 {
6411 if (mode == V2DFmode)
6412 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6413 else
6414 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6415 }
6416 return;
6417 }
6418
6419 /* Special case initializing vector int if we are on 64-bit systems with
6420 direct move or we have the ISA 3.0 instructions. */
6421 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6422 && TARGET_DIRECT_MOVE_64BIT)
6423 {
6424 if (all_same)
6425 {
6426 rtx element0 = XVECEXP (vals, 0, 0);
6427 if (MEM_P (element0))
6428 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6429 else
6430 element0 = force_reg (SImode, element0);
6431
6432 if (TARGET_P9_VECTOR)
6433 emit_insn (gen_vsx_splat_v4si (target, element0));
6434 else
6435 {
6436 rtx tmp = gen_reg_rtx (DImode);
6437 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6438 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6439 }
6440 return;
6441 }
6442 else
6443 {
6444 rtx elements[4];
6445 size_t i;
6446
6447 for (i = 0; i < 4; i++)
6448 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6449
6450 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6451 elements[2], elements[3]));
6452 return;
6453 }
6454 }
6455
6456 /* With single-precision floating point on VSX, we know that internally
6457 single precision is actually represented as a double, so either make 2
6458 V2DF vectors and convert them to single precision, or do one conversion
6459 and splat the result to the other elements.  */
6460 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6461 {
6462 if (all_same)
6463 {
6464 rtx element0 = XVECEXP (vals, 0, 0);
6465
6466 if (TARGET_P9_VECTOR)
6467 {
6468 if (MEM_P (element0))
6469 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6470
6471 emit_insn (gen_vsx_splat_v4sf (target, element0));
6472 }
6473
6474 else
6475 {
6476 rtx freg = gen_reg_rtx (V4SFmode);
6477 rtx sreg = force_reg (SFmode, element0);
6478 rtx cvt = (TARGET_XSCVDPSPN
6479 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6480 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6481
6482 emit_insn (cvt);
6483 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6484 const0_rtx));
6485 }
6486 }
6487 else
6488 {
6489 rtx dbl_even = gen_reg_rtx (V2DFmode);
6490 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6491 rtx flt_even = gen_reg_rtx (V4SFmode);
6492 rtx flt_odd = gen_reg_rtx (V4SFmode);
6493 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6494 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6495 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6496 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6497
6498 /* Use VMRGEW if we can instead of doing a permute. */
6499 if (TARGET_P8_VECTOR)
6500 {
6501 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6502 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6503 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6504 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6505 if (BYTES_BIG_ENDIAN)
6506 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6507 else
6508 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6509 }
6510 else
6511 {
6512 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6513 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6514 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6515 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6516 rs6000_expand_extract_even (target, flt_even, flt_odd);
6517 }
6518 }
6519 return;
6520 }
6521
6522 /* Special case initializing vector short/char that are splats if we are on
6523 64-bit systems with direct move. */
6524 if (all_same && TARGET_DIRECT_MOVE_64BIT
6525 && (mode == V16QImode || mode == V8HImode))
6526 {
6527 rtx op0 = XVECEXP (vals, 0, 0);
6528 rtx di_tmp = gen_reg_rtx (DImode);
6529
6530 if (!REG_P (op0))
6531 op0 = force_reg (GET_MODE_INNER (mode), op0);
6532
6533 if (mode == V16QImode)
6534 {
6535 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6536 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6537 return;
6538 }
6539
6540 if (mode == V8HImode)
6541 {
6542 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6543 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6544 return;
6545 }
6546 }
6547
6548 /* Store value to stack temp. Load vector element. Splat. However, splat
6549 of 64-bit items is not supported on Altivec. */
6550 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6551 {
6552 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6553 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6554 XVECEXP (vals, 0, 0));
6555 x = gen_rtx_UNSPEC (VOIDmode,
6556 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6557 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6558 gen_rtvec (2,
6559 gen_rtx_SET (target, mem),
6560 x)));
6561 x = gen_rtx_VEC_SELECT (inner_mode, target,
6562 gen_rtx_PARALLEL (VOIDmode,
6563 gen_rtvec (1, const0_rtx)));
6564 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6565 return;
6566 }
6567
6568 /* One field is non-constant. Load constant then overwrite
6569 varying field. */
6570 if (n_var == 1)
6571 {
6572 rtx copy = copy_rtx (vals);
6573
6574 /* Load constant part of vector, substitute neighboring value for
6575 varying element. */
6576 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6577 rs6000_expand_vector_init (target, copy);
6578
6579 /* Insert variable. */
6580 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6581 return;
6582 }
6583
6584 /* Construct the vector in memory one field at a time
6585 and load the whole vector. */
6586 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6587 for (i = 0; i < n_elts; i++)
6588 emit_move_insn (adjust_address_nv (mem, inner_mode,
6589 i * GET_MODE_SIZE (inner_mode)),
6590 XVECEXP (vals, 0, i));
6591 emit_move_insn (target, mem);
6592 }
6593
6594 /* Set field ELT of TARGET to VAL. */
6595
6596 void
6597 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6598 {
6599 machine_mode mode = GET_MODE (target);
6600 machine_mode inner_mode = GET_MODE_INNER (mode);
6601 rtx reg = gen_reg_rtx (mode);
6602 rtx mask, mem, x;
6603 int width = GET_MODE_SIZE (inner_mode);
6604 int i;
6605
6606 val = force_reg (GET_MODE (val), val);
6607
6608 if (VECTOR_MEM_VSX_P (mode))
6609 {
6610 rtx insn = NULL_RTX;
6611 rtx elt_rtx = GEN_INT (elt);
6612
6613 if (mode == V2DFmode)
6614 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6615
6616 else if (mode == V2DImode)
6617 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6618
6619 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6620 {
6621 if (mode == V4SImode)
6622 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6623 else if (mode == V8HImode)
6624 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6625 else if (mode == V16QImode)
6626 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6627 else if (mode == V4SFmode)
6628 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6629 }
6630
6631 if (insn)
6632 {
6633 emit_insn (insn);
6634 return;
6635 }
6636 }
6637
6638 /* Simplify setting single element vectors like V1TImode. */
6639 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6640 {
6641 emit_move_insn (target, gen_lowpart (mode, val));
6642 return;
6643 }
6644
6645 /* Load single variable value. */
6646 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6647 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6648 x = gen_rtx_UNSPEC (VOIDmode,
6649 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6650 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6651 gen_rtvec (2,
6652 gen_rtx_SET (reg, mem),
6653 x)));
6654
6655 /* Linear sequence. */
6656 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6657 for (i = 0; i < 16; ++i)
6658 XVECEXP (mask, 0, i) = GEN_INT (i);
6659
6660 /* Set permute mask to insert element into target. */
6661 for (i = 0; i < width; ++i)
6662 XVECEXP (mask, 0, elt*width + i)
6663 = GEN_INT (i + 0x10);
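/* For example, with V4SImode and ELT == 2 the mask bytes are
{ 0..7, 0x10,0x11,0x12,0x13, 12..15 }: selector entries 0x10..0x1f
pick bytes of the new value from the second input of the big-endian
vperm; the little-endian paths below adjust the selector.  */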
6664 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6665
6666 if (BYTES_BIG_ENDIAN)
6667 x = gen_rtx_UNSPEC (mode,
6668 gen_rtvec (3, target, reg,
6669 force_reg (V16QImode, x)),
6670 UNSPEC_VPERM);
6671 else
6672 {
6673 if (TARGET_P9_VECTOR)
6674 x = gen_rtx_UNSPEC (mode,
6675 gen_rtvec (3, reg, target,
6676 force_reg (V16QImode, x)),
6677 UNSPEC_VPERMR);
6678 else
6679 {
6680 /* Invert selector. We prefer to generate VNAND on P8 so
6681 that future fusion opportunities can kick in, but must
6682 generate VNOR elsewhere. */
6683 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6684 rtx iorx = (TARGET_P8_VECTOR
6685 ? gen_rtx_IOR (V16QImode, notx, notx)
6686 : gen_rtx_AND (V16QImode, notx, notx));
6687 rtx tmp = gen_reg_rtx (V16QImode);
6688 emit_insn (gen_rtx_SET (tmp, iorx));
6689
6690 /* Permute with operands reversed and adjusted selector. */
6691 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6692 UNSPEC_VPERM);
6693 }
6694 }
6695
6696 emit_insn (gen_rtx_SET (target, x));
6697 }
6698
6699 /* Extract field ELT from VEC into TARGET. */
6700
6701 void
6702 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6703 {
6704 machine_mode mode = GET_MODE (vec);
6705 machine_mode inner_mode = GET_MODE_INNER (mode);
6706 rtx mem;
6707
6708 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6709 {
6710 switch (mode)
6711 {
6712 default:
6713 break;
6714 case E_V1TImode:
6715 emit_move_insn (target, gen_lowpart (TImode, vec));
6716 break;
6717 case E_V2DFmode:
6718 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6719 return;
6720 case E_V2DImode:
6721 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6722 return;
6723 case E_V4SFmode:
6724 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6725 return;
6726 case E_V16QImode:
6727 if (TARGET_DIRECT_MOVE_64BIT)
6728 {
6729 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6730 return;
6731 }
6732 else
6733 break;
6734 case E_V8HImode:
6735 if (TARGET_DIRECT_MOVE_64BIT)
6736 {
6737 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6738 return;
6739 }
6740 else
6741 break;
6742 case E_V4SImode:
6743 if (TARGET_DIRECT_MOVE_64BIT)
6744 {
6745 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6746 return;
6747 }
6748 break;
6749 }
6750 }
6751 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6752 && TARGET_DIRECT_MOVE_64BIT)
6753 {
6754 if (GET_MODE (elt) != DImode)
6755 {
6756 rtx tmp = gen_reg_rtx (DImode);
6757 convert_move (tmp, elt, 0);
6758 elt = tmp;
6759 }
6760 else if (!REG_P (elt))
6761 elt = force_reg (DImode, elt);
6762
6763 switch (mode)
6764 {
6765 case E_V1TImode:
6766 emit_move_insn (target, gen_lowpart (TImode, vec));
6767 return;
6768
6769 case E_V2DFmode:
6770 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6771 return;
6772
6773 case E_V2DImode:
6774 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6775 return;
6776
6777 case E_V4SFmode:
6778 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6779 return;
6780
6781 case E_V4SImode:
6782 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6783 return;
6784
6785 case E_V8HImode:
6786 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6787 return;
6788
6789 case E_V16QImode:
6790 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6791 return;
6792
6793 default:
6794 gcc_unreachable ();
6795 }
6796 }
6797
6798 /* Allocate mode-sized buffer. */
6799 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6800
6801 emit_move_insn (mem, vec);
6802 if (CONST_INT_P (elt))
6803 {
6804 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6805
6806 /* Add offset to field within buffer matching vector element. */
6807 mem = adjust_address_nv (mem, inner_mode,
6808 modulo_elt * GET_MODE_SIZE (inner_mode));
6809 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6810 }
6811 else
6812 {
6813 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6814 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6815 rtx new_addr = gen_reg_rtx (Pmode);
6816
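/* Reduce the element number modulo the number of elements so the
access stays inside the buffer, then scale by the element size to
form a byte offset.  */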
6817 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6818 if (ele_size > 1)
6819 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6820 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6821 new_addr = change_address (mem, inner_mode, new_addr);
6822 emit_move_insn (target, new_addr);
6823 }
6824 }
6825
6826 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6827 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6828 temporary (BASE_TMP) to fix up the address.  Return the new memory address
6829 that is valid for reads or writes to a given register (SCALAR_REG). */
6830
6831 rtx
6832 rs6000_adjust_vec_address (rtx scalar_reg,
6833 rtx mem,
6834 rtx element,
6835 rtx base_tmp,
6836 machine_mode scalar_mode)
6837 {
6838 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6839 rtx addr = XEXP (mem, 0);
6840 rtx element_offset;
6841 rtx new_addr;
6842 bool valid_addr_p;
6843
6844 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6845 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6846
6847 /* Calculate what we need to add to the address to get the element
6848 address. */
6849 if (CONST_INT_P (element))
6850 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6851 else
6852 {
6853 int byte_shift = exact_log2 (scalar_size);
6854 gcc_assert (byte_shift >= 0);
6855
6856 if (byte_shift == 0)
6857 element_offset = element;
6858
6859 else
6860 {
6861 if (TARGET_POWERPC64)
6862 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6863 else
6864 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6865
6866 element_offset = base_tmp;
6867 }
6868 }
6869
6870 /* Create the new address pointing to the element within the vector. If we
6871 are adding 0, we don't have to change the address. */
6872 if (element_offset == const0_rtx)
6873 new_addr = addr;
6874
6875 /* A simple indirect address can be converted into a reg + offset
6876 address. */
6877 else if (REG_P (addr) || SUBREG_P (addr))
6878 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6879
6880 /* Optimize D-FORM addresses with a constant offset and a constant element,
6881 to include the element offset in the address directly. */
6882 else if (GET_CODE (addr) == PLUS)
6883 {
6884 rtx op0 = XEXP (addr, 0);
6885 rtx op1 = XEXP (addr, 1);
6886 rtx insn;
6887
6888 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6889 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6890 {
6891 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6892 rtx offset_rtx = GEN_INT (offset);
6893
6894 if (IN_RANGE (offset, -32768, 32767)
6895 && (scalar_size < 8 || (offset & 0x3) == 0))
6896 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6897 else
6898 {
6899 emit_move_insn (base_tmp, offset_rtx);
6900 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6901 }
6902 }
6903 else
6904 {
6905 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6906 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6907
6908 /* Note, ADDI requires the register being added to be a base
6909 register. If the register was R0, load it up into the temporary
6910 and do the add. */
6911 if (op1_reg_p
6912 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6913 {
6914 insn = gen_add3_insn (base_tmp, op1, element_offset);
6915 gcc_assert (insn != NULL_RTX);
6916 emit_insn (insn);
6917 }
6918
6919 else if (ele_reg_p
6920 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6921 {
6922 insn = gen_add3_insn (base_tmp, element_offset, op1);
6923 gcc_assert (insn != NULL_RTX);
6924 emit_insn (insn);
6925 }
6926
6927 else
6928 {
6929 emit_move_insn (base_tmp, op1);
6930 emit_insn (gen_add2_insn (base_tmp, element_offset));
6931 }
6932
6933 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6934 }
6935 }
6936
6937 else
6938 {
6939 emit_move_insn (base_tmp, addr);
6940 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6941 }
6942
6943 /* If we have a PLUS, we need to see whether the particular register class
6944 allows for D-FORM or X-FORM addressing. */
6945 if (GET_CODE (new_addr) == PLUS)
6946 {
6947 rtx op1 = XEXP (new_addr, 1);
6948 addr_mask_type addr_mask;
6949 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6950
6951 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6952 if (INT_REGNO_P (scalar_regno))
6953 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6954
6955 else if (FP_REGNO_P (scalar_regno))
6956 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6957
6958 else if (ALTIVEC_REGNO_P (scalar_regno))
6959 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6960
6961 else
6962 gcc_unreachable ();
6963
6964 if (REG_P (op1) || SUBREG_P (op1))
6965 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6966 else
6967 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6968 }
6969
6970 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6971 valid_addr_p = true;
6972
6973 else
6974 valid_addr_p = false;
6975
6976 if (!valid_addr_p)
6977 {
6978 emit_move_insn (base_tmp, new_addr);
6979 new_addr = base_tmp;
6980 }
6981
6982 return change_address (mem, scalar_mode, new_addr);
6983 }
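
/* For illustration (a hypothetical sketch, not output copied from a compiler
   dump): extracting a variable SImode element from a V4SImode vector whose
   address is simply (reg r3), with the masked element number in (reg r5),
   proceeds roughly as

       sldi base_tmp,r5,2        # element * GET_MODE_SIZE (SImode)
       # new address: (plus (reg r3) (reg base_tmp))

   after which the (plus ...) form is kept only if the register class of
   SCALAR_REG supports X-FORM (reg+reg) addressing for SCALAR_MODE; otherwise
   the sum is moved into BASE_TMP and plain register-indirect addressing is
   used instead.  */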
6984
6985 /* Split a variable vec_extract operation into the component instructions. */
6986
6987 void
6988 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6989 rtx tmp_altivec)
6990 {
6991 machine_mode mode = GET_MODE (src);
6992 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6993 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6994 int byte_shift = exact_log2 (scalar_size);
6995
6996 gcc_assert (byte_shift >= 0);
6997
6998 /* If we are given a memory address, optimize to load just the element. We
6999 don't have to adjust the vector element number on little endian
7000 systems. */
7001 if (MEM_P (src))
7002 {
7003 int num_elements = GET_MODE_NUNITS (mode);
7004 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7005
7006 emit_insn (gen_anddi3 (element, element, num_ele_m1));
7007 gcc_assert (REG_P (tmp_gpr));
7008 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7009 tmp_gpr, scalar_mode));
7010 return;
7011 }
7012
7013 else if (REG_P (src) || SUBREG_P (src))
7014 {
7015 int num_elements = GET_MODE_NUNITS (mode);
7016 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7017 int bit_shift = 7 - exact_log2 (num_elements);
7018 rtx element2;
7019 unsigned int dest_regno = reg_or_subregno (dest);
7020 unsigned int src_regno = reg_or_subregno (src);
7021 unsigned int element_regno = reg_or_subregno (element);
7022
7023 gcc_assert (REG_P (tmp_gpr));
7024
7025 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7026 a general purpose register. */
7027 if (TARGET_P9_VECTOR
7028 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7029 && INT_REGNO_P (dest_regno)
7030 && ALTIVEC_REGNO_P (src_regno)
7031 && INT_REGNO_P (element_regno))
7032 {
7033 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7034 rtx element_si = gen_rtx_REG (SImode, element_regno);
7035
7036 if (mode == V16QImode)
7037 emit_insn (BYTES_BIG_ENDIAN
7038 ? gen_vextublx (dest_si, element_si, src)
7039 : gen_vextubrx (dest_si, element_si, src));
7040
7041 else if (mode == V8HImode)
7042 {
7043 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7044 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7045 emit_insn (BYTES_BIG_ENDIAN
7046 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7047 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7048 }
7049
7050
7051 else
7052 {
7053 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7054 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7055 emit_insn (BYTES_BIG_ENDIAN
7056 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7057 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7058 }
7059
7060 return;
7061 }
7062
7063
7064 gcc_assert (REG_P (tmp_altivec));
7065
7066 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7067 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7068 will shift the element into the upper position (adding 3 to convert a
7069 byte shift into a bit shift). */
7070 if (scalar_size == 8)
7071 {
7072 if (!BYTES_BIG_ENDIAN)
7073 {
7074 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7075 element2 = tmp_gpr;
7076 }
7077 else
7078 element2 = element;
7079
7080 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7081 bit. */
7082 emit_insn (gen_rtx_SET (tmp_gpr,
7083 gen_rtx_AND (DImode,
7084 gen_rtx_ASHIFT (DImode,
7085 element2,
7086 GEN_INT (6)),
7087 GEN_INT (64))));
7088 }
7089 else
7090 {
7091 if (!BYTES_BIG_ENDIAN)
7092 {
7093 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7094
7095 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7096 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7097 element2 = tmp_gpr;
7098 }
7099 else
7100 element2 = element;
7101
7102 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7103 }
7104
7105 /* Get the value into the lower byte of the Altivec register where VSLO
7106 expects it. */
7107 if (TARGET_P9_VECTOR)
7108 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7109 else if (can_create_pseudo_p ())
7110 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7111 else
7112 {
7113 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7114 emit_move_insn (tmp_di, tmp_gpr);
7115 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7116 }
7117
7118 /* Do the VSLO to get the value into the final location. */
7119 switch (mode)
7120 {
7121 case E_V2DFmode:
7122 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7123 return;
7124
7125 case E_V2DImode:
7126 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7127 return;
7128
7129 case E_V4SFmode:
7130 {
7131 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7132 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7133 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7134 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7135 tmp_altivec));
7136
7137 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7138 return;
7139 }
7140
7141 case E_V4SImode:
7142 case E_V8HImode:
7143 case E_V16QImode:
7144 {
7145 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7146 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7147 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7148 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7149 tmp_altivec));
7150 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7151 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7152 GEN_INT (64 - bits_in_element)));
7153 return;
7154 }
7155
7156 default:
7157 gcc_unreachable ();
7158 }
7159
7160 return;
7161 }
7162 else
7163 gcc_unreachable ();
7164 }
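
/* A rough example of the split for a V16QImode variable extract when ISA 3.0
   (power9) instructions are available and the destination is a GPR
   (hypothetical register numbers):

       vextubrx r3,r5,v2        # little endian: r3 = v2[r5]

   Without power9, the element number is instead converted to a bit shift
   count, moved into an Altivec register, and VSLO shifts the requested
   element into the high doubleword, from which it is shifted into the
   destination.  */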
7165
7166 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7167 selects whether the alignment applied is ABI-mandated, an optional
7168 optimization, or both. */
7169
7170 unsigned int
7171 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7172 {
7173 if (how != align_opt)
7174 {
7175 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7176 align = 128;
7177 }
7178
7179 if (how != align_abi)
7180 {
7181 if (TREE_CODE (type) == ARRAY_TYPE
7182 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7183 {
7184 if (align < BITS_PER_WORD)
7185 align = BITS_PER_WORD;
7186 }
7187 }
7188
7189 return align;
7190 }
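
/* For example (illustrative): with HOW == align_both, a V4SImode variable is
   raised to 128-bit alignment, which is an ABI requirement, while a plain
   "char buf[100]" is raised to BITS_PER_WORD alignment, which is only an
   optimization and is therefore skipped when HOW == align_abi.  */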
7191
7192 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7193 instructions simply ignore the low bits; VSX memory instructions
7194 are aligned to 4 or 8 bytes. */
7195
7196 static bool
7197 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7198 {
7199 return (STRICT_ALIGNMENT
7200 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7201 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7202 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7203 && (int) align < VECTOR_ALIGN (mode)))));
7204 }
7205
7206 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7207
7208 bool
7209 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7210 {
7211 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7212 {
7213 if (computed != 128)
7214 {
7215 static bool warned;
7216 if (!warned && warn_psabi)
7217 {
7218 warned = true;
7219 inform (input_location,
7220 "the layout of aggregates containing vectors with"
7221 " %d-byte alignment has changed in GCC 5",
7222 computed / BITS_PER_UNIT);
7223 }
7224 }
7225 /* In current GCC there is no special case. */
7226 return false;
7227 }
7228
7229 return false;
7230 }
7231
7232 /* AIX increases natural record alignment to doubleword if the first
7233 field is an FP double while the FP fields remain word aligned. */
7234
7235 unsigned int
7236 rs6000_special_round_type_align (tree type, unsigned int computed,
7237 unsigned int specified)
7238 {
7239 unsigned int align = MAX (computed, specified);
7240 tree field = TYPE_FIELDS (type);
7241
7242 /* Skip all non-field decls. */
7243 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7244 field = DECL_CHAIN (field);
7245
7246 if (field != NULL && field != type)
7247 {
7248 type = TREE_TYPE (field);
7249 while (TREE_CODE (type) == ARRAY_TYPE)
7250 type = TREE_TYPE (type);
7251
7252 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7253 align = MAX (align, 64);
7254 }
7255
7256 return align;
7257 }
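
/* Illustrative example: on AIX,

       struct s { double d; int i; };

   gets doubleword (64-bit) alignment because its first field is a double,
   even though the double field itself remains only word aligned within the
   record.  */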
7258
7259 /* Darwin increases record alignment to the natural alignment of
7260 the first field. */
7261
7262 unsigned int
7263 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7264 unsigned int specified)
7265 {
7266 unsigned int align = MAX (computed, specified);
7267
7268 if (TYPE_PACKED (type))
7269 return align;
7270
7271 /* Find the first field, looking down into aggregates. */
7272 do {
7273 tree field = TYPE_FIELDS (type);
7274 /* Skip all non-field decls. */
7275 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7276 field = DECL_CHAIN (field);
7277 if (! field)
7278 break;
7279 /* A packed field does not contribute any extra alignment. */
7280 if (DECL_PACKED (field))
7281 return align;
7282 type = TREE_TYPE (field);
7283 while (TREE_CODE (type) == ARRAY_TYPE)
7284 type = TREE_TYPE (type);
7285 } while (AGGREGATE_TYPE_P (type));
7286
7287 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7288 align = MAX (align, TYPE_ALIGN (type));
7289
7290 return align;
7291 }
7292
7293 /* Return 1 for an operand in small memory on V.4/eabi. */
7294
7295 int
7296 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7297 machine_mode mode ATTRIBUTE_UNUSED)
7298 {
7299 #if TARGET_ELF
7300 rtx sym_ref;
7301
7302 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7303 return 0;
7304
7305 if (DEFAULT_ABI != ABI_V4)
7306 return 0;
7307
7308 if (SYMBOL_REF_P (op))
7309 sym_ref = op;
7310
7311 else if (GET_CODE (op) != CONST
7312 || GET_CODE (XEXP (op, 0)) != PLUS
7313 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7314 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7315 return 0;
7316
7317 else
7318 {
7319 rtx sum = XEXP (op, 0);
7320 HOST_WIDE_INT summand;
7321
7322 /* We have to be careful here, because it is the referenced address
7323 that must be 32k from _SDA_BASE_, not just the symbol. */
7324 summand = INTVAL (XEXP (sum, 1));
7325 if (summand < 0 || summand > g_switch_value)
7326 return 0;
7327
7328 sym_ref = XEXP (sum, 0);
7329 }
7330
7331 return SYMBOL_REF_SMALL_P (sym_ref);
7332 #else
7333 return 0;
7334 #endif
7335 }
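
/* Conceptual example (not a verbatim dump): with small data enabled on a
   V.4/eabi target, a variable placed in the small data section can be
   addressed via a 16-bit offset from _SDA_BASE_ (kept in r13 under that
   ABI), i.e. roughly

       (mem (plus (reg 13) (const_int <offset from _SDA_BASE_>)))

   provided the referenced address, symbol plus any addend, stays within the
   -G limit (g_switch_value) so the whole access remains reachable.  */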
7336
7337 /* Return true if either operand is a general purpose register. */
7338
7339 bool
7340 gpr_or_gpr_p (rtx op0, rtx op1)
7341 {
7342 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7343 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7344 }
7345
7346 /* Return true if this is a move direct operation between GPR registers and
7347 floating point/VSX registers. */
7348
7349 bool
7350 direct_move_p (rtx op0, rtx op1)
7351 {
7352 if (!REG_P (op0) || !REG_P (op1))
7353 return false;
7354
7355 if (!TARGET_DIRECT_MOVE)
7356 return false;
7357
7358 int regno0 = REGNO (op0);
7359 int regno1 = REGNO (op1);
7360 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7361 return false;
7362
7363 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7364 return true;
7365
7366 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7367 return true;
7368
7369 return false;
7370 }
7371
7372 /* Return true if ADDR is an acceptable address for a quad memory
7373 operation of mode MODE (either LQ/STQ for general purpose registers, or
7374 LXV/STXV for vector registers under ISA 3.0). STRICT is true if reload
7375 has completed and we should use strict register checking to validate
7376 the base register. */
7377
7378 bool
7379 quad_address_p (rtx addr, machine_mode mode, bool strict)
7380 {
7381 rtx op0, op1;
7382
7383 if (GET_MODE_SIZE (mode) != 16)
7384 return false;
7385
7386 if (legitimate_indirect_address_p (addr, strict))
7387 return true;
7388
7389 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7390 return false;
7391
7392 if (GET_CODE (addr) != PLUS)
7393 return false;
7394
7395 op0 = XEXP (addr, 0);
7396 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7397 return false;
7398
7399 op1 = XEXP (addr, 1);
7400 if (!CONST_INT_P (op1))
7401 return false;
7402
7403 return quad_address_offset_p (INTVAL (op1));
7404 }
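
/* Illustrative: LXV/STXV use the DQ instruction form, so an address such as
   (plus (reg r3) (const_int 32)) is acceptable, 32 being a multiple of 16
   within the signed 16-bit range checked by quad_address_offset_p, while an
   offset of 8 would be rejected.  */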
7405
7406 /* Return true if this is a load or store quad operation. This function does
7407 not handle the atomic quad memory instructions. */
7408
7409 bool
7410 quad_load_store_p (rtx op0, rtx op1)
7411 {
7412 bool ret;
7413
7414 if (!TARGET_QUAD_MEMORY)
7415 ret = false;
7416
7417 else if (REG_P (op0) && MEM_P (op1))
7418 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7419 && quad_memory_operand (op1, GET_MODE (op1))
7420 && !reg_overlap_mentioned_p (op0, op1));
7421
7422 else if (MEM_P (op0) && REG_P (op1))
7423 ret = (quad_memory_operand (op0, GET_MODE (op0))
7424 && quad_int_reg_operand (op1, GET_MODE (op1)));
7425
7426 else
7427 ret = false;
7428
7429 if (TARGET_DEBUG_ADDR)
7430 {
7431 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7432 ret ? "true" : "false");
7433 debug_rtx (gen_rtx_SET (op0, op1));
7434 }
7435
7436 return ret;
7437 }
7438
7439 /* Given an address, return a constant offset term if one exists. */
7440
7441 static rtx
7442 address_offset (rtx op)
7443 {
7444 if (GET_CODE (op) == PRE_INC
7445 || GET_CODE (op) == PRE_DEC)
7446 op = XEXP (op, 0);
7447 else if (GET_CODE (op) == PRE_MODIFY
7448 || GET_CODE (op) == LO_SUM)
7449 op = XEXP (op, 1);
7450
7451 if (GET_CODE (op) == CONST)
7452 op = XEXP (op, 0);
7453
7454 if (GET_CODE (op) == PLUS)
7455 op = XEXP (op, 1);
7456
7457 if (CONST_INT_P (op))
7458 return op;
7459
7460 return NULL_RTX;
7461 }
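
/* Examples (illustrative RTL): this returns (const_int 8) for
   (plus (reg 3) (const_int 8)), and (const_int 4) for
   (lo_sum (reg 3) (const (plus (symbol_ref "x") (const_int 4)))),
   but NULL_RTX for a plain (reg 3) or for a reg+reg sum.  */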
7462
7463 /* Return true if the MEM operand is a memory operand suitable for use
7464 with a (full width, possibly multiple) gpr load/store. On
7465 powerpc64 this means the offset must be divisible by 4.
7466 Implements 'Y' constraint.
7467
7468 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7469 a constraint function we know the operand has satisfied a suitable
7470 memory predicate.
7471
7472 Offsetting a lo_sum should not be allowed, except where we know by
7473 alignment that a 32k boundary is not crossed. Note that by
7474 "offsetting" here we mean a further offset to access parts of the
7475 MEM. It's fine to have a lo_sum where the inner address is offset
7476 from a sym, since the same sym+offset will appear in the high part
7477 of the address calculation. */
7478
7479 bool
7480 mem_operand_gpr (rtx op, machine_mode mode)
7481 {
7482 unsigned HOST_WIDE_INT offset;
7483 int extra;
7484 rtx addr = XEXP (op, 0);
7485
7486 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7487 if (TARGET_UPDATE
7488 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7489 && mode_supports_pre_incdec_p (mode)
7490 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7491 return true;
7492
7493 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7494 if (!rs6000_offsettable_memref_p (op, mode, false))
7495 return false;
7496
7497 op = address_offset (addr);
7498 if (op == NULL_RTX)
7499 return true;
7500
7501 offset = INTVAL (op);
7502 if (TARGET_POWERPC64 && (offset & 3) != 0)
7503 return false;
7504
7505 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7506 if (extra < 0)
7507 extra = 0;
7508
7509 if (GET_CODE (addr) == LO_SUM)
7510 /* For lo_sum addresses, we must allow any offset except one that
7511 causes a wrap, so test only the low 16 bits. */
7512 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7513
7514 return offset + 0x8000 < 0x10000u - extra;
7515 }
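
/* A worked example of the final range check: for TImode on powerpc64
   (GET_MODE_SIZE == 16, UNITS_PER_WORD == 8) we get extra == 8, so the
   offset must satisfy offset + 0x8000 < 0x10000 - 8, i.e. lie in
   [-32768, 32759], which keeps offset + 8 for the second doubleword
   addressable too; the offset must also be a multiple of 4 as checked
   above.  */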
7516
7517 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7518 enforce an offset divisible by 4 even for 32-bit. */
7519
7520 bool
7521 mem_operand_ds_form (rtx op, machine_mode mode)
7522 {
7523 unsigned HOST_WIDE_INT offset;
7524 int extra;
7525 rtx addr = XEXP (op, 0);
7526
7527 if (!offsettable_address_p (false, mode, addr))
7528 return false;
7529
7530 op = address_offset (addr);
7531 if (op == NULL_RTX)
7532 return true;
7533
7534 offset = INTVAL (op);
7535 if ((offset & 3) != 0)
7536 return false;
7537
7538 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7539 if (extra < 0)
7540 extra = 0;
7541
7542 if (GET_CODE (addr) == LO_SUM)
7543 /* For lo_sum addresses, we must allow any offset except one that
7544 causes a wrap, so test only the low 16 bits. */
7545 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7546
7547 return offset + 0x8000 < 0x10000u - extra;
7548 }
7549 \f
7550 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7551
7552 static bool
7553 reg_offset_addressing_ok_p (machine_mode mode)
7554 {
7555 switch (mode)
7556 {
7557 case E_V16QImode:
7558 case E_V8HImode:
7559 case E_V4SFmode:
7560 case E_V4SImode:
7561 case E_V2DFmode:
7562 case E_V2DImode:
7563 case E_V1TImode:
7564 case E_TImode:
7565 case E_TFmode:
7566 case E_KFmode:
7567 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7568 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7569 a vector mode, if we want to use the VSX registers to move it around,
7570 we need to restrict ourselves to reg+reg addressing. Similarly for
7571 IEEE 128-bit floating point that is passed in a single vector
7572 register. */
7573 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7574 return mode_supports_dq_form (mode);
7575 break;
7576
7577 case E_SDmode:
7578 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7579 addressing for the LFIWZX and STFIWX instructions. */
7580 if (TARGET_NO_SDMODE_STACK)
7581 return false;
7582 break;
7583
7584 default:
7585 break;
7586 }
7587
7588 return true;
7589 }
7590
7591 static bool
7592 virtual_stack_registers_memory_p (rtx op)
7593 {
7594 int regnum;
7595
7596 if (REG_P (op))
7597 regnum = REGNO (op);
7598
7599 else if (GET_CODE (op) == PLUS
7600 && REG_P (XEXP (op, 0))
7601 && CONST_INT_P (XEXP (op, 1)))
7602 regnum = REGNO (XEXP (op, 0));
7603
7604 else
7605 return false;
7606
7607 return (regnum >= FIRST_VIRTUAL_REGISTER
7608 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7609 }
7610
7611 /* Return true if a MODE-sized memory access to OP plus OFFSET
7612 is known not to straddle a 32k boundary. This function is used
7613 to determine whether -mcmodel=medium code can use TOC pointer
7614 relative addressing for OP. This means the alignment of the TOC
7615 pointer must also be taken into account, and unfortunately that is
7616 only 8 bytes. */
7617
7618 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7619 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7620 #endif
7621
7622 static bool
7623 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7624 machine_mode mode)
7625 {
7626 tree decl;
7627 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7628
7629 if (!SYMBOL_REF_P (op))
7630 return false;
7631
7632 /* ISA 3.0 vector d-form addressing is restricted; don't allow
7633 SYMBOL_REF. */
7634 if (mode_supports_dq_form (mode))
7635 return false;
7636
7637 dsize = GET_MODE_SIZE (mode);
7638 decl = SYMBOL_REF_DECL (op);
7639 if (!decl)
7640 {
7641 if (dsize == 0)
7642 return false;
7643
7644 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7645 replacing memory addresses with an anchor plus offset. We
7646 could find the decl by rummaging around in the block->objects
7647 VEC for the given offset but that seems like too much work. */
7648 dalign = BITS_PER_UNIT;
7649 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7650 && SYMBOL_REF_ANCHOR_P (op)
7651 && SYMBOL_REF_BLOCK (op) != NULL)
7652 {
7653 struct object_block *block = SYMBOL_REF_BLOCK (op);
7654
7655 dalign = block->alignment;
7656 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7657 }
7658 else if (CONSTANT_POOL_ADDRESS_P (op))
7659 {
7660 /* It would be nice to have get_pool_align().. */
7661 machine_mode cmode = get_pool_mode (op);
7662
7663 dalign = GET_MODE_ALIGNMENT (cmode);
7664 }
7665 }
7666 else if (DECL_P (decl))
7667 {
7668 dalign = DECL_ALIGN (decl);
7669
7670 if (dsize == 0)
7671 {
7672 /* Allow BLKmode when the entire object is known to not
7673 cross a 32k boundary. */
7674 if (!DECL_SIZE_UNIT (decl))
7675 return false;
7676
7677 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7678 return false;
7679
7680 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7681 if (dsize > 32768)
7682 return false;
7683
7684 dalign /= BITS_PER_UNIT;
7685 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7686 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7687 return dalign >= dsize;
7688 }
7689 }
7690 else
7691 gcc_unreachable ();
7692
7693 /* Find how many bits of the alignment we know for this access. */
7694 dalign /= BITS_PER_UNIT;
7695 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7696 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7697 mask = dalign - 1;
7698 lsb = offset & -offset;
7699 mask &= lsb - 1;
7700 dalign = mask + 1;
7701
7702 return dalign >= dsize;
7703 }
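
/* Worked example of the mask arithmetic above: with dalign == 8 and
   offset == 4 we get lsb == 4, mask == (8 - 1) & (4 - 1) == 3, and the
   known alignment drops to dalign == 4: a 4-byte access is then allowed
   but an 8-byte one is not, since the latter might straddle a 32k
   boundary.  */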
7704
7705 static bool
7706 constant_pool_expr_p (rtx op)
7707 {
7708 rtx base, offset;
7709
7710 split_const (op, &base, &offset);
7711 return (SYMBOL_REF_P (base)
7712 && CONSTANT_POOL_ADDRESS_P (base)
7713 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7714 }
7715
7716 /* These are only used to pass through from print_operand/print_operand_address
7717 to rs6000_output_addr_const_extra over the intervening function
7718 output_addr_const, which is not target code. */
7719 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7720
7721 /* Return true if OP is a toc pointer relative address (the output
7722 of create_TOC_reference). If STRICT, do not match non-split
7723 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7724 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7725 TOCREL_OFFSET_RET respectively. */
7726
7727 bool
7728 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7729 const_rtx *tocrel_offset_ret)
7730 {
7731 if (!TARGET_TOC)
7732 return false;
7733
7734 if (TARGET_CMODEL != CMODEL_SMALL)
7735 {
7736 /* When strict, ensure we have everything tidy. */
7737 if (strict
7738 && !(GET_CODE (op) == LO_SUM
7739 && REG_P (XEXP (op, 0))
7740 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7741 return false;
7742
7743 /* When not strict, allow non-split TOC addresses and also allow
7744 (lo_sum (high ..)) TOC addresses created during reload. */
7745 if (GET_CODE (op) == LO_SUM)
7746 op = XEXP (op, 1);
7747 }
7748
7749 const_rtx tocrel_base = op;
7750 const_rtx tocrel_offset = const0_rtx;
7751
7752 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7753 {
7754 tocrel_base = XEXP (op, 0);
7755 tocrel_offset = XEXP (op, 1);
7756 }
7757
7758 if (tocrel_base_ret)
7759 *tocrel_base_ret = tocrel_base;
7760 if (tocrel_offset_ret)
7761 *tocrel_offset_ret = tocrel_offset;
7762
7763 return (GET_CODE (tocrel_base) == UNSPEC
7764 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7765 && REG_P (XVECEXP (tocrel_base, 0, 1))
7766 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7767 }
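
/* Illustrative shape of a matched address (register and label names are
   examples only):

       (plus (unspec [(symbol_ref ".LC0") (reg 2)] UNSPEC_TOCREL)
             (const_int 8))

   where (reg 2) is the TOC pointer; TOCREL_BASE_RET receives the unspec
   and TOCREL_OFFSET_RET the (const_int 8).  */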
7768
7769 /* Return true if X is a constant pool address, and also for cmodel=medium
7770 if X is a toc-relative address known to be offsettable within MODE. */
7771
7772 bool
7773 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7774 bool strict)
7775 {
7776 const_rtx tocrel_base, tocrel_offset;
7777 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7778 && (TARGET_CMODEL != CMODEL_MEDIUM
7779 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7780 || mode == QImode
7781 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7782 INTVAL (tocrel_offset), mode)));
7783 }
7784
7785 static bool
7786 legitimate_small_data_p (machine_mode mode, rtx x)
7787 {
7788 return (DEFAULT_ABI == ABI_V4
7789 && !flag_pic && !TARGET_TOC
7790 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7791 && small_data_operand (x, mode));
7792 }
7793
7794 bool
7795 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7796 bool strict, bool worst_case)
7797 {
7798 unsigned HOST_WIDE_INT offset;
7799 unsigned int extra;
7800
7801 if (GET_CODE (x) != PLUS)
7802 return false;
7803 if (!REG_P (XEXP (x, 0)))
7804 return false;
7805 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7806 return false;
7807 if (mode_supports_dq_form (mode))
7808 return quad_address_p (x, mode, strict);
7809 if (!reg_offset_addressing_ok_p (mode))
7810 return virtual_stack_registers_memory_p (x);
7811 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7812 return true;
7813 if (!CONST_INT_P (XEXP (x, 1)))
7814 return false;
7815
7816 offset = INTVAL (XEXP (x, 1));
7817 extra = 0;
7818 switch (mode)
7819 {
7820 case E_DFmode:
7821 case E_DDmode:
7822 case E_DImode:
7823 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7824 addressing. */
7825 if (VECTOR_MEM_VSX_P (mode))
7826 return false;
7827
7828 if (!worst_case)
7829 break;
7830 if (!TARGET_POWERPC64)
7831 extra = 4;
7832 else if (offset & 3)
7833 return false;
7834 break;
7835
7836 case E_TFmode:
7837 case E_IFmode:
7838 case E_KFmode:
7839 case E_TDmode:
7840 case E_TImode:
7841 case E_PTImode:
7842 extra = 8;
7843 if (!worst_case)
7844 break;
7845 if (!TARGET_POWERPC64)
7846 extra = 12;
7847 else if (offset & 3)
7848 return false;
7849 break;
7850
7851 default:
7852 break;
7853 }
7854
7855 offset += 0x8000;
7856 return offset < 0x10000 - extra;
7857 }
7858
7859 bool
7860 legitimate_indexed_address_p (rtx x, int strict)
7861 {
7862 rtx op0, op1;
7863
7864 if (GET_CODE (x) != PLUS)
7865 return false;
7866
7867 op0 = XEXP (x, 0);
7868 op1 = XEXP (x, 1);
7869
7870 return (REG_P (op0) && REG_P (op1)
7871 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7872 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7873 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7874 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7875 }
7876
7877 bool
7878 avoiding_indexed_address_p (machine_mode mode)
7879 {
7880 /* Avoid indexed addressing for modes that have non-indexed
7881 load/store instruction forms. */
7882 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7883 }
7884
7885 bool
7886 legitimate_indirect_address_p (rtx x, int strict)
7887 {
7888 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7889 }
7890
7891 bool
7892 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7893 {
7894 if (!TARGET_MACHO || !flag_pic
7895 || mode != SImode || !MEM_P (x))
7896 return false;
7897 x = XEXP (x, 0);
7898
7899 if (GET_CODE (x) != LO_SUM)
7900 return false;
7901 if (!REG_P (XEXP (x, 0)))
7902 return false;
7903 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7904 return false;
7905 x = XEXP (x, 1);
7906
7907 return CONSTANT_P (x);
7908 }
7909
7910 static bool
7911 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7912 {
7913 if (GET_CODE (x) != LO_SUM)
7914 return false;
7915 if (!REG_P (XEXP (x, 0)))
7916 return false;
7917 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7918 return false;
7919 /* Quad word addresses are restricted, and we can't use LO_SUM. */
7920 if (mode_supports_dq_form (mode))
7921 return false;
7922 x = XEXP (x, 1);
7923
7924 if (TARGET_ELF || TARGET_MACHO)
7925 {
7926 bool large_toc_ok;
7927
7928 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7929 return false;
7930 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7931 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7932 recognizes some LO_SUM addresses as valid although this
7933 function says the opposite. In most cases LRA can generate
7934 correct code for address reloads through its various
7935 transformations; only some LO_SUM cases are beyond it. So we
7936 add code here saying that some addresses are still valid. */
7937 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7938 && small_toc_ref (x, VOIDmode));
7939 if (TARGET_TOC && ! large_toc_ok)
7940 return false;
7941 if (GET_MODE_NUNITS (mode) != 1)
7942 return false;
7943 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7944 && !(/* ??? Assume floating point reg based on mode? */
7945 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
7946 return false;
7947
7948 return CONSTANT_P (x) || large_toc_ok;
7949 }
7950
7951 return false;
7952 }
7953
7954
7955 /* Try machine-dependent ways of modifying an illegitimate address
7956 to be legitimate. If we find one, return the new, valid address.
7957 This is used from only one place: `memory_address' in explow.c.
7958
7959 OLDX is the address as it was before break_out_memory_refs was
7960 called. In some cases it is useful to look at this to decide what
7961 needs to be done.
7962
7963 It is always safe for this function to do nothing. It exists to
7964 recognize opportunities to optimize the output.
7965
7966 On RS/6000, first check for the sum of a register with a constant
7967 integer that is out of range. If so, generate code to add the
7968 constant with the low-order 16 bits masked to the register and force
7969 this result into another register (this can be done with `cau').
7970 Then generate an address of REG+(CONST&0xffff), allowing for the
7971 possibility of bit 16 being a one.
7972
7973 Then check for the sum of a register and something not constant, try to
7974 load the other things into a register and return the sum. */
7975
7976 static rtx
7977 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7978 machine_mode mode)
7979 {
7980 unsigned int extra;
7981
7982 if (!reg_offset_addressing_ok_p (mode)
7983 || mode_supports_dq_form (mode))
7984 {
7985 if (virtual_stack_registers_memory_p (x))
7986 return x;
7987
7988 /* In theory we should not be seeing addresses of the form reg+0,
7989 but just in case it is generated, optimize it away. */
7990 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7991 return force_reg (Pmode, XEXP (x, 0));
7992
7993 /* For TImode with load/store quad, restrict addresses to just a single
7994 pointer, so it works with both GPRs and VSX registers. */
7995 /* Make sure both operands are registers. */
7996 else if (GET_CODE (x) == PLUS
7997 && (mode != TImode || !TARGET_VSX))
7998 return gen_rtx_PLUS (Pmode,
7999 force_reg (Pmode, XEXP (x, 0)),
8000 force_reg (Pmode, XEXP (x, 1)));
8001 else
8002 return force_reg (Pmode, x);
8003 }
8004 if (SYMBOL_REF_P (x))
8005 {
8006 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8007 if (model != 0)
8008 return rs6000_legitimize_tls_address (x, model);
8009 }
8010
8011 extra = 0;
8012 switch (mode)
8013 {
8014 case E_TFmode:
8015 case E_TDmode:
8016 case E_TImode:
8017 case E_PTImode:
8018 case E_IFmode:
8019 case E_KFmode:
8020 /* As in legitimate_offset_address_p we do not assume
8021 worst-case. The mode here is just a hint as to the registers
8022 used. A TImode is usually in gprs, but may actually be in
8023 fprs. Leave worst-case scenario for reload to handle via
8024 insn constraints. PTImode is only GPRs. */
8025 extra = 8;
8026 break;
8027 default:
8028 break;
8029 }
8030
8031 if (GET_CODE (x) == PLUS
8032 && REG_P (XEXP (x, 0))
8033 && CONST_INT_P (XEXP (x, 1))
8034 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8035 >= 0x10000 - extra))
8036 {
8037 HOST_WIDE_INT high_int, low_int;
8038 rtx sum;
8039 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8040 if (low_int >= 0x8000 - extra)
8041 low_int = 0;
8042 high_int = INTVAL (XEXP (x, 1)) - low_int;
8043 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8044 GEN_INT (high_int)), 0);
8045 return plus_constant (Pmode, sum, low_int);
8046 }
8047 else if (GET_CODE (x) == PLUS
8048 && REG_P (XEXP (x, 0))
8049 && !CONST_INT_P (XEXP (x, 1))
8050 && GET_MODE_NUNITS (mode) == 1
8051 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8052 || (/* ??? Assume floating point reg based on mode? */
8053 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8054 && !avoiding_indexed_address_p (mode))
8055 {
8056 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8057 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8058 }
8059 else if ((TARGET_ELF
8060 #if TARGET_MACHO
8061 || !MACHO_DYNAMIC_NO_PIC_P
8062 #endif
8063 )
8064 && TARGET_32BIT
8065 && TARGET_NO_TOC
8066 && !flag_pic
8067 && !CONST_INT_P (x)
8068 && !CONST_WIDE_INT_P (x)
8069 && !CONST_DOUBLE_P (x)
8070 && CONSTANT_P (x)
8071 && GET_MODE_NUNITS (mode) == 1
8072 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8073 || (/* ??? Assume floating point reg based on mode? */
8074 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8075 {
8076 rtx reg = gen_reg_rtx (Pmode);
8077 if (TARGET_ELF)
8078 emit_insn (gen_elf_high (reg, x));
8079 else
8080 emit_insn (gen_macho_high (reg, x));
8081 return gen_rtx_LO_SUM (Pmode, reg, x);
8082 }
8083 else if (TARGET_TOC
8084 && SYMBOL_REF_P (x)
8085 && constant_pool_expr_p (x)
8086 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8087 return create_TOC_reference (x, NULL_RTX);
8088 else
8089 return x;
8090 }
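
/* Worked example of the high/low split above (hypothetical values):
   legitimizing (plus (reg 3) (const_int 0x12340)) computes
   low_int == 0x2340 and high_int == 0x10000, yielding roughly

       addis tmp,r3,1        # tmp = r3 + 0x10000

   with the final address (plus (reg tmp) (const_int 0x2340)), so both
   halves fit a signed 16-bit displacement field.  */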
8091
8092 /* Debug version of rs6000_legitimize_address. */
8093 static rtx
8094 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8095 {
8096 rtx ret;
8097 rtx_insn *insns;
8098
8099 start_sequence ();
8100 ret = rs6000_legitimize_address (x, oldx, mode);
8101 insns = get_insns ();
8102 end_sequence ();
8103
8104 if (ret != x)
8105 {
8106 fprintf (stderr,
8107 "\nrs6000_legitimize_address: mode %s, old code %s, "
8108 "new code %s, modified\n",
8109 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8110 GET_RTX_NAME (GET_CODE (ret)));
8111
8112 fprintf (stderr, "Original address:\n");
8113 debug_rtx (x);
8114
8115 fprintf (stderr, "oldx:\n");
8116 debug_rtx (oldx);
8117
8118 fprintf (stderr, "New address:\n");
8119 debug_rtx (ret);
8120
8121 if (insns)
8122 {
8123 fprintf (stderr, "Insns added:\n");
8124 debug_rtx_list (insns, 20);
8125 }
8126 }
8127 else
8128 {
8129 fprintf (stderr,
8130 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8131 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8132
8133 debug_rtx (x);
8134 }
8135
8136 if (insns)
8137 emit_insn (insns);
8138
8139 return ret;
8140 }
8141
8142 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8143 We need to emit DTP-relative relocations. */
8144
8145 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8146 static void
8147 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8148 {
8149 switch (size)
8150 {
8151 case 4:
8152 fputs ("\t.long\t", file);
8153 break;
8154 case 8:
8155 fputs (DOUBLE_INT_ASM_OP, file);
8156 break;
8157 default:
8158 gcc_unreachable ();
8159 }
8160 output_addr_const (file, x);
8161 if (TARGET_ELF)
8162 fputs ("@dtprel+0x8000", file);
8163 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8164 {
8165 switch (SYMBOL_REF_TLS_MODEL (x))
8166 {
8167 case 0:
8168 break;
8169 case TLS_MODEL_LOCAL_EXEC:
8170 fputs ("@le", file);
8171 break;
8172 case TLS_MODEL_INITIAL_EXEC:
8173 fputs ("@ie", file);
8174 break;
8175 case TLS_MODEL_GLOBAL_DYNAMIC:
8176 case TLS_MODEL_LOCAL_DYNAMIC:
8177 fputs ("@m", file);
8178 break;
8179 default:
8180 gcc_unreachable ();
8181 }
8182 }
8183 }
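
/* Example output (illustrative): a 4-byte DTP-relative reference to a
   symbol "var" on ELF is emitted as

       .long   var@dtprel+0x8000

   while on XCOFF the suffix instead encodes the TLS model (@le, @ie or
   @m).  */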
8184
8185 /* Return true if X is a symbol that refers to real (rather than emulated)
8186 TLS. */
8187
8188 static bool
8189 rs6000_real_tls_symbol_ref_p (rtx x)
8190 {
8191 return (SYMBOL_REF_P (x)
8192 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8193 }
8194
8195 /* In the name of slightly smaller debug output, and to cater to
8196 general assembler lossage, recognize various UNSPEC sequences
8197 and turn them back into a direct symbol reference. */
8198
8199 static rtx
8200 rs6000_delegitimize_address (rtx orig_x)
8201 {
8202 rtx x, y, offset;
8203
8204 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8205 orig_x = XVECEXP (orig_x, 0, 0);
8206
8207 orig_x = delegitimize_mem_from_attrs (orig_x);
8208
8209 x = orig_x;
8210 if (MEM_P (x))
8211 x = XEXP (x, 0);
8212
8213 y = x;
8214 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8215 y = XEXP (y, 1);
8216
8217 offset = NULL_RTX;
8218 if (GET_CODE (y) == PLUS
8219 && GET_MODE (y) == Pmode
8220 && CONST_INT_P (XEXP (y, 1)))
8221 {
8222 offset = XEXP (y, 1);
8223 y = XEXP (y, 0);
8224 }
8225
8226 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8227 {
8228 y = XVECEXP (y, 0, 0);
8229
8230 #ifdef HAVE_AS_TLS
8231 /* Do not associate thread-local symbols with the original
8232 constant pool symbol. */
8233 if (TARGET_XCOFF
8234 && SYMBOL_REF_P (y)
8235 && CONSTANT_POOL_ADDRESS_P (y)
8236 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8237 return orig_x;
8238 #endif
8239
8240 if (offset != NULL_RTX)
8241 y = gen_rtx_PLUS (Pmode, y, offset);
8242 if (!MEM_P (orig_x))
8243 return y;
8244 else
8245 return replace_equiv_address_nv (orig_x, y);
8246 }
8247
8248 if (TARGET_MACHO
8249 && GET_CODE (orig_x) == LO_SUM
8250 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8251 {
8252 y = XEXP (XEXP (orig_x, 1), 0);
8253 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8254 return XVECEXP (y, 0, 0);
8255 }
8256
8257 return orig_x;
8258 }
8259
8260 /* Return true if X shouldn't be emitted into the debug info.
8261 The linker doesn't like .toc section references from
8262 .debug_* sections, so reject .toc section symbols. */
8263
8264 static bool
8265 rs6000_const_not_ok_for_debug_p (rtx x)
8266 {
8267 if (GET_CODE (x) == UNSPEC)
8268 return true;
8269 if (SYMBOL_REF_P (x)
8270 && CONSTANT_POOL_ADDRESS_P (x))
8271 {
8272 rtx c = get_pool_constant (x);
8273 machine_mode cmode = get_pool_mode (x);
8274 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8275 return true;
8276 }
8277
8278 return false;
8279 }
8280
8281 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8282
8283 static bool
8284 rs6000_legitimate_combined_insn (rtx_insn *insn)
8285 {
8286 int icode = INSN_CODE (insn);
8287
8288 /* Reject creating doloop insns. Combine should not be allowed
8289 to create these for a number of reasons:
8290 1) In a nested loop, if combine creates one of these in an
8291 outer loop and the register allocator happens to allocate ctr
8292 to the outer loop insn, then the inner loop can't use ctr.
8293 Inner loops ought to be more highly optimized.
8294 2) Combine often wants to create one of these from what was
8295 originally a three insn sequence, first combining the three
8296 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8297 allocated ctr, the splitter takes us back to the three insn
8298 sequence. It's better to stop combine at the two insn
8299 sequence.
8300 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8301 insns, the register allocator sometimes uses floating point
8302 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8303 jump insn and output reloads are not implemented for jumps,
8304 the ctrsi/ctrdi splitters need to handle all possible cases.
8305 That's a pain, and it gets to be seriously difficult when a
8306 splitter that runs after reload needs memory to transfer from
8307 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8308 for the difficult case. It's better to not create problems
8309 in the first place. */
8310 if (icode != CODE_FOR_nothing
8311 && (icode == CODE_FOR_bdz_si
8312 || icode == CODE_FOR_bdz_di
8313 || icode == CODE_FOR_bdnz_si
8314 || icode == CODE_FOR_bdnz_di
8315 || icode == CODE_FOR_bdztf_si
8316 || icode == CODE_FOR_bdztf_di
8317 || icode == CODE_FOR_bdnztf_si
8318 || icode == CODE_FOR_bdnztf_di))
8319 return false;
8320
8321 return true;
8322 }
8323
8324 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8325
8326 static GTY(()) rtx rs6000_tls_symbol;
8327 static rtx
8328 rs6000_tls_get_addr (void)
8329 {
8330 if (!rs6000_tls_symbol)
8331 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8332
8333 return rs6000_tls_symbol;
8334 }
8335
8336 /* Construct the SYMBOL_REF for TLS GOT references. */
8337
8338 static GTY(()) rtx rs6000_got_symbol;
8339 rtx
8340 rs6000_got_sym (void)
8341 {
8342 if (!rs6000_got_symbol)
8343 {
8344 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8345 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8346 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8347 }
8348
8349 return rs6000_got_symbol;
8350 }
8351
8352 /* AIX Thread-Local Address support. */
8353
8354 static rtx
8355 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8356 {
8357 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8358 const char *name;
8359 char *tlsname;
8360
8361 name = XSTR (addr, 0);
8362 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8363 or the symbol will be in TLS private data section. */
8364 if (name[strlen (name) - 1] != ']'
8365 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8366 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8367 {
8368 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8369 strcpy (tlsname, name);
8370 strcat (tlsname,
8371 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8372 tlsaddr = copy_rtx (addr);
8373 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8374 }
8375 else
8376 tlsaddr = addr;
8377
8378 /* Place addr into TOC constant pool. */
8379 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8380
8381 /* Output the TOC entry and create the MEM referencing the value. */
8382 if (constant_pool_expr_p (XEXP (sym, 0))
8383 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8384 {
8385 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8386 mem = gen_const_mem (Pmode, tocref);
8387 set_mem_alias_set (mem, get_TOC_alias_set ());
8388 }
8389 else
8390 return sym;
8391
8392 /* Use global-dynamic for local-dynamic. */
8393 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8394 || model == TLS_MODEL_LOCAL_DYNAMIC)
8395 {
8396 /* Create new TOC reference for @m symbol. */
8397 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8398 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8399 strcpy (tlsname, "*LCM");
8400 strcat (tlsname, name + 3);
8401 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8402 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8403 tocref = create_TOC_reference (modaddr, NULL_RTX);
8404 rtx modmem = gen_const_mem (Pmode, tocref);
8405 set_mem_alias_set (modmem, get_TOC_alias_set ());
8406
8407 rtx modreg = gen_reg_rtx (Pmode);
8408 emit_insn (gen_rtx_SET (modreg, modmem));
8409
8410 tmpreg = gen_reg_rtx (Pmode);
8411 emit_insn (gen_rtx_SET (tmpreg, mem));
8412
8413 dest = gen_reg_rtx (Pmode);
8414 if (TARGET_32BIT)
8415 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8416 else
8417 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8418 return dest;
8419 }
8420 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8421 else if (TARGET_32BIT)
8422 {
8423 tlsreg = gen_reg_rtx (SImode);
8424 emit_insn (gen_tls_get_tpointer (tlsreg));
8425 }
8426 else
8427 tlsreg = gen_rtx_REG (DImode, 13);
8428
8429 /* Load the TOC value into temporary register. */
8430 tmpreg = gen_reg_rtx (Pmode);
8431 emit_insn (gen_rtx_SET (tmpreg, mem));
8432 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8433 gen_rtx_MINUS (Pmode, addr, tlsreg));
8434
8435 /* Add TOC symbol value to TLS pointer. */
8436 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8437
8438 return dest;
8439 }
8440
8441 /* Output arg setup instructions for a !TARGET_TLS_MARKERS
8442 __tls_get_addr call. */
8443
8444 void
8445 rs6000_output_tlsargs (rtx *operands)
8446 {
8447 /* Set up operands for output_asm_insn, without modifying OPERANDS. */
8448 rtx op[3];
8449
8450 /* The set dest of the call, i.e. r3, which is also the first arg reg. */
8451 op[0] = operands[0];
8452 /* The TLS symbol from global_tlsarg stashed as CALL operand 2. */
8453 op[1] = XVECEXP (operands[2], 0, 0);
8454 if (XINT (operands[2], 1) == UNSPEC_TLSGD)
8455 {
8456 /* The GOT register. */
8457 op[2] = XVECEXP (operands[2], 0, 1);
8458 if (TARGET_CMODEL != CMODEL_SMALL)
8459 output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
8460 "addi %0,%0,%1@got@tlsgd@l", op);
8461 else
8462 output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
8463 }
8464 else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
8465 {
8466 if (TARGET_CMODEL != CMODEL_SMALL)
8467 output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
8468 "addi %0,%0,%&@got@tlsld@l", op);
8469 else
8470 output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
8471 }
8472 else
8473 gcc_unreachable ();
8474 }
8475
8476 /* Passes the TLS arg value from the global-dynamic and local-dynamic
8477 emit_library_call_value calls in rs6000_legitimize_tls_address down to
8478 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8479 marker relocs put on __tls_get_addr calls. */
8480 static rtx global_tlsarg;
8481
8482 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8483 this (thread-local) address. */
8484
8485 static rtx
8486 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8487 {
8488 rtx dest, insn;
8489
8490 if (TARGET_XCOFF)
8491 return rs6000_legitimize_tls_address_aix (addr, model);
8492
8493 dest = gen_reg_rtx (Pmode);
8494 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8495 {
8496 rtx tlsreg;
8497
8498 if (TARGET_64BIT)
8499 {
8500 tlsreg = gen_rtx_REG (Pmode, 13);
8501 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8502 }
8503 else
8504 {
8505 tlsreg = gen_rtx_REG (Pmode, 2);
8506 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8507 }
8508 emit_insn (insn);
8509 }
8510 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8511 {
8512 rtx tlsreg, tmp;
8513
8514 tmp = gen_reg_rtx (Pmode);
8515 if (TARGET_64BIT)
8516 {
8517 tlsreg = gen_rtx_REG (Pmode, 13);
8518 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8519 }
8520 else
8521 {
8522 tlsreg = gen_rtx_REG (Pmode, 2);
8523 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8524 }
8525 emit_insn (insn);
8526 if (TARGET_64BIT)
8527 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8528 else
8529 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8530 emit_insn (insn);
8531 }
8532 else
8533 {
8534 rtx got, tga, tmp1, tmp2;
8535
8536 /* We currently use relocations like @got@tlsgd for tls, which
8537 means the linker will handle allocation of tls entries, placing
8538 them in the .got section. So use a pointer to the .got section,
8539 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8540 or to secondary GOT sections used by 32-bit -fPIC. */
8541 if (TARGET_64BIT)
8542 got = gen_rtx_REG (Pmode, 2);
8543 else
8544 {
8545 if (flag_pic == 1)
8546 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8547 else
8548 {
8549 rtx gsym = rs6000_got_sym ();
8550 got = gen_reg_rtx (Pmode);
8551 if (flag_pic == 0)
8552 rs6000_emit_move (got, gsym, Pmode);
8553 else
8554 {
8555 rtx mem, lab;
8556
8557 tmp1 = gen_reg_rtx (Pmode);
8558 tmp2 = gen_reg_rtx (Pmode);
8559 mem = gen_const_mem (Pmode, tmp1);
8560 lab = gen_label_rtx ();
8561 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8562 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8563 if (TARGET_LINK_STACK)
8564 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8565 emit_move_insn (tmp2, mem);
8566 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8567 set_unique_reg_note (last, REG_EQUAL, gsym);
8568 }
8569 }
8570 }
8571
8572 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8573 {
8574 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8575 UNSPEC_TLSGD);
8576 tga = rs6000_tls_get_addr ();
8577 global_tlsarg = arg;
8578 if (TARGET_TLS_MARKERS)
8579 {
8580 rtx argreg = gen_rtx_REG (Pmode, 3);
8581 emit_insn (gen_rtx_SET (argreg, arg));
8582 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
8583 argreg, Pmode);
8584 }
8585 else
8586 emit_library_call_value (tga, dest, LCT_CONST, Pmode);
8587 global_tlsarg = NULL_RTX;
8588
8589 /* Make a note so that the result of this call can be CSEd. */
8590 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8591 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8592 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8593 }
8594 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8595 {
8596 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8597 tga = rs6000_tls_get_addr ();
8598 tmp1 = gen_reg_rtx (Pmode);
8599 global_tlsarg = arg;
8600 if (TARGET_TLS_MARKERS)
8601 {
8602 rtx argreg = gen_rtx_REG (Pmode, 3);
8603 emit_insn (gen_rtx_SET (argreg, arg));
8604 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
8605 argreg, Pmode);
8606 }
8607 else
8608 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
8609 global_tlsarg = NULL_RTX;
8610
8611 /* Make a note so that the result of this call can be CSEd. */
8612 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8613 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8614 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8615
8616 if (rs6000_tls_size == 16)
8617 {
8618 if (TARGET_64BIT)
8619 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8620 else
8621 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8622 }
8623 else if (rs6000_tls_size == 32)
8624 {
8625 tmp2 = gen_reg_rtx (Pmode);
8626 if (TARGET_64BIT)
8627 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8628 else
8629 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8630 emit_insn (insn);
8631 if (TARGET_64BIT)
8632 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8633 else
8634 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8635 }
8636 else
8637 {
8638 tmp2 = gen_reg_rtx (Pmode);
8639 if (TARGET_64BIT)
8640 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8641 else
8642 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8643 emit_insn (insn);
8644 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8645 }
8646 emit_insn (insn);
8647 }
8648 else
8649 {
8650 /* IE, or 64-bit offset LE. */
8651 tmp2 = gen_reg_rtx (Pmode);
8652 if (TARGET_64BIT)
8653 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8654 else
8655 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8656 emit_insn (insn);
8657 if (TARGET_64BIT)
8658 insn = gen_tls_tls_64 (dest, tmp2, addr);
8659 else
8660 insn = gen_tls_tls_32 (dest, tmp2, addr);
8661 emit_insn (insn);
8662 }
8663 }
8664
8665 return dest;
8666 }
8667
8668 /* Only create the global variable for the stack protect guard if we are using
8669 the global flavor of that guard. */
8670 static tree
8671 rs6000_init_stack_protect_guard (void)
8672 {
8673 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8674 return default_stack_protect_guard ();
8675
8676 return NULL_TREE;
8677 }
8678
8679 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8680
8681 static bool
8682 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8683 {
8684 if (GET_CODE (x) == HIGH
8685 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8686 return true;
8687
8688 /* A TLS symbol in the TOC cannot contain a sum. */
8689 if (GET_CODE (x) == CONST
8690 && GET_CODE (XEXP (x, 0)) == PLUS
8691 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8692 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8693 return true;
8694
8695 /* Do not place an ELF TLS symbol in the constant pool. */
8696 return TARGET_ELF && tls_referenced_p (x);
8697 }
8698
8699 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8700 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8701 can be addressed relative to the toc pointer. */
8702
8703 static bool
8704 use_toc_relative_ref (rtx sym, machine_mode mode)
8705 {
8706 return ((constant_pool_expr_p (sym)
8707 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8708 get_pool_mode (sym)))
8709 || (TARGET_CMODEL == CMODEL_MEDIUM
8710 && SYMBOL_REF_LOCAL_P (sym)
8711 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8712 }
8713
8714 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8715 that is a valid memory address for an instruction.
8716 The MODE argument is the machine mode for the MEM expression
8717 that wants to use this address.
8718
8719 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8720 refers to a constant pool entry of an address (or the sum of it
8721 plus a constant), a short (16-bit signed) constant plus a register,
8722 the sum of two registers, or a register indirect, possibly with an
8723 auto-increment. For DFmode, DDmode and DImode with a constant plus
8724 register, we must ensure that both words are addressable or PowerPC64
8725 with offset word aligned.
8726
8727 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8728 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8729 because adjacent memory cells are accessed by adding word-sized offsets
8730 during assembly output. */
8731 static bool
8732 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8733 {
8734 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8735 bool quad_offset_p = mode_supports_dq_form (mode);
8736
8737 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8738 if (VECTOR_MEM_ALTIVEC_P (mode)
8739 && GET_CODE (x) == AND
8740 && CONST_INT_P (XEXP (x, 1))
8741 && INTVAL (XEXP (x, 1)) == -16)
8742 x = XEXP (x, 0);
8743
8744 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8745 return 0;
8746 if (legitimate_indirect_address_p (x, reg_ok_strict))
8747 return 1;
8748 if (TARGET_UPDATE
8749 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8750 && mode_supports_pre_incdec_p (mode)
8751 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8752 return 1;
8753 /* Handle restricted vector d-form offsets in ISA 3.0. */
8754 if (quad_offset_p)
8755 {
8756 if (quad_address_p (x, mode, reg_ok_strict))
8757 return 1;
8758 }
8759 else if (virtual_stack_registers_memory_p (x))
8760 return 1;
8761
8762 else if (reg_offset_p)
8763 {
8764 if (legitimate_small_data_p (mode, x))
8765 return 1;
8766 if (legitimate_constant_pool_address_p (x, mode,
8767 reg_ok_strict || lra_in_progress))
8768 return 1;
8769 }
8770
8771 /* For TImode, if we have TImode in VSX registers, only allow register
8772 indirect addresses. This will allow the values to go in either GPRs
8773 or VSX registers without reloading. The vector types would tend to
8774 go into VSX registers, so we allow REG+REG, while TImode seems
8775 somewhat split, in that some uses are GPR based, and some VSX based. */
8776 /* FIXME: We could loosen this by changing the following to
8777 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8778 but currently we cannot allow REG+REG addressing for TImode. See
8779 PR72827 for complete details on how this ends up hoodwinking DSE. */
8780 if (mode == TImode && TARGET_VSX)
8781 return 0;
8782 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8783 if (! reg_ok_strict
8784 && reg_offset_p
8785 && GET_CODE (x) == PLUS
8786 && REG_P (XEXP (x, 0))
8787 && (XEXP (x, 0) == virtual_stack_vars_rtx
8788 || XEXP (x, 0) == arg_pointer_rtx)
8789 && CONST_INT_P (XEXP (x, 1)))
8790 return 1;
8791 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8792 return 1;
8793 if (!FLOAT128_2REG_P (mode)
8794 && (TARGET_HARD_FLOAT
8795 || TARGET_POWERPC64
8796 || (mode != DFmode && mode != DDmode))
8797 && (TARGET_POWERPC64 || mode != DImode)
8798 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8799 && mode != PTImode
8800 && !avoiding_indexed_address_p (mode)
8801 && legitimate_indexed_address_p (x, reg_ok_strict))
8802 return 1;
8803 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8804 && mode_supports_pre_modify_p (mode)
8805 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8806 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8807 reg_ok_strict, false)
8808 || (!avoiding_indexed_address_p (mode)
8809 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8810 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8811 return 1;
8812 if (reg_offset_p && !quad_offset_p
8813 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8814 return 1;
8815 return 0;
8816 }
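
/* A standalone sketch (separate from this file) of the masking that the
   lvx/stvx AND form stripped above encodes: the hardware ignores the low
   four bits of the address, so AND-ing with -16 rounds it down to the
   16-byte boundary actually accessed.  The example addresses are
   arbitrary.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t addrs[] = { 0x1000, 0x1001, 0x100f, 0x1010 };
  for (int i = 0; i < 4; i++)
    /* Prints 0x1000 0x1000 0x1000 0x1010: the low nibble is dropped.  */
    printf ("%#llx -> %#llx\n", (unsigned long long) addrs[i],
            (unsigned long long) (addrs[i] & -16));
  return 0;
}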
8817
8818 /* Debug version of rs6000_legitimate_address_p. */
8819 static bool
8820 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8821 bool reg_ok_strict)
8822 {
8823 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8824 fprintf (stderr,
8825 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8826 "strict = %d, reload = %s, code = %s\n",
8827 ret ? "true" : "false",
8828 GET_MODE_NAME (mode),
8829 reg_ok_strict,
8830 (reload_completed ? "after" : "before"),
8831 GET_RTX_NAME (GET_CODE (x)));
8832 debug_rtx (x);
8833
8834 return ret;
8835 }
8836
8837 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8838
8839 static bool
8840 rs6000_mode_dependent_address_p (const_rtx addr,
8841 addr_space_t as ATTRIBUTE_UNUSED)
8842 {
8843 return rs6000_mode_dependent_address_ptr (addr);
8844 }
8845
8846 /* Go to LABEL if ADDR (a legitimate address expression)
8847 has an effect that depends on the machine mode it is used for.
8848
8849 On the RS/6000 this is true of all integral offsets (since AltiVec and
8850 VSX modes don't allow them) and of pre-increment and decrement addresses.
8851
8852 ??? Except that due to conceptual problems in offsettable_address_p
8853 we can't really report the problems of integral offsets. So leave
8854 this assuming that the adjustable offset must be valid for the
8855 sub-words of a TFmode operand, which is what we had before. */
8856
8857 static bool
8858 rs6000_mode_dependent_address (const_rtx addr)
8859 {
8860 switch (GET_CODE (addr))
8861 {
8862 case PLUS:
8863 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8864 is considered a legitimate address before reload, so there
8865 are no offset restrictions in that case. Note that this
8866 condition is safe in strict mode because any address involving
8867 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8868 been rejected as illegitimate. */
8869 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8870 && XEXP (addr, 0) != arg_pointer_rtx
8871 && CONST_INT_P (XEXP (addr, 1)))
8872 {
8873 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8874 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8875 }
8876 break;
8877
8878 case LO_SUM:
8879 /* Anything in the constant pool is sufficiently aligned that
8880 all bytes have the same high part address. */
8881 return !legitimate_constant_pool_address_p (addr, QImode, false);
8882
8883 /* Auto-increment cases are now treated generically in recog.c. */
8884 case PRE_MODIFY:
8885 return TARGET_UPDATE;
8886
8887 /* AND is only allowed in Altivec loads. */
8888 case AND:
8889 return true;
8890
8891 default:
8892 break;
8893 }
8894
8895 return false;
8896 }
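
/* A standalone sketch of the PLUS-case arithmetic above: an offset is
   mode-dependent when it sits so close to the top of the signed 16-bit
   D field that a worst-case multi-word access (8 more bytes on
   PowerPC64, 12 on 32-bit) would overflow it.  The sample offsets are
   arbitrary.  */
#include <stdio.h>

static int
offset_is_mode_dependent (long long val, int powerpc64)
{
  unsigned long long biased = (unsigned long long) val + 0x8000;
  return biased >= 0x10000ULL - (powerpc64 ? 8 : 12);
}

int
main (void)
{
  printf ("%d %d %d\n",
          offset_is_mode_dependent (32752, 1),    /* 0: 0x7ff0 + 8 fits */
          offset_is_mode_dependent (32760, 1),    /* 1: 0x7ff8 + 8 overflows */
          offset_is_mode_dependent (-32768, 1));  /* 0: room to grow upward */
  return 0;
}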
8897
8898 /* Debug version of rs6000_mode_dependent_address. */
8899 static bool
8900 rs6000_debug_mode_dependent_address (const_rtx addr)
8901 {
8902 bool ret = rs6000_mode_dependent_address (addr);
8903
8904 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8905 ret ? "true" : "false");
8906 debug_rtx (addr);
8907
8908 return ret;
8909 }
8910
8911 /* Implement FIND_BASE_TERM. */
8912
8913 rtx
8914 rs6000_find_base_term (rtx op)
8915 {
8916 rtx base;
8917
8918 base = op;
8919 if (GET_CODE (base) == CONST)
8920 base = XEXP (base, 0);
8921 if (GET_CODE (base) == PLUS)
8922 base = XEXP (base, 0);
8923 if (GET_CODE (base) == UNSPEC)
8924 switch (XINT (base, 1))
8925 {
8926 case UNSPEC_TOCREL:
8927 case UNSPEC_MACHOPIC_OFFSET:
8928 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8929 for aliasing purposes. */
8930 return XVECEXP (base, 0, 0);
8931 }
8932
8933 return op;
8934 }
8935
8936 /* More elaborate version of recog's offsettable_memref_p predicate
8937 that works around the ??? note of rs6000_mode_dependent_address.
8938 In particular it accepts
8939
8940 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8941
8942 in 32-bit mode, which the recog predicate rejects. */
8943
8944 static bool
8945 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
8946 {
8947 bool worst_case;
8948
8949 if (!MEM_P (op))
8950 return false;
8951
8952 /* First mimic offsettable_memref_p. */
8953 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
8954 return true;
8955
8956 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8957 the latter predicate knows nothing about the mode of the memory
8958 reference and, therefore, assumes that it is the largest supported
8959 mode (TFmode). As a consequence, legitimate offsettable memory
8960 references are rejected. rs6000_legitimate_offset_address_p contains
8961 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8962 at least with a little bit of help here given that we know the
8963 actual registers used. */
8964 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8965 || GET_MODE_SIZE (reg_mode) == 4);
8966 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8967 strict, worst_case);
8968 }
8969
8970 /* Determine the reassociation width to be used in reassociate_bb.
8971 This takes into account how many parallel operations we
8972 can actually do of a given type, and also the latency.
8973 P8:
8974 int add/sub 6/cycle
8975 mul 2/cycle
8976 vect add/sub/mul 2/cycle
8977 fp add/sub/mul 2/cycle
8978 dfp 1/cycle
8979 */
8980
8981 static int
8982 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
8983 machine_mode mode)
8984 {
8985 switch (rs6000_tune)
8986 {
8987 case PROCESSOR_POWER8:
8988 case PROCESSOR_POWER9:
8989 case PROCESSOR_FUTURE:
8990 if (DECIMAL_FLOAT_MODE_P (mode))
8991 return 1;
8992 if (VECTOR_MODE_P (mode))
8993 return 4;
8994 if (INTEGRAL_MODE_P (mode))
8995 return 1;
8996 if (FLOAT_MODE_P (mode))
8997 return 4;
8998 break;
8999 default:
9000 break;
9001 }
9002 return 1;
9003 }
9004
9005 /* Change register usage conditional on target flags. */
9006 static void
9007 rs6000_conditional_register_usage (void)
9008 {
9009 int i;
9010
9011 if (TARGET_DEBUG_TARGET)
9012 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9013
9014 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9015 if (TARGET_64BIT)
9016 fixed_regs[13] = call_used_regs[13]
9017 = call_really_used_regs[13] = 1;
9018
9019 /* Conditionally disable FPRs. */
9020 if (TARGET_SOFT_FLOAT)
9021 for (i = 32; i < 64; i++)
9022 fixed_regs[i] = call_used_regs[i]
9023 = call_really_used_regs[i] = 1;
9024
9025 /* The TOC register is not killed across calls in a way that is
9026 visible to the compiler. */
9027 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9028 call_really_used_regs[2] = 0;
9029
9030 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9031 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9032
9033 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9034 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9035 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9036 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9037
9038 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9039 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9040 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9041 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9042
9043 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9044 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9045 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9046
9047 if (!TARGET_ALTIVEC && !TARGET_VSX)
9048 {
9049 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9050 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9051 call_really_used_regs[VRSAVE_REGNO] = 1;
9052 }
9053
9054 if (TARGET_ALTIVEC || TARGET_VSX)
9055 global_regs[VSCR_REGNO] = 1;
9056
9057 if (TARGET_ALTIVEC_ABI)
9058 {
9059 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9060 call_used_regs[i] = call_really_used_regs[i] = 1;
9061
9062 /* AIX reserves VR20:31 in non-extended ABI mode. */
9063 if (TARGET_XCOFF)
9064 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9065 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9066 }
9067 }
9068
9069 \f
9070 /* Output insns to set DEST equal to the constant SOURCE as a series of
9071 lis, ori and shl instructions and return TRUE. */
9072
9073 bool
9074 rs6000_emit_set_const (rtx dest, rtx source)
9075 {
9076 machine_mode mode = GET_MODE (dest);
9077 rtx temp, set;
9078 rtx_insn *insn;
9079 HOST_WIDE_INT c;
9080
9081 gcc_checking_assert (CONST_INT_P (source));
9082 c = INTVAL (source);
9083 switch (mode)
9084 {
9085 case E_QImode:
9086 case E_HImode:
9087 emit_insn (gen_rtx_SET (dest, source));
9088 return true;
9089
9090 case E_SImode:
9091 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9092
9093 emit_insn (gen_rtx_SET (copy_rtx (temp),
9094 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9095 emit_insn (gen_rtx_SET (dest,
9096 gen_rtx_IOR (SImode, copy_rtx (temp),
9097 GEN_INT (c & 0xffff))));
9098 break;
9099
9100 case E_DImode:
9101 if (!TARGET_POWERPC64)
9102 {
9103 rtx hi, lo;
9104
9105 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9106 DImode);
9107 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9108 DImode);
9109 emit_move_insn (hi, GEN_INT (c >> 32));
9110 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9111 emit_move_insn (lo, GEN_INT (c));
9112 }
9113 else
9114 rs6000_emit_set_long_const (dest, c);
9115 break;
9116
9117 default:
9118 gcc_unreachable ();
9119 }
9120
9121 insn = get_last_insn ();
9122 set = single_set (insn);
9123 if (! CONSTANT_P (SET_SRC (set)))
9124 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9125
9126 return true;
9127 }
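
/* A standalone sketch of the sign-extension idiom used for the 32-bit
   DImode split above: mask to the low 32 bits, flip the sign bit with
   XOR, then subtract it back.  The result is the low word sign-extended
   to 64 bits without implementation-defined right shifts.  The constant
   is an arbitrary example.  */
#include <stdio.h>

static long long
sign_extend_32 (long long c)
{
  return ((c & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL;
}

int
main (void)
{
  /* The low word 0x89abcdef has its sign bit set, so it extends with
     ones: prints ffffffff89abcdef.  */
  printf ("%llx\n", (unsigned long long) sign_extend_32 (0x123456789abcdefLL));
  return 0;
}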
9128
9129 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9130 Output insns to set DEST equal to the constant C as a series of
9131 lis, ori and shl instructions. */
9132
9133 static void
9134 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9135 {
9136 rtx temp;
9137 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9138
9139 ud1 = c & 0xffff;
9140 c = c >> 16;
9141 ud2 = c & 0xffff;
9142 c = c >> 16;
9143 ud3 = c & 0xffff;
9144 c = c >> 16;
9145 ud4 = c & 0xffff;
9146
9147 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9148 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9149 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9150
9151 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9152 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9153 {
9154 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9155
9156 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9157 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9158 if (ud1 != 0)
9159 emit_move_insn (dest,
9160 gen_rtx_IOR (DImode, copy_rtx (temp),
9161 GEN_INT (ud1)));
9162 }
9163 else if (ud3 == 0 && ud4 == 0)
9164 {
9165 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9166
9167 gcc_assert (ud2 & 0x8000);
9168 emit_move_insn (copy_rtx (temp),
9169 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9170 if (ud1 != 0)
9171 emit_move_insn (copy_rtx (temp),
9172 gen_rtx_IOR (DImode, copy_rtx (temp),
9173 GEN_INT (ud1)));
9174 emit_move_insn (dest,
9175 gen_rtx_ZERO_EXTEND (DImode,
9176 gen_lowpart (SImode,
9177 copy_rtx (temp))));
9178 }
9179 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9180 || (ud4 == 0 && ! (ud3 & 0x8000)))
9181 {
9182 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9183
9184 emit_move_insn (copy_rtx (temp),
9185 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9186 if (ud2 != 0)
9187 emit_move_insn (copy_rtx (temp),
9188 gen_rtx_IOR (DImode, copy_rtx (temp),
9189 GEN_INT (ud2)));
9190 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9191 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9192 GEN_INT (16)));
9193 if (ud1 != 0)
9194 emit_move_insn (dest,
9195 gen_rtx_IOR (DImode, copy_rtx (temp),
9196 GEN_INT (ud1)));
9197 }
9198 else
9199 {
9200 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9201
9202 emit_move_insn (copy_rtx (temp),
9203 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9204 if (ud3 != 0)
9205 emit_move_insn (copy_rtx (temp),
9206 gen_rtx_IOR (DImode, copy_rtx (temp),
9207 GEN_INT (ud3)));
9208
9209 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9210 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9211 GEN_INT (32)));
9212 if (ud2 != 0)
9213 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9214 gen_rtx_IOR (DImode, copy_rtx (temp),
9215 GEN_INT (ud2 << 16)));
9216 if (ud1 != 0)
9217 emit_move_insn (dest,
9218 gen_rtx_IOR (DImode, copy_rtx (temp),
9219 GEN_INT (ud1)));
9220 }
9221 }
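
/* A standalone sketch of the halfword split above: the 64-bit constant
   is cut into four 16-bit pieces ud4..ud1, and the general case
   materializes it roughly as lis/ori for the high 32 bits, a 32-bit
   shift, then oris/ori for the low 32 bits.  The constant is an
   arbitrary example; the mnemonics only approximate the RTL emitted.  */
#include <stdio.h>

int
main (void)
{
  unsigned long long c = 0x123456789abcdef0ULL;
  unsigned ud1 = c & 0xffff;            /* 0xdef0 */
  unsigned ud2 = (c >> 16) & 0xffff;    /* 0x9abc */
  unsigned ud3 = (c >> 32) & 0xffff;    /* 0x5678 */
  unsigned ud4 = (c >> 48) & 0xffff;    /* 0x1234 */

  unsigned long long r = ((unsigned long long) ud4 << 16) | ud3; /* lis; ori */
  r <<= 32;                                                      /* sldi 32 */
  r |= ((unsigned long long) ud2 << 16) | ud1;                   /* oris; ori */
  printf ("%llx == %llx\n", r, c); /* the pieces reassemble exactly */
  return 0;
}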
9222
9223 /* Helper for the following. Get rid of [r+r] memory refs
9224 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9225
9226 static void
9227 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9228 {
9229 if (MEM_P (operands[0])
9230 && !REG_P (XEXP (operands[0], 0))
9231 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9232 GET_MODE (operands[0]), false))
9233 operands[0]
9234 = replace_equiv_address (operands[0],
9235 copy_addr_to_reg (XEXP (operands[0], 0)));
9236
9237 if (MEM_P (operands[1])
9238 && !REG_P (XEXP (operands[1], 0))
9239 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9240 GET_MODE (operands[1]), false))
9241 operands[1]
9242 = replace_equiv_address (operands[1],
9243 copy_addr_to_reg (XEXP (operands[1], 0)));
9244 }
9245
9246 /* Generate a vector of constants to permute MODE for a little-endian
9247 storage operation by swapping the two halves of a vector. */
9248 static rtvec
9249 rs6000_const_vec (machine_mode mode)
9250 {
9251 int i, subparts;
9252 rtvec v;
9253
9254 switch (mode)
9255 {
9256 case E_V1TImode:
9257 subparts = 1;
9258 break;
9259 case E_V2DFmode:
9260 case E_V2DImode:
9261 subparts = 2;
9262 break;
9263 case E_V4SFmode:
9264 case E_V4SImode:
9265 subparts = 4;
9266 break;
9267 case E_V8HImode:
9268 subparts = 8;
9269 break;
9270 case E_V16QImode:
9271 subparts = 16;
9272 break;
9273 default:
9274 gcc_unreachable ();
9275 }
9276
9277 v = rtvec_alloc (subparts);
9278
9279 for (i = 0; i < subparts / 2; ++i)
9280 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9281 for (i = subparts / 2; i < subparts; ++i)
9282 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9283
9284 return v;
9285 }
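
/* A standalone sketch of the selector rs6000_const_vec builds: the two
   halves of an N-element vector are swapped, giving the permutation
   N/2..N-1, 0..N/2-1.  For V4SImode (N = 4) that is 2 3 0 1.  */
#include <stdio.h>

int
main (void)
{
  int subparts = 4; /* V4SImode */
  for (int i = 0; i < subparts / 2; i++)
    printf ("%d ", i + subparts / 2);
  for (int i = subparts / 2; i < subparts; i++)
    printf ("%d ", i - subparts / 2);
  printf ("\n"); /* prints: 2 3 0 1 */
  return 0;
}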
9286
9287 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9288 store operation. */
9289 void
9290 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9291 {
9292 /* Scalar permutations are easier to express in integer modes rather than
9293 floating-point modes, so cast them here. We use V1TImode instead
9294 of TImode to ensure that the values don't go through GPRs. */
9295 if (FLOAT128_VECTOR_P (mode))
9296 {
9297 dest = gen_lowpart (V1TImode, dest);
9298 source = gen_lowpart (V1TImode, source);
9299 mode = V1TImode;
9300 }
9301
9302 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9303 scalar. */
9304 if (mode == TImode || mode == V1TImode)
9305 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9306 GEN_INT (64))));
9307 else
9308 {
9309 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9310 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9311 }
9312 }
9313
9314 /* Emit a little-endian load from vector memory location SOURCE to VSX
9315 register DEST in mode MODE. The load is done with two permuting
9316 insns that represent an lxvd2x and xxpermdi.
9317 void
9318 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9319 {
9320 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9321 V1TImode). */
9322 if (mode == TImode || mode == V1TImode)
9323 {
9324 mode = V2DImode;
9325 dest = gen_lowpart (V2DImode, dest);
9326 source = adjust_address (source, V2DImode, 0);
9327 }
9328
9329 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9330 rs6000_emit_le_vsx_permute (tmp, source, mode);
9331 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9332 }
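
/* A standalone sketch of why the load above needs two permutes: on
   little-endian, lxvd2x delivers the two doublewords half-swapped
   relative to the desired register layout, and the trailing xxpermdi
   applies the same half-swap again.  Two half-swaps compose to the
   identity, so DEST ends up in element order.  The int array stands in
   for vector elements.  */
#include <stdio.h>

static void
half_swap (int *v, int n)
{
  for (int i = 0; i < n / 2; i++)
    {
      int t = v[i];
      v[i] = v[i + n / 2];
      v[i + n / 2] = t;
    }
}

int
main (void)
{
  int v[4] = { 0, 1, 2, 3 };
  half_swap (v, 4); /* the lxvd2x effect: 2 3 0 1 */
  half_swap (v, 4); /* the xxpermdi correction: back to 0 1 2 3 */
  printf ("%d %d %d %d\n", v[0], v[1], v[2], v[3]);
  return 0;
}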
9333
9334 /* Emit a little-endian store to vector memory location DEST from VSX
9335 register SOURCE in mode MODE. The store is done with two permuting
9336 insns that represent an xxpermdi and an stxvd2x.
9337 void
9338 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9339 {
9340 /* This should never be called during or after LRA, because it does
9341 not re-permute the source register. It is intended only for use
9342 during expand. */
9343 gcc_assert (!lra_in_progress && !reload_completed);
9344
9345 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9346 V1TImode). */
9347 if (mode == TImode || mode == V1TImode)
9348 {
9349 mode = V2DImode;
9350 dest = adjust_address (dest, V2DImode, 0);
9351 source = gen_lowpart (V2DImode, source);
9352 }
9353
9354 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9355 rs6000_emit_le_vsx_permute (tmp, source, mode);
9356 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9357 }
9358
9359 /* Emit a sequence representing a little-endian VSX load or store,
9360 moving data from SOURCE to DEST in mode MODE. This is done
9361 separately from rs6000_emit_move to ensure it is called only
9362 during expand. LE VSX loads and stores introduced later are
9363 handled with a split. The expand-time RTL generation allows
9364 us to optimize away redundant pairs of register-permutes. */
9365 void
9366 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9367 {
9368 gcc_assert (!BYTES_BIG_ENDIAN
9369 && VECTOR_MEM_VSX_P (mode)
9370 && !TARGET_P9_VECTOR
9371 && !gpr_or_gpr_p (dest, source)
9372 && (MEM_P (source) ^ MEM_P (dest)));
9373
9374 if (MEM_P (source))
9375 {
9376 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9377 rs6000_emit_le_vsx_load (dest, source, mode);
9378 }
9379 else
9380 {
9381 if (!REG_P (source))
9382 source = force_reg (mode, source);
9383 rs6000_emit_le_vsx_store (dest, source, mode);
9384 }
9385 }
9386
9387 /* Return whether an SFmode or SImode move can be done without converting one
9388 mode to another. This arises when we have:
9389
9390 (SUBREG:SF (REG:SI ...))
9391 (SUBREG:SI (REG:SF ...))
9392
9393 and one of the values is in a floating point/vector register, where SFmode
9394 scalars are stored in DFmode format. */
9395
9396 bool
9397 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9398 {
9399 if (TARGET_ALLOW_SF_SUBREG)
9400 return true;
9401
9402 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9403 return true;
9404
9405 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9406 return true;
9407
9408 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9409 if (SUBREG_P (dest))
9410 {
9411 rtx dest_subreg = SUBREG_REG (dest);
9412 rtx src_subreg = SUBREG_REG (src);
9413 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9414 }
9415
9416 return false;
9417 }
9418
9419
9420 /* Helper function to change moves with:
9421
9422 (SUBREG:SF (REG:SI)) and
9423 (SUBREG:SI (REG:SF))
9424
9425 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9426 values are stored as DFmode values in the VSX registers. We need to convert
9427 the bits before we can use a direct move or operate on the bits in the
9428 vector register as an integer type.
9429
9430 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9431
9432 static bool
9433 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9434 {
9435 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9436 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9437 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9438 {
9439 rtx inner_source = SUBREG_REG (source);
9440 machine_mode inner_mode = GET_MODE (inner_source);
9441
9442 if (mode == SImode && inner_mode == SFmode)
9443 {
9444 emit_insn (gen_movsi_from_sf (dest, inner_source));
9445 return true;
9446 }
9447
9448 if (mode == SFmode && inner_mode == SImode)
9449 {
9450 emit_insn (gen_movsf_from_si (dest, inner_source));
9451 return true;
9452 }
9453 }
9454
9455 return false;
9456 }
9457
9458 /* Emit a move from SOURCE to DEST in mode MODE. */
9459 void
9460 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9461 {
9462 rtx operands[2];
9463 operands[0] = dest;
9464 operands[1] = source;
9465
9466 if (TARGET_DEBUG_ADDR)
9467 {
9468 fprintf (stderr,
9469 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9470 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9471 GET_MODE_NAME (mode),
9472 lra_in_progress,
9473 reload_completed,
9474 can_create_pseudo_p ());
9475 debug_rtx (dest);
9476 fprintf (stderr, "source:\n");
9477 debug_rtx (source);
9478 }
9479
9480 /* Check that we get CONST_WIDE_INT only when we should. */
9481 if (CONST_WIDE_INT_P (operands[1])
9482 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9483 gcc_unreachable ();
9484
9485 #ifdef HAVE_AS_GNU_ATTRIBUTE
9486 /* If we use a long double type, set the flags in .gnu_attribute that say
9487 what the long double type is. This is to allow the linker's warning
9488 message for the wrong long double to be useful, even if the function does
9489 not do a call (for example, doing a 128-bit add on power9 if the long
9490 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9491 are used and they aren't the default long double type. */
9492 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9493 {
9494 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9495 rs6000_passes_float = rs6000_passes_long_double = true;
9496
9497 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9498 rs6000_passes_float = rs6000_passes_long_double = true;
9499 }
9500 #endif
9501
9502 /* See if we need to special case SImode/SFmode SUBREG moves. */
9503 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9504 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9505 return;
9506
9507 /* Check if GCC is setting up a block move that will end up using FP
9508 registers as temporaries. We must make sure this is acceptable. */
9509 if (MEM_P (operands[0])
9510 && MEM_P (operands[1])
9511 && mode == DImode
9512 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9513 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9514 && ! (rs6000_slow_unaligned_access (SImode,
9515 (MEM_ALIGN (operands[0]) > 32
9516 ? 32 : MEM_ALIGN (operands[0])))
9517 || rs6000_slow_unaligned_access (SImode,
9518 (MEM_ALIGN (operands[1]) > 32
9519 ? 32 : MEM_ALIGN (operands[1]))))
9520 && ! MEM_VOLATILE_P (operands [0])
9521 && ! MEM_VOLATILE_P (operands [1]))
9522 {
9523 emit_move_insn (adjust_address (operands[0], SImode, 0),
9524 adjust_address (operands[1], SImode, 0));
9525 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9526 adjust_address (copy_rtx (operands[1]), SImode, 4));
9527 return;
9528 }
9529
9530 if (can_create_pseudo_p () && MEM_P (operands[0])
9531 && !gpc_reg_operand (operands[1], mode))
9532 operands[1] = force_reg (mode, operands[1]);
9533
9534 /* Recognize the case where operand[1] is a reference to thread-local
9535 data and load its address to a register. */
9536 if (tls_referenced_p (operands[1]))
9537 {
9538 enum tls_model model;
9539 rtx tmp = operands[1];
9540 rtx addend = NULL;
9541
9542 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9543 {
9544 addend = XEXP (XEXP (tmp, 0), 1);
9545 tmp = XEXP (XEXP (tmp, 0), 0);
9546 }
9547
9548 gcc_assert (SYMBOL_REF_P (tmp));
9549 model = SYMBOL_REF_TLS_MODEL (tmp);
9550 gcc_assert (model != 0);
9551
9552 tmp = rs6000_legitimize_tls_address (tmp, model);
9553 if (addend)
9554 {
9555 tmp = gen_rtx_PLUS (mode, tmp, addend);
9556 tmp = force_operand (tmp, operands[0]);
9557 }
9558 operands[1] = tmp;
9559 }
9560
9561 /* 128-bit constant floating-point values on Darwin should really be loaded
9562 as two parts. However, this premature splitting is a problem when DFmode
9563 values can go into Altivec registers. */
9564 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9565 && !reg_addr[DFmode].scalar_in_vmx_p)
9566 {
9567 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9568 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9569 DFmode);
9570 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9571 GET_MODE_SIZE (DFmode)),
9572 simplify_gen_subreg (DFmode, operands[1], mode,
9573 GET_MODE_SIZE (DFmode)),
9574 DFmode);
9575 return;
9576 }
9577
9578 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9579 p1:SD) if p1 is not of floating point class and p0 is spilled as
9580 we can have no analogous movsd_store for this. */
9581 if (lra_in_progress && mode == DDmode
9582 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9583 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9584 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9585 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9586 {
9587 enum reg_class cl;
9588 int regno = REGNO (SUBREG_REG (operands[1]));
9589
9590 if (!HARD_REGISTER_NUM_P (regno))
9591 {
9592 cl = reg_preferred_class (regno);
9593 regno = reg_renumber[regno];
9594 if (regno < 0)
9595 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9596 }
9597 if (regno >= 0 && ! FP_REGNO_P (regno))
9598 {
9599 mode = SDmode;
9600 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9601 operands[1] = SUBREG_REG (operands[1]);
9602 }
9603 }
9604 if (lra_in_progress
9605 && mode == SDmode
9606 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9607 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9608 && (REG_P (operands[1])
9609 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9610 {
9611 int regno = reg_or_subregno (operands[1]);
9612 enum reg_class cl;
9613
9614 if (!HARD_REGISTER_NUM_P (regno))
9615 {
9616 cl = reg_preferred_class (regno);
9617 gcc_assert (cl != NO_REGS);
9618 regno = reg_renumber[regno];
9619 if (regno < 0)
9620 regno = ira_class_hard_regs[cl][0];
9621 }
9622 if (FP_REGNO_P (regno))
9623 {
9624 if (GET_MODE (operands[0]) != DDmode)
9625 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9626 emit_insn (gen_movsd_store (operands[0], operands[1]));
9627 }
9628 else if (INT_REGNO_P (regno))
9629 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9630 else
9631 gcc_unreachable ();
9632 return;
9633 }
9634 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9635 p1:DD)) if p0 is not of floating point class and p1 is spilled as
9636 we can have no analogous movsd_load for this. */
9637 if (lra_in_progress && mode == DDmode
9638 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9639 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9640 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9641 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9642 {
9643 enum reg_class cl;
9644 int regno = REGNO (SUBREG_REG (operands[0]));
9645
9646 if (!HARD_REGISTER_NUM_P (regno))
9647 {
9648 cl = reg_preferred_class (regno);
9649 regno = reg_renumber[regno];
9650 if (regno < 0)
9651 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9652 }
9653 if (regno >= 0 && ! FP_REGNO_P (regno))
9654 {
9655 mode = SDmode;
9656 operands[0] = SUBREG_REG (operands[0]);
9657 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9658 }
9659 }
9660 if (lra_in_progress
9661 && mode == SDmode
9662 && (REG_P (operands[0])
9663 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9664 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9665 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9666 {
9667 int regno = reg_or_subregno (operands[0]);
9668 enum reg_class cl;
9669
9670 if (!HARD_REGISTER_NUM_P (regno))
9671 {
9672 cl = reg_preferred_class (regno);
9673 gcc_assert (cl != NO_REGS);
9674 regno = reg_renumber[regno];
9675 if (regno < 0)
9676 regno = ira_class_hard_regs[cl][0];
9677 }
9678 if (FP_REGNO_P (regno))
9679 {
9680 if (GET_MODE (operands[1]) != DDmode)
9681 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9682 emit_insn (gen_movsd_load (operands[0], operands[1]));
9683 }
9684 else if (INT_REGNO_P (regno))
9685 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9686 else
9687 gcc_unreachable ();
9688 return;
9689 }
9690
9691 /* FIXME: In the long term, this switch statement should go away
9692 and be replaced by a sequence of tests based on things like
9693 mode == Pmode. */
9694 switch (mode)
9695 {
9696 case E_HImode:
9697 case E_QImode:
9698 if (CONSTANT_P (operands[1])
9699 && !CONST_INT_P (operands[1]))
9700 operands[1] = force_const_mem (mode, operands[1]);
9701 break;
9702
9703 case E_TFmode:
9704 case E_TDmode:
9705 case E_IFmode:
9706 case E_KFmode:
9707 if (FLOAT128_2REG_P (mode))
9708 rs6000_eliminate_indexed_memrefs (operands);
9709 /* fall through */
9710
9711 case E_DFmode:
9712 case E_DDmode:
9713 case E_SFmode:
9714 case E_SDmode:
9715 if (CONSTANT_P (operands[1])
9716 && ! easy_fp_constant (operands[1], mode))
9717 operands[1] = force_const_mem (mode, operands[1]);
9718 break;
9719
9720 case E_V16QImode:
9721 case E_V8HImode:
9722 case E_V4SFmode:
9723 case E_V4SImode:
9724 case E_V2DFmode:
9725 case E_V2DImode:
9726 case E_V1TImode:
9727 if (CONSTANT_P (operands[1])
9728 && !easy_vector_constant (operands[1], mode))
9729 operands[1] = force_const_mem (mode, operands[1]);
9730 break;
9731
9732 case E_SImode:
9733 case E_DImode:
9734 /* Use default pattern for address of ELF small data. */
9735 if (TARGET_ELF
9736 && mode == Pmode
9737 && DEFAULT_ABI == ABI_V4
9738 && (SYMBOL_REF_P (operands[1])
9739 || GET_CODE (operands[1]) == CONST)
9740 && small_data_operand (operands[1], mode))
9741 {
9742 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9743 return;
9744 }
9745
9746 if (DEFAULT_ABI == ABI_V4
9747 && mode == Pmode && mode == SImode
9748 && flag_pic == 1 && got_operand (operands[1], mode))
9749 {
9750 emit_insn (gen_movsi_got (operands[0], operands[1]));
9751 return;
9752 }
9753
9754 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9755 && TARGET_NO_TOC
9756 && ! flag_pic
9757 && mode == Pmode
9758 && CONSTANT_P (operands[1])
9759 && GET_CODE (operands[1]) != HIGH
9760 && !CONST_INT_P (operands[1]))
9761 {
9762 rtx target = (!can_create_pseudo_p ()
9763 ? operands[0]
9764 : gen_reg_rtx (mode));
9765
9766 /* If this is a function address on -mcall-aixdesc,
9767 convert it to the address of the descriptor. */
9768 if (DEFAULT_ABI == ABI_AIX
9769 && SYMBOL_REF_P (operands[1])
9770 && XSTR (operands[1], 0)[0] == '.')
9771 {
9772 const char *name = XSTR (operands[1], 0);
9773 rtx new_ref;
9774 while (*name == '.')
9775 name++;
9776 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9777 CONSTANT_POOL_ADDRESS_P (new_ref)
9778 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9779 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9780 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9781 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9782 operands[1] = new_ref;
9783 }
9784
9785 if (DEFAULT_ABI == ABI_DARWIN)
9786 {
9787 #if TARGET_MACHO
9788 if (MACHO_DYNAMIC_NO_PIC_P)
9789 {
9790 /* Take care of any required data indirection. */
9791 operands[1] = rs6000_machopic_legitimize_pic_address (
9792 operands[1], mode, operands[0]);
9793 if (operands[0] != operands[1])
9794 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9795 return;
9796 }
9797 #endif
9798 emit_insn (gen_macho_high (target, operands[1]));
9799 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9800 return;
9801 }
9802
9803 emit_insn (gen_elf_high (target, operands[1]));
9804 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9805 return;
9806 }
9807
9808 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9809 and we have put it in the TOC, we just need to make a TOC-relative
9810 reference to it. */
9811 if (TARGET_TOC
9812 && SYMBOL_REF_P (operands[1])
9813 && use_toc_relative_ref (operands[1], mode))
9814 operands[1] = create_TOC_reference (operands[1], operands[0]);
9815 else if (mode == Pmode
9816 && CONSTANT_P (operands[1])
9817 && GET_CODE (operands[1]) != HIGH
9818 && ((REG_P (operands[0])
9819 && FP_REGNO_P (REGNO (operands[0])))
9820 || !CONST_INT_P (operands[1])
9821 || (num_insns_constant (operands[1], mode)
9822 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9823 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9824 && (TARGET_CMODEL == CMODEL_SMALL
9825 || can_create_pseudo_p ()
9826 || (REG_P (operands[0])
9827 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9828 {
9829
9830 #if TARGET_MACHO
9831 /* Darwin uses a special PIC legitimizer. */
9832 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9833 {
9834 operands[1] =
9835 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9836 operands[0]);
9837 if (operands[0] != operands[1])
9838 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9839 return;
9840 }
9841 #endif
9842
9843 /* If we are to limit the number of things we put in the TOC and
9844 this is a symbol plus a constant we can add in one insn,
9845 just put the symbol in the TOC and add the constant. */
9846 if (GET_CODE (operands[1]) == CONST
9847 && TARGET_NO_SUM_IN_TOC
9848 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9849 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9850 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9851 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9852 && ! side_effects_p (operands[0]))
9853 {
9854 rtx sym =
9855 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9856 rtx other = XEXP (XEXP (operands[1], 0), 1);
9857
9858 sym = force_reg (mode, sym);
9859 emit_insn (gen_add3_insn (operands[0], sym, other));
9860 return;
9861 }
9862
9863 operands[1] = force_const_mem (mode, operands[1]);
9864
9865 if (TARGET_TOC
9866 && SYMBOL_REF_P (XEXP (operands[1], 0))
9867 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9868 {
9869 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9870 operands[0]);
9871 operands[1] = gen_const_mem (mode, tocref);
9872 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9873 }
9874 }
9875 break;
9876
9877 case E_TImode:
9878 if (!VECTOR_MEM_VSX_P (TImode))
9879 rs6000_eliminate_indexed_memrefs (operands);
9880 break;
9881
9882 case E_PTImode:
9883 rs6000_eliminate_indexed_memrefs (operands);
9884 break;
9885
9886 default:
9887 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9888 }
9889
9890 /* Above, we may have called force_const_mem which may have returned
9891 an invalid address. If we can, fix this up; otherwise, reload will
9892 have to deal with it. */
9893 if (MEM_P (operands[1]))
9894 operands[1] = validize_mem (operands[1]);
9895
9896 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9897 }
9898 \f
9899 /* Nonzero if we can use a floating-point register to pass this arg. */
9900 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9901 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9902 && (CUM)->fregno <= FP_ARG_MAX_REG \
9903 && TARGET_HARD_FLOAT)
9904
9905 /* Nonzero if we can use an AltiVec register to pass this arg. */
9906 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9907 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9908 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9909 && TARGET_ALTIVEC_ABI \
9910 && (NAMED))
9911
9912 /* Walk down the type tree of TYPE counting consecutive base elements.
9913 If *MODEP is VOIDmode, then set it to the first valid floating point
9914 or vector type. If a non-floating point or vector type is found, or
9915 if a floating point or vector type that doesn't match a non-VOIDmode
9916 *MODEP is found, then return -1, otherwise return the count in the
9917 sub-tree. */
9918
9919 static int
9920 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9921 {
9922 machine_mode mode;
9923 HOST_WIDE_INT size;
9924
9925 switch (TREE_CODE (type))
9926 {
9927 case REAL_TYPE:
9928 mode = TYPE_MODE (type);
9929 if (!SCALAR_FLOAT_MODE_P (mode))
9930 return -1;
9931
9932 if (*modep == VOIDmode)
9933 *modep = mode;
9934
9935 if (*modep == mode)
9936 return 1;
9937
9938 break;
9939
9940 case COMPLEX_TYPE:
9941 mode = TYPE_MODE (TREE_TYPE (type));
9942 if (!SCALAR_FLOAT_MODE_P (mode))
9943 return -1;
9944
9945 if (*modep == VOIDmode)
9946 *modep = mode;
9947
9948 if (*modep == mode)
9949 return 2;
9950
9951 break;
9952
9953 case VECTOR_TYPE:
9954 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9955 return -1;
9956
9957 /* Use V4SImode as representative of all 128-bit vector types. */
9958 size = int_size_in_bytes (type);
9959 switch (size)
9960 {
9961 case 16:
9962 mode = V4SImode;
9963 break;
9964 default:
9965 return -1;
9966 }
9967
9968 if (*modep == VOIDmode)
9969 *modep = mode;
9970
9971 /* Vector modes are considered to be opaque: two vectors are
9972 equivalent for the purposes of being homogeneous aggregates
9973 if they are the same size. */
9974 if (*modep == mode)
9975 return 1;
9976
9977 break;
9978
9979 case ARRAY_TYPE:
9980 {
9981 int count;
9982 tree index = TYPE_DOMAIN (type);
9983
9984 /* Can't handle incomplete types or sizes that are not
9985 fixed. */
9986 if (!COMPLETE_TYPE_P (type)
9987 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9988 return -1;
9989
9990 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9991 if (count == -1
9992 || !index
9993 || !TYPE_MAX_VALUE (index)
9994 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9995 || !TYPE_MIN_VALUE (index)
9996 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9997 || count < 0)
9998 return -1;
9999
10000 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10001 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10002
10003 /* There must be no padding. */
10004 if (wi::to_wide (TYPE_SIZE (type))
10005 != count * GET_MODE_BITSIZE (*modep))
10006 return -1;
10007
10008 return count;
10009 }
10010
10011 case RECORD_TYPE:
10012 {
10013 int count = 0;
10014 int sub_count;
10015 tree field;
10016
10017 /* Can't handle incomplete types or sizes that are not
10018 fixed. */
10019 if (!COMPLETE_TYPE_P (type)
10020 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10021 return -1;
10022
10023 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10024 {
10025 if (TREE_CODE (field) != FIELD_DECL)
10026 continue;
10027
10028 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10029 if (sub_count < 0)
10030 return -1;
10031 count += sub_count;
10032 }
10033
10034 /* There must be no padding. */
10035 if (wi::to_wide (TYPE_SIZE (type))
10036 != count * GET_MODE_BITSIZE (*modep))
10037 return -1;
10038
10039 return count;
10040 }
10041
10042 case UNION_TYPE:
10043 case QUAL_UNION_TYPE:
10044 {
10045 /* These aren't very interesting except in a degenerate case. */
10046 int count = 0;
10047 int sub_count;
10048 tree field;
10049
10050 /* Can't handle incomplete types or sizes that are not
10051 fixed. */
10052 if (!COMPLETE_TYPE_P (type)
10053 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10054 return -1;
10055
10056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10057 {
10058 if (TREE_CODE (field) != FIELD_DECL)
10059 continue;
10060
10061 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10062 if (sub_count < 0)
10063 return -1;
10064 count = count > sub_count ? count : sub_count;
10065 }
10066
10067 /* There must be no padding. */
10068 if (wi::to_wide (TYPE_SIZE (type))
10069 != count * GET_MODE_BITSIZE (*modep))
10070 return -1;
10071
10072 return count;
10073 }
10074
10075 default:
10076 break;
10077 }
10078
10079 return -1;
10080 }
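
/* A standalone sketch of the counting rules above on a concrete type,
   _Complex double z[2]: the COMPLEX_TYPE contributes 2 DFmode elements,
   the ARRAY_TYPE multiplies by its length, and the result is accepted
   only if the type has no padding (total bits == count * mode bits).
   The numbers are worked by hand, not by walking real trees.  */
#include <stdio.h>

int
main (void)
{
  int complex_elts = 2;   /* COMPLEX_TYPE of DFmode returns 2 */
  int array_len = 2;      /* 1 + max_index - min_index */
  int count = complex_elts * array_len;
  int mode_bits = 64;     /* GET_MODE_BITSIZE (DFmode) */
  int type_bits = 256;    /* TYPE_SIZE of _Complex double[2] */
  printf ("count = %d, padding-free = %d\n",
          count, type_bits == count * mode_bits); /* count = 4, padding-free = 1 */
  return 0;
}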
10081
10082 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10083 float or vector aggregate that shall be passed in FP/vector registers
10084 according to the ELFv2 ABI, return the homogeneous element mode in
10085 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10086
10087 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
10088
10089 static bool
10090 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10091 machine_mode *elt_mode,
10092 int *n_elts)
10093 {
10094 /* Note that we do not accept complex types at the top level as
10095 homogeneous aggregates; these types are handled via the
10096 targetm.calls.split_complex_arg mechanism. Complex types
10097 can be elements of homogeneous aggregates, however. */
10098 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10099 && AGGREGATE_TYPE_P (type))
10100 {
10101 machine_mode field_mode = VOIDmode;
10102 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10103
10104 if (field_count > 0)
10105 {
10106 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10107 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10108
10109 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10110 up to AGGR_ARG_NUM_REG registers. */
10111 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10112 {
10113 if (elt_mode)
10114 *elt_mode = field_mode;
10115 if (n_elts)
10116 *n_elts = field_count;
10117 return true;
10118 }
10119 }
10120 }
10121
10122 if (elt_mode)
10123 *elt_mode = mode;
10124 if (n_elts)
10125 *n_elts = 1;
10126 return false;
10127 }
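
/* A standalone sketch of the register-budget test above, assuming the
   ELFv2 cap AGGR_ARG_NUM_REG is 8 element registers: with DFmode fields
   (one 8-byte FPR each), up to 8 elements qualify as a homogeneous
   aggregate, so 4 doubles pass and 9 do not.  */
#include <stdio.h>

int
main (void)
{
  int aggr_arg_num_reg = 8; /* assumed ELFv2 limit */
  int reg_size = 8;         /* DFmode is not a vector mode */
  int field_size = 8;       /* ROUND_UP (GET_MODE_SIZE (DFmode), 8) */
  int counts[] = { 4, 9 };
  for (int i = 0; i < 2; i++)
    printf ("%d fields: %s\n", counts[i],
            counts[i] * field_size <= aggr_arg_num_reg * reg_size
            ? "homogeneous aggregate" : "too large");
  return 0;
}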
10128
10129 /* Return a nonzero value to say to return the function value in
10130 memory, just as large structures are always returned. TYPE will be
10131 the data type of the value, and FNTYPE will be the type of the
10132 function doing the returning, or @code{NULL} for libcalls.
10133
10134 The AIX ABI for the RS/6000 specifies that all structures are
10135 returned in memory. The Darwin ABI does the same.
10136
10137 For the Darwin 64 Bit ABI, a function result can be returned in
10138 registers or in memory, depending on the size of the return data
10139 type. If it is returned in registers, the value occupies the same
10140 registers as it would if it were the first and only function
10141 argument. Otherwise, the function places its result in memory at
10142 the location pointed to by GPR3.
10143
10144 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10145 but a draft put them in memory, and GCC used to implement the draft
10146 instead of the final standard. Therefore, aix_struct_return
10147 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10148 compatibility can change DRAFT_V4_STRUCT_RET to override the
10149 default, and -m switches get the final word. See
10150 rs6000_option_override_internal for more details.
10151
10152 The PPC32 SVR4 ABI uses IEEE 128-bit floating point for long double, if
10153 128-bit long double support is enabled. These values are returned in memory.
10154
10155 int_size_in_bytes returns -1 for variable size objects, which go in
10156 memory always. The cast to unsigned makes -1 > 8. */
10157
10158 static bool
10159 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10160 {
10161 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10162 if (TARGET_MACHO
10163 && rs6000_darwin64_abi
10164 && TREE_CODE (type) == RECORD_TYPE
10165 && int_size_in_bytes (type) > 0)
10166 {
10167 CUMULATIVE_ARGS valcum;
10168 rtx valret;
10169
10170 valcum.words = 0;
10171 valcum.fregno = FP_ARG_MIN_REG;
10172 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10173 /* Do a trial code generation as if this were going to be passed
10174 as an argument; if any part goes in memory, we return NULL. */
10175 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10176 if (valret)
10177 return false;
10178 /* Otherwise fall through to more conventional ABI rules. */
10179 }
10180
10181 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10182 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10183 NULL, NULL))
10184 return false;
10185
10186 /* The ELFv2 ABI returns aggregates up to 16B in registers. */
10187 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10188 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10189 return false;
10190
10191 if (AGGREGATE_TYPE_P (type)
10192 && (aix_struct_return
10193 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10194 return true;
10195
10196 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10197 modes only exist for GCC vector types if -maltivec. */
10198 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10199 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10200 return false;
10201
10202 /* Return synthetic vectors in memory. */
10203 if (TREE_CODE (type) == VECTOR_TYPE
10204 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10205 {
10206 static bool warned_for_return_big_vectors = false;
10207 if (!warned_for_return_big_vectors)
10208 {
10209 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10210 "non-standard ABI extension with no compatibility "
10211 "guarantee");
10212 warned_for_return_big_vectors = true;
10213 }
10214 return true;
10215 }
10216
10217 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10218 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10219 return true;
10220
10221 return false;
10222 }
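
/* A standalone sketch of the cast noted in the comment above: a
   variable-size type makes int_size_in_bytes return -1, and viewing -1
   through an unsigned type turns it into the largest value, so the
   single "> 8 bytes" test also routes such objects to memory.  */
#include <stdio.h>

int
main (void)
{
  long long size = -1; /* int_size_in_bytes of a variable-size type */
  printf ("%d\n", (unsigned long long) size > 8); /* prints 1 */
  return 0;
}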
10223
10224 /* Specify whether values returned in registers should be at the most
10225 significant end of a register. We want aggregates returned by
10226 value to match the way aggregates are passed to functions. */
10227
10228 static bool
10229 rs6000_return_in_msb (const_tree valtype)
10230 {
10231 return (DEFAULT_ABI == ABI_ELFv2
10232 && BYTES_BIG_ENDIAN
10233 && AGGREGATE_TYPE_P (valtype)
10234 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
10235 == PAD_UPWARD));
10236 }
10237
10238 #ifdef HAVE_AS_GNU_ATTRIBUTE
10239 /* Return TRUE if a call to function FNDECL may be one that
10240 potentially affects the function calling ABI of the object file. */
10241
10242 static bool
10243 call_ABI_of_interest (tree fndecl)
10244 {
10245 if (rs6000_gnu_attr && symtab->state == EXPANSION)
10246 {
10247 struct cgraph_node *c_node;
10248
10249 /* Libcalls are always interesting. */
10250 if (fndecl == NULL_TREE)
10251 return true;
10252
10253 /* Any call to an external function is interesting. */
10254 if (DECL_EXTERNAL (fndecl))
10255 return true;
10256
10257 /* Interesting functions that we are emitting in this object file. */
10258 c_node = cgraph_node::get (fndecl);
10259 c_node = c_node->ultimate_alias_target ();
10260 return !c_node->only_called_directly_p ();
10261 }
10262 return false;
10263 }
10264 #endif
10265
10266 /* Initialize a variable CUM of type CUMULATIVE_ARGS
10267 for a call to a function whose data type is FNTYPE.
10268 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
10269
10270 For incoming args we set the number of arguments in the prototype large
10271 so we never return a PARALLEL. */
10272
10273 void
10274 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
10275 rtx libname ATTRIBUTE_UNUSED, int incoming,
10276 int libcall, int n_named_args,
10277 tree fndecl,
10278 machine_mode return_mode ATTRIBUTE_UNUSED)
10279 {
10280 static CUMULATIVE_ARGS zero_cumulative;
10281
10282 *cum = zero_cumulative;
10283 cum->words = 0;
10284 cum->fregno = FP_ARG_MIN_REG;
10285 cum->vregno = ALTIVEC_ARG_MIN_REG;
10286 cum->prototype = (fntype && prototype_p (fntype));
10287 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
10288 ? CALL_LIBCALL : CALL_NORMAL);
10289 cum->sysv_gregno = GP_ARG_MIN_REG;
10290 cum->stdarg = stdarg_p (fntype);
10291 cum->libcall = libcall;
10292
10293 cum->nargs_prototype = 0;
10294 if (incoming || cum->prototype)
10295 cum->nargs_prototype = n_named_args;
10296
10297 /* Check for a longcall attribute. */
10298 if ((!fntype && rs6000_default_long_calls)
10299 || (fntype
10300 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
10301 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
10302 cum->call_cookie |= CALL_LONG;
10303 else if (DEFAULT_ABI != ABI_DARWIN)
10304 {
10305 bool is_local = (fndecl
10306 && !DECL_EXTERNAL (fndecl)
10307 && !DECL_WEAK (fndecl)
10308 && (*targetm.binds_local_p) (fndecl));
10309 if (is_local)
10310 ;
10311 else if (flag_plt)
10312 {
10313 if (fntype
10314 && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
10315 cum->call_cookie |= CALL_LONG;
10316 }
10317 else
10318 {
10319 if (!(fntype
10320 && lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
10321 cum->call_cookie |= CALL_LONG;
10322 }
10323 }
10324
10325 if (TARGET_DEBUG_ARG)
10326 {
10327 fprintf (stderr, "\ninit_cumulative_args:");
10328 if (fntype)
10329 {
10330 tree ret_type = TREE_TYPE (fntype);
10331 fprintf (stderr, " ret code = %s,",
10332 get_tree_code_name (TREE_CODE (ret_type)));
10333 }
10334
10335 if (cum->call_cookie & CALL_LONG)
10336 fprintf (stderr, " longcall,");
10337
10338 fprintf (stderr, " proto = %d, nargs = %d\n",
10339 cum->prototype, cum->nargs_prototype);
10340 }
10341
10342 #ifdef HAVE_AS_GNU_ATTRIBUTE
10343 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
10344 {
10345 cum->escapes = call_ABI_of_interest (fndecl);
10346 if (cum->escapes)
10347 {
10348 tree return_type;
10349
10350 if (fntype)
10351 {
10352 return_type = TREE_TYPE (fntype);
10353 return_mode = TYPE_MODE (return_type);
10354 }
10355 else
10356 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
10357
10358 if (return_type != NULL)
10359 {
10360 if (TREE_CODE (return_type) == RECORD_TYPE
10361 && TYPE_TRANSPARENT_AGGR (return_type))
10362 {
10363 return_type = TREE_TYPE (first_field (return_type));
10364 return_mode = TYPE_MODE (return_type);
10365 }
10366 if (AGGREGATE_TYPE_P (return_type)
10367 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
10368 <= 8))
10369 rs6000_returns_struct = true;
10370 }
10371 if (SCALAR_FLOAT_MODE_P (return_mode))
10372 {
10373 rs6000_passes_float = true;
10374 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10375 && (FLOAT128_IBM_P (return_mode)
10376 || FLOAT128_IEEE_P (return_mode)
10377 || (return_type != NULL
10378 && (TYPE_MAIN_VARIANT (return_type)
10379 == long_double_type_node))))
10380 rs6000_passes_long_double = true;
10381
10382 /* Note if we pass or return an IEEE 128-bit type. We changed
10383 the mangling for these types, and we may need to make an alias
10384 with the old mangling. */
10385 if (FLOAT128_IEEE_P (return_mode))
10386 rs6000_passes_ieee128 = true;
10387 }
10388 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
10389 rs6000_passes_vector = true;
10390 }
10391 }
10392 #endif
10393
10394 if (fntype
10395 && !TARGET_ALTIVEC
10396 && TARGET_ALTIVEC_ABI
10397 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
10398 {
10399 error ("cannot return value in vector register because"
10400 " altivec instructions are disabled, use %qs"
10401 " to enable them", "-maltivec");
10402 }
10403 }
10404 \f
10405 /* The mode the ABI uses for a word. This is not the same as word_mode
10406 for -m32 -mpowerpc64. This is used to implement various target hooks. */
10407
10408 static scalar_int_mode
10409 rs6000_abi_word_mode (void)
10410 {
10411 return TARGET_32BIT ? SImode : DImode;
10412 }
10413
10414 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
10415 static char *
10416 rs6000_offload_options (void)
10417 {
10418 if (TARGET_64BIT)
10419 return xstrdup ("-foffload-abi=lp64");
10420 else
10421 return xstrdup ("-foffload-abi=ilp32");
10422 }
10423
10424 /* On rs6000, function arguments are promoted, as are function return
10425 values. */
10426
10427 static machine_mode
10428 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10429 machine_mode mode,
10430 int *punsignedp ATTRIBUTE_UNUSED,
10431 const_tree, int)
10432 {
10433 PROMOTE_MODE (mode, *punsignedp, type);
10434
10435 return mode;
10436 }
10437
10438 /* Return true if TYPE must be passed on the stack and not in registers. */
10439
10440 static bool
10441 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
10442 {
10443 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
10444 return must_pass_in_stack_var_size (mode, type);
10445 else
10446 return must_pass_in_stack_var_size_or_pad (mode, type);
10447 }
10448
10449 static inline bool
10450 is_complex_IBM_long_double (machine_mode mode)
10451 {
10452 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
10453 }
10454
10455 /* Whether ABI_V4 passes MODE args to a function in floating point
10456 registers. */
10457
10458 static bool
10459 abi_v4_pass_in_fpr (machine_mode mode, bool named)
10460 {
10461 if (!TARGET_HARD_FLOAT)
10462 return false;
10463 if (mode == DFmode)
10464 return true;
10465 if (mode == SFmode && named)
10466 return true;
10467 /* ABI_V4 passes complex IBM long double in 8 gprs.
10468 Stupid, but we can't change the ABI now. */
10469 if (is_complex_IBM_long_double (mode))
10470 return false;
10471 if (FLOAT128_2REG_P (mode))
10472 return true;
10473 if (DECIMAL_FLOAT_MODE_P (mode))
10474 return true;
10475 return false;
10476 }
10477
10478 /* Implement TARGET_FUNCTION_ARG_PADDING.
10479
10480 For the AIX ABI structs are always stored left shifted in their
10481 argument slot. */
10482
10483 static pad_direction
10484 rs6000_function_arg_padding (machine_mode mode, const_tree type)
10485 {
10486 #ifndef AGGREGATE_PADDING_FIXED
10487 #define AGGREGATE_PADDING_FIXED 0
10488 #endif
10489 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
10490 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
10491 #endif
10492
10493 if (!AGGREGATE_PADDING_FIXED)
10494 {
10495 /* GCC used to pass structures of the same size as integer types as
10496 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
10497 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
10498 passed padded downward, except that -mstrict-align further
10499 muddied the water in that multi-component structures of 2 and 4
10500 bytes in size were passed padded upward.
10501
10502 The following arranges for best compatibility with previous
10503 versions of gcc, but removes the -mstrict-align dependency. */
10504 if (BYTES_BIG_ENDIAN)
10505 {
10506 HOST_WIDE_INT size = 0;
10507
10508 if (mode == BLKmode)
10509 {
10510 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
10511 size = int_size_in_bytes (type);
10512 }
10513 else
10514 size = GET_MODE_SIZE (mode);
10515
10516 if (size == 1 || size == 2 || size == 4)
10517 return PAD_DOWNWARD;
10518 }
10519 return PAD_UPWARD;
10520 }
10521
10522 if (AGGREGATES_PAD_UPWARD_ALWAYS)
10523 {
10524 if (type != 0 && AGGREGATE_TYPE_P (type))
10525 return PAD_UPWARD;
10526 }
10527
10528 /* Fall back to the default. */
10529 return default_function_arg_padding (mode, type);
10530 }
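
/* Worked example, assuming big-endian and the usual
!AGGREGATE_PADDING_FIXED path: a 2-byte struct such as
"struct { char a, b; }" gets PAD_DOWNWARD, occupying the last two
bytes of its word just as a "short" would, while a 3-byte struct
gets PAD_UPWARD and occupies the first three bytes of its slot. */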
10531
10532 /* If defined, a C expression that gives the alignment boundary, in bits,
10533 of an argument with the specified mode and type. If it is not defined,
10534 PARM_BOUNDARY is used for all arguments.
10535
10536 V.4 wants long longs and doubles to be double word aligned. Just
10537 testing the mode size is a boneheaded way to do this as it means
10538 that other types such as complex int are also double word aligned.
10539 However, we're stuck with this because changing the ABI might break
10540 existing library interfaces.
10541
10542 Quadword align Altivec/VSX vectors.
10543 Quadword align large synthetic vector types. */
10544
10545 static unsigned int
10546 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10547 {
10548 machine_mode elt_mode;
10549 int n_elts;
10550
10551 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10552
10553 if (DEFAULT_ABI == ABI_V4
10554 && (GET_MODE_SIZE (mode) == 8
10555 || (TARGET_HARD_FLOAT
10556 && !is_complex_IBM_long_double (mode)
10557 && FLOAT128_2REG_P (mode))))
10558 return 64;
10559 else if (FLOAT128_VECTOR_P (mode))
10560 return 128;
10561 else if (type && TREE_CODE (type) == VECTOR_TYPE
10562 && int_size_in_bytes (type) >= 8
10563 && int_size_in_bytes (type) < 16)
10564 return 64;
10565 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10566 || (type && TREE_CODE (type) == VECTOR_TYPE
10567 && int_size_in_bytes (type) >= 16))
10568 return 128;
10569
10570 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10571 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10572 -mcompat-align-parm is used. */
10573 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10574 || DEFAULT_ABI == ABI_ELFv2)
10575 && type && TYPE_ALIGN (type) > 64)
10576 {
10577 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10578 or homogeneous float/vector aggregates here. We already handled
10579 vector aggregates above, but still need to check for float here. */
10580 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10581 && !SCALAR_FLOAT_MODE_P (elt_mode));
10582
10583 /* We used to check for BLKmode instead of the above aggregate type
10584 check. Warn when this results in any difference to the ABI. */
10585 if (aggregate_p != (mode == BLKmode))
10586 {
10587 static bool warned;
10588 if (!warned && warn_psabi)
10589 {
10590 warned = true;
10591 inform (input_location,
10592 "the ABI of passing aggregates with %d-byte alignment"
10593 " has changed in GCC 5",
10594 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10595 }
10596 }
10597
10598 if (aggregate_p)
10599 return 128;
10600 }
10601
10602 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10603 implement the "aggregate type" check as a BLKmode check here; this
10604 means certain aggregate types are in fact not aligned. */
10605 if (TARGET_MACHO && rs6000_darwin64_abi
10606 && mode == BLKmode
10607 && type && TYPE_ALIGN (type) > 64)
10608 return 128;
10609
10610 return PARM_BOUNDARY;
10611 }
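
/* A few concrete results of the above, as a sketch: under ABI_V4,
"double" and "long long" (8-byte modes) get 64; an IEEE 128-bit
float in a vector mode gets 128; an 8-byte synthetic vector gets 64;
a 16-byte AltiVec/VSX vector gets 128; sufficiently aligned
aggregates get 128 on ELFv2; the fallback is PARM_BOUNDARY. */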
10612
10613 /* The offset in words to the start of the parameter save area. */
10614
10615 static unsigned int
10616 rs6000_parm_offset (void)
10617 {
10618 return (DEFAULT_ABI == ABI_V4 ? 2
10619 : DEFAULT_ABI == ABI_ELFv2 ? 4
10620 : 6);
10621 }
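
/* These offsets match the fixed stack-frame header that precedes the
parameter save area: 2 words (back chain and LR save) for ABI_V4,
4 words (back chain, CR, LR, TOC) for ELFv2, and 6 words (back
chain, CR, LR, two reserved words, TOC) otherwise.  This summary is
informal; the respective ABI documents are authoritative. */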
10622
10623 /* For a function parm of MODE and TYPE, return the starting word in
10624 the parameter area. NWORDS of the parameter area are already used. */
10625
10626 static unsigned int
10627 rs6000_parm_start (machine_mode mode, const_tree type,
10628 unsigned int nwords)
10629 {
10630 unsigned int align;
10631
10632 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10633 return nwords + (-(rs6000_parm_offset () + nwords) & align);
10634 }
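
/* Worked example, assuming the 64-bit AIX layout: the save area
starts 6 words past the stack pointer and is 16-byte aligned.  For
a quadword-aligned argument (boundary 128, so ALIGN == 1) with
NWORDS == 3, we get 3 + (-(6 + 3) & 1) == 4: the argument starts at
word 4, making its absolute word index 6 + 4 == 10, which is even
as required. */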
10635
10636 /* Compute the size (in words) of a function argument. */
10637
10638 static unsigned long
10639 rs6000_arg_size (machine_mode mode, const_tree type)
10640 {
10641 unsigned long size;
10642
10643 if (mode != BLKmode)
10644 size = GET_MODE_SIZE (mode);
10645 else
10646 size = int_size_in_bytes (type);
10647
10648 if (TARGET_32BIT)
10649 return (size + 3) >> 2;
10650 else
10651 return (size + 7) >> 3;
10652 }
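
/* E.g. a 10-byte BLKmode struct occupies (10 + 3) >> 2 == 3 words
when TARGET_32BIT, and (10 + 7) >> 3 == 2 words otherwise. */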
10653 \f
10654 /* Use this to flush pending int fields. */
10655
10656 static void
10657 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10658 HOST_WIDE_INT bitpos, int final)
10659 {
10660 unsigned int startbit, endbit;
10661 int intregs, intoffset;
10662
10663 /* Handle the situation where a float is taking up the first half
10664 of the GPR, and the other half is empty (typically due to
10665 alignment restrictions). We can detect this by an 8-byte-aligned
10666 int field, or by seeing that this is the final flush for this
10667 argument. Count the word and continue on. */
10668 if (cum->floats_in_gpr == 1
10669 && (cum->intoffset % 64 == 0
10670 || (cum->intoffset == -1 && final)))
10671 {
10672 cum->words++;
10673 cum->floats_in_gpr = 0;
10674 }
10675
10676 if (cum->intoffset == -1)
10677 return;
10678
10679 intoffset = cum->intoffset;
10680 cum->intoffset = -1;
10681 cum->floats_in_gpr = 0;
10682
10683 if (intoffset % BITS_PER_WORD != 0)
10684 {
10685 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
10686 if (!int_mode_for_size (bits, 0).exists ())
10687 {
10688 /* We couldn't find an appropriate mode, which happens,
10689 e.g., in packed structs when there are 3 bytes to load.
10690 Move intoffset back to the beginning of the word in this
10691 case. */
10692 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10693 }
10694 }
10695
10696 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10697 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10698 intregs = (endbit - startbit) / BITS_PER_WORD;
10699 cum->words += intregs;
10700 /* words should be unsigned. */
10701 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
10702 {
10703 int pad = (endbit / BITS_PER_WORD) - cum->words;
10704 cum->words += pad;
10705 }
10706 }
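
/* Worked example, assuming 64-bit words: with cum->intoffset == 0
and BITPOS == 96 (12 bytes of pending int fields), STARTBIT == 0
and ENDBIT == 128, so two words are added to cum->words. */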
10707
10708 /* The darwin64 ABI calls for us to recurse down through structs,
10709 looking for elements passed in registers. Unfortunately, we have
10710 to track int register count here also because of misalignments
10711 in powerpc alignment mode. */
10712
10713 static void
10714 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10715 const_tree type,
10716 HOST_WIDE_INT startbitpos)
10717 {
10718 tree f;
10719
10720 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10721 if (TREE_CODE (f) == FIELD_DECL)
10722 {
10723 HOST_WIDE_INT bitpos = startbitpos;
10724 tree ftype = TREE_TYPE (f);
10725 machine_mode mode;
10726 if (ftype == error_mark_node)
10727 continue;
10728 mode = TYPE_MODE (ftype);
10729
10730 if (DECL_SIZE (f) != 0
10731 && tree_fits_uhwi_p (bit_position (f)))
10732 bitpos += int_bit_position (f);
10733
10734 /* ??? FIXME: else assume zero offset. */
10735
10736 if (TREE_CODE (ftype) == RECORD_TYPE)
10737 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10738 else if (USE_FP_FOR_ARG_P (cum, mode))
10739 {
10740 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10741 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10742 cum->fregno += n_fpregs;
10743 /* Single-precision floats present a special problem for
10744 us, because they are smaller than an 8-byte GPR, and so
10745 the structure-packing rules combined with the standard
10746 varargs behavior mean that we want to pack float/float
10747 and float/int combinations into a single register's
10748 space. This is complicated by the arg advance flushing,
10749 which works on arbitrarily large groups of int-type
10750 fields. */
10751 if (mode == SFmode)
10752 {
10753 if (cum->floats_in_gpr == 1)
10754 {
10755 /* Two floats in a word; count the word and reset
10756 the float count. */
10757 cum->words++;
10758 cum->floats_in_gpr = 0;
10759 }
10760 else if (bitpos % 64 == 0)
10761 {
10762 /* A float at the beginning of an 8-byte word;
10763 count it and put off adjusting cum->words until
10764 we see if an arg advance flush is going to do it
10765 for us. */
10766 cum->floats_in_gpr++;
10767 }
10768 else
10769 {
10770 /* The float is at the end of a word, preceded
10771 by integer fields, so the arg advance flush
10772 just above has already set cum->words and
10773 everything is taken care of. */
10774 }
10775 }
10776 else
10777 cum->words += n_fpregs;
10778 }
10779 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10780 {
10781 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10782 cum->vregno++;
10783 cum->words += 2;
10784 }
10785 else if (cum->intoffset == -1)
10786 cum->intoffset = bitpos;
10787 }
10788 }
10789
10790 /* Check for an item that needs to be considered specially under the Darwin
10791 64-bit ABI. These are record types where the mode is BLKmode or the
10792 structure is 8 bytes in size. */
10793 static int
10794 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10795 {
10796 return rs6000_darwin64_abi
10797 && ((mode == BLKmode
10798 && TREE_CODE (type) == RECORD_TYPE
10799 && int_size_in_bytes (type) > 0)
10800 || (type && TREE_CODE (type) == RECORD_TYPE
10801 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10802 }
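
/* E.g. "struct { char a[3]; }" (BLKmode, positive size) and
"struct { int a, b; }" (8 bytes, whatever its mode) both qualify
when the Darwin64 ABI is in effect. */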
10803
10804 /* Update the data in CUM to advance over an argument
10805 of mode MODE and data type TYPE.
10806 (TYPE is null for libcalls where that information may not be available.)
10807
10808 Note that for args passed by reference, function_arg will be called
10809 with MODE and TYPE set to that of the pointer to the arg, not the arg
10810 itself. */
10811
10812 static void
10813 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10814 const_tree type, bool named, int depth)
10815 {
10816 machine_mode elt_mode;
10817 int n_elts;
10818
10819 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10820
10821 /* Only tick off an argument if we're not recursing. */
10822 if (depth == 0)
10823 cum->nargs_prototype--;
10824
10825 #ifdef HAVE_AS_GNU_ATTRIBUTE
10826 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
10827 && cum->escapes)
10828 {
10829 if (SCALAR_FLOAT_MODE_P (mode))
10830 {
10831 rs6000_passes_float = true;
10832 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
10833 && (FLOAT128_IBM_P (mode)
10834 || FLOAT128_IEEE_P (mode)
10835 || (type != NULL
10836 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
10837 rs6000_passes_long_double = true;
10838
10839 /* Note if we pass or return an IEEE 128-bit type. We changed the
10840 mangling for these types, and we may need to make an alias with
10841 the old mangling. */
10842 if (FLOAT128_IEEE_P (mode))
10843 rs6000_passes_ieee128 = true;
10844 }
10845 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10846 rs6000_passes_vector = true;
10847 }
10848 #endif
10849
10850 if (TARGET_ALTIVEC_ABI
10851 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10852 || (type && TREE_CODE (type) == VECTOR_TYPE
10853 && int_size_in_bytes (type) == 16)))
10854 {
10855 bool stack = false;
10856
10857 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10858 {
10859 cum->vregno += n_elts;
10860
10861 if (!TARGET_ALTIVEC)
10862 error ("cannot pass argument in vector register because"
10863 " altivec instructions are disabled, use %qs"
10864 " to enable them", "-maltivec");
10865
10866 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10867 even if it is going to be passed in a vector register.
10868 Darwin does the same for variable-argument functions. */
10869 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10870 && TARGET_64BIT)
10871 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10872 stack = true;
10873 }
10874 else
10875 stack = true;
10876
10877 if (stack)
10878 {
10879 int align;
10880
10881 /* Vector parameters must be 16-byte aligned. In 32-bit
10882 mode this means we need to take into account the offset
10883 to the parameter save area. In 64-bit mode, they just
10884 have to start on an even word, since the parameter save
10885 area is 16-byte aligned. */
10886 if (TARGET_32BIT)
10887 align = -(rs6000_parm_offset () + cum->words) & 3;
10888 else
10889 align = cum->words & 1;
10890 cum->words += align + rs6000_arg_size (mode, type);
10891
10892 if (TARGET_DEBUG_ARG)
10893 {
10894 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10895 cum->words, align);
10896 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10897 cum->nargs_prototype, cum->prototype,
10898 GET_MODE_NAME (mode));
10899 }
10900 }
10901 }
10902 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10903 {
10904 int size = int_size_in_bytes (type);
10905 /* Variable sized types have size == -1 and are
10906 treated as if consisting entirely of ints.
10907 Pad to 16 byte boundary if needed. */
10908 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10909 && (cum->words % 2) != 0)
10910 cum->words++;
10911 /* For varargs, we can just go up by the size of the struct. */
10912 if (!named)
10913 cum->words += (size + 7) / 8;
10914 else
10915 {
10916 /* It is tempting to say int register count just goes up by
10917 sizeof(type)/8, but this is wrong in a case such as
10918 { int; double; int; } [powerpc alignment]. We have to
10919 grovel through the fields for these too. */
10920 cum->intoffset = 0;
10921 cum->floats_in_gpr = 0;
10922 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10923 rs6000_darwin64_record_arg_advance_flush (cum,
10924 size * BITS_PER_UNIT, 1);
10925 }
10926 if (TARGET_DEBUG_ARG)
10927 {
10928 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10929 cum->words, TYPE_ALIGN (type), size);
10930 fprintf (stderr,
10931 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10932 cum->nargs_prototype, cum->prototype,
10933 GET_MODE_NAME (mode));
10934 }
10935 }
10936 else if (DEFAULT_ABI == ABI_V4)
10937 {
10938 if (abi_v4_pass_in_fpr (mode, named))
10939 {
10940 /* _Decimal128 must use an even/odd register pair. This assumes
10941 that the register number is odd when fregno is odd. */
10942 if (mode == TDmode && (cum->fregno % 2) == 1)
10943 cum->fregno++;
10944
10945 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10946 <= FP_ARG_V4_MAX_REG)
10947 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10948 else
10949 {
10950 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10951 if (mode == DFmode || FLOAT128_IBM_P (mode)
10952 || mode == DDmode || mode == TDmode)
10953 cum->words += cum->words & 1;
10954 cum->words += rs6000_arg_size (mode, type);
10955 }
10956 }
10957 else
10958 {
10959 int n_words = rs6000_arg_size (mode, type);
10960 int gregno = cum->sysv_gregno;
10961
10962 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
10963 As does any other 2 word item such as complex int due to a
10964 historical mistake. */
10965 if (n_words == 2)
10966 gregno += (1 - gregno) & 1;
10967
10968 /* Multi-reg args are not split between registers and stack. */
10969 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10970 {
10971 /* Long long is aligned on the stack. So are other 2 word
10972 items such as complex int due to a historical mistake. */
10973 if (n_words == 2)
10974 cum->words += cum->words & 1;
10975 cum->words += n_words;
10976 }
10977
10978 /* Note: we continue to accumulate gregno even after we've started
10979 spilling to the stack; this tells expand_builtin_saveregs that
10980 spilling has begun. */
10981 cum->sysv_gregno = gregno + n_words;
10982 }
10983
10984 if (TARGET_DEBUG_ARG)
10985 {
10986 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10987 cum->words, cum->fregno);
10988 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10989 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10990 fprintf (stderr, "mode = %4s, named = %d\n",
10991 GET_MODE_NAME (mode), named);
10992 }
10993 }
10994 else
10995 {
10996 int n_words = rs6000_arg_size (mode, type);
10997 int start_words = cum->words;
10998 int align_words = rs6000_parm_start (mode, type, start_words);
10999
11000 cum->words = align_words + n_words;
11001
11002 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11003 {
11004 /* _Decimal128 must be passed in an even/odd float register pair.
11005 This assumes that the register number is odd when fregno is
11006 odd. */
11007 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11008 cum->fregno++;
11009 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11010 }
11011
11012 if (TARGET_DEBUG_ARG)
11013 {
11014 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11015 cum->words, cum->fregno);
11016 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11017 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11018 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11019 named, align_words - start_words, depth);
11020 }
11021 }
11022 }
11023
11024 static void
11025 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11026 const_tree type, bool named)
11027 {
11028 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11029 0);
11030 }
11031
11032 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11033 structure between cum->intoffset and bitpos to integer registers. */
11034
11035 static void
11036 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11037 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11038 {
11039 machine_mode mode;
11040 unsigned int regno;
11041 unsigned int startbit, endbit;
11042 int this_regno, intregs, intoffset;
11043 rtx reg;
11044
11045 if (cum->intoffset == -1)
11046 return;
11047
11048 intoffset = cum->intoffset;
11049 cum->intoffset = -1;
11050
11051 /* If this is the trailing part of a word, try to only load that
11052 much into the register. Otherwise load the whole register. Note
11053 that in the latter case we may pick up unwanted bits. That's not a
11054 problem at the moment, but we may wish to revisit this. */
11055
11056 if (intoffset % BITS_PER_WORD != 0)
11057 {
11058 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11059 if (!int_mode_for_size (bits, 0).exists (&mode))
11060 {
11061 /* We couldn't find an appropriate mode, which happens,
11062 e.g., in packed structs when there are 3 bytes to load.
11063 Move intoffset back to the beginning of the word in this
11064 case. */
11065 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11066 mode = word_mode;
11067 }
11068 }
11069 else
11070 mode = word_mode;
11071
11072 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11073 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11074 intregs = (endbit - startbit) / BITS_PER_WORD;
11075 this_regno = cum->words + intoffset / BITS_PER_WORD;
11076
11077 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11078 cum->use_stack = 1;
11079
11080 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11081 if (intregs <= 0)
11082 return;
11083
11084 intoffset /= BITS_PER_UNIT;
11085 do
11086 {
11087 regno = GP_ARG_MIN_REG + this_regno;
11088 reg = gen_rtx_REG (mode, regno);
11089 rvec[(*k)++] =
11090 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11091
11092 this_regno += 1;
11093 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11094 mode = word_mode;
11095 intregs -= 1;
11096 }
11097 while (intregs > 0);
11098 }
11099
11100 /* Recursive workhorse for the following. */
11101
11102 static void
11103 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11104 HOST_WIDE_INT startbitpos, rtx rvec[],
11105 int *k)
11106 {
11107 tree f;
11108
11109 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11110 if (TREE_CODE (f) == FIELD_DECL)
11111 {
11112 HOST_WIDE_INT bitpos = startbitpos;
11113 tree ftype = TREE_TYPE (f);
11114 machine_mode mode;
11115 if (ftype == error_mark_node)
11116 continue;
11117 mode = TYPE_MODE (ftype);
11118
11119 if (DECL_SIZE (f) != 0
11120 && tree_fits_uhwi_p (bit_position (f)))
11121 bitpos += int_bit_position (f);
11122
11123 /* ??? FIXME: else assume zero offset. */
11124
11125 if (TREE_CODE (ftype) == RECORD_TYPE)
11126 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11127 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11128 {
11129 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11130 #if 0
11131 switch (mode)
11132 {
11133 case E_SCmode: mode = SFmode; break;
11134 case E_DCmode: mode = DFmode; break;
11135 case E_TCmode: mode = TFmode; break;
11136 default: break;
11137 }
11138 #endif
11139 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11140 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11141 {
11142 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11143 && (mode == TFmode || mode == TDmode));
11144 /* Long double or _Decimal128 split over regs and memory. */
11145 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11146 cum->use_stack = 1;
11147 }
11148 rvec[(*k)++]
11149 = gen_rtx_EXPR_LIST (VOIDmode,
11150 gen_rtx_REG (mode, cum->fregno++),
11151 GEN_INT (bitpos / BITS_PER_UNIT));
11152 if (FLOAT128_2REG_P (mode))
11153 cum->fregno++;
11154 }
11155 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11156 {
11157 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11158 rvec[(*k)++]
11159 = gen_rtx_EXPR_LIST (VOIDmode,
11160 gen_rtx_REG (mode, cum->vregno++),
11161 GEN_INT (bitpos / BITS_PER_UNIT));
11162 }
11163 else if (cum->intoffset == -1)
11164 cum->intoffset = bitpos;
11165 }
11166 }
11167
11168 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11169 the register(s) to be used for each field and subfield of a struct
11170 being passed by value, along with the offset of where the
11171 register's value may be found in the block. FP fields go in FP
11172 register, vector fields go in vector registers, and everything
11173 else goes in int registers, packed as in memory.
11174
11175 This code is also used for function return values. RETVAL indicates
11176 whether this is the case.
11177
11178 Much of this is taken from the SPARC V9 port, which has a similar
11179 calling convention. */
11180
11181 static rtx
11182 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11183 bool named, bool retval)
11184 {
11185 rtx rvec[FIRST_PSEUDO_REGISTER];
11186 int k = 1, kbase = 1;
11187 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11188 /* This is a copy; modifications are not visible to our caller. */
11189 CUMULATIVE_ARGS copy_cum = *orig_cum;
11190 CUMULATIVE_ARGS *cum = &copy_cum;
11191
11192 /* Pad to 16 byte boundary if needed. */
11193 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11194 && (cum->words % 2) != 0)
11195 cum->words++;
11196
11197 cum->intoffset = 0;
11198 cum->use_stack = 0;
11199 cum->named = named;
11200
11201 /* Put entries into rvec[] for individual FP and vector fields, and
11202 for the chunks of memory that go in int regs. Note we start at
11203 element 1; 0 is reserved for an indication of using memory, and
11204 may or may not be filled in below. */
11205 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11206 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11207
11208 /* If any part of the struct went on the stack put all of it there.
11209 This hack is because the generic code for
11210 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11211 parts of the struct are not at the beginning. */
11212 if (cum->use_stack)
11213 {
11214 if (retval)
11215 return NULL_RTX; /* doesn't go in registers at all */
11216 kbase = 0;
11217 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11218 }
11219 if (k > 1 || cum->use_stack)
11220 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11221 else
11222 return NULL_RTX;
11223 }
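
/* Illustrative sketch: for "struct { double d; int i; }" passed by
value with FPRs available, the PARALLEL pairs an FPR at offset 0
for the double with a GPR at offset 8 for the int chunk; the actual
register numbers depend on the preceding arguments. */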
11224
11225 /* Determine where to place an argument in 64-bit mode with a 32-bit ABI (-m32 -mpowerpc64). */
11226
11227 static rtx
11228 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11229 int align_words)
11230 {
11231 int n_units;
11232 int i, k;
11233 rtx rvec[GP_ARG_NUM_REG + 1];
11234
11235 if (align_words >= GP_ARG_NUM_REG)
11236 return NULL_RTX;
11237
11238 n_units = rs6000_arg_size (mode, type);
11239
11240 /* Optimize the simple case where the arg fits in one gpr, except in
11241 the case of BLKmode due to assign_parms assuming that registers are
11242 BITS_PER_WORD wide. */
11243 if (n_units == 0
11244 || (n_units == 1 && mode != BLKmode))
11245 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11246
11247 k = 0;
11248 if (align_words + n_units > GP_ARG_NUM_REG)
11249 /* Not all of the arg fits in gprs. Say that it goes in memory too,
11250 using a magic NULL_RTX component.
11251 This is not strictly correct. Only some of the arg belongs in
11252 memory, not all of it. However, the normal scheme using
11253 function_arg_partial_nregs can result in unusual subregs, e.g.
11254 (subreg:SI (reg:DF) 4), which are not handled well. The code to
11255 store the whole arg to memory is often more efficient than code
11256 to store pieces, and we know that space is available in the right
11257 place for the whole arg. */
11258 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11259
11260 i = 0;
11261 do
11262 {
11263 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
11264 rtx off = GEN_INT (i++ * 4);
11265 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11266 }
11267 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
11268
11269 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11270 }
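
/* Worked example: a DFmode argument with ALIGN_WORDS == 7, i.e. one
GPR left of GP_ARG_NUM_REG == 8, has N_UNITS == 2, so the result is
a PARALLEL holding the magic NULL_RTX element plus (reg:SI 10) at
offset 0: half of the value goes in r10 and the other half goes in
memory. */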
11271
11272 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
11273 but must also be copied into the parameter save area starting at
11274 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
11275 to the GPRs and/or memory. Return the number of elements used. */
11276
11277 static int
11278 rs6000_psave_function_arg (machine_mode mode, const_tree type,
11279 int align_words, rtx *rvec)
11280 {
11281 int k = 0;
11282
11283 if (align_words < GP_ARG_NUM_REG)
11284 {
11285 int n_words = rs6000_arg_size (mode, type);
11286
11287 if (align_words + n_words > GP_ARG_NUM_REG
11288 || mode == BLKmode
11289 || (TARGET_32BIT && TARGET_POWERPC64))
11290 {
11291 /* If this is partially on the stack, then we only
11292 include the portion actually in registers here. */
11293 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11294 int i = 0;
11295
11296 if (align_words + n_words > GP_ARG_NUM_REG)
11297 {
11298 /* Not all of the arg fits in gprs. Say that it goes in memory
11299 too, using a magic NULL_RTX component. Also see comment in
11300 rs6000_mixed_function_arg for why the normal
11301 function_arg_partial_nregs scheme doesn't work in this case. */
11302 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11303 }
11304
11305 do
11306 {
11307 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11308 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
11309 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11310 }
11311 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11312 }
11313 else
11314 {
11315 /* The whole arg fits in gprs. */
11316 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11317 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
11318 }
11319 }
11320 else
11321 {
11322 /* It's entirely in memory. */
11323 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11324 }
11325
11326 return k;
11327 }
11328
11329 /* RVEC is a vector of K components of an argument of mode MODE.
11330 Construct the final function_arg return value from it. */
11331
11332 static rtx
11333 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
11334 {
11335 gcc_assert (k >= 1);
11336
11337 /* Avoid returning a PARALLEL in the trivial cases. */
11338 if (k == 1)
11339 {
11340 if (XEXP (rvec[0], 0) == NULL_RTX)
11341 return NULL_RTX;
11342
11343 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
11344 return XEXP (rvec[0], 0);
11345 }
11346
11347 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
11348 }
11349
11350 /* Determine where to put an argument to a function.
11351 Value is zero to push the argument on the stack,
11352 or a hard register in which to store the argument.
11353
11354 MODE is the argument's machine mode.
11355 TYPE is the data type of the argument (as a tree).
11356 This is null for libcalls where that information may
11357 not be available.
11358 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11359 the preceding args and about the function being called. It is
11360 not modified in this routine.
11361 NAMED is nonzero if this argument is a named parameter
11362 (otherwise it is an extra parameter matching an ellipsis).
11363
11364 On RS/6000 the first eight words of non-FP are normally in registers
11365 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
11366 Under V.4, the first 8 FP args are in registers.
11367
11368 If this is floating-point and no prototype is specified, we use
11369 both an FP and integer register (or possibly FP reg and stack). Library
11370 functions (when CALL_LIBCALL is set) always have the proper types for args,
11371 so we can pass the FP value just in one register. emit_library_function
11372 doesn't support PARALLEL anyway.
11373
11374 Note that for args passed by reference, function_arg will be called
11375 with MODE and TYPE set to that of the pointer to the arg, not the arg
11376 itself. */
11377
11378 static rtx
11379 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
11380 const_tree type, bool named)
11381 {
11382 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11383 enum rs6000_abi abi = DEFAULT_ABI;
11384 machine_mode elt_mode;
11385 int n_elts;
11386
11387 /* Return a marker indicating whether the CR1 bit that V.4 uses to
11388 say fp args were passed in registers needs to be set or cleared.
11389 Assume that we don't need the marker for software floating point,
11390 or compiler generated library calls. */
11391 if (mode == VOIDmode)
11392 {
11393 if (abi == ABI_V4
11394 && (cum->call_cookie & CALL_LIBCALL) == 0
11395 && (cum->stdarg
11396 || (cum->nargs_prototype < 0
11397 && (cum->prototype || TARGET_NO_PROTOTYPE)))
11398 && TARGET_HARD_FLOAT)
11399 return GEN_INT (cum->call_cookie
11400 | ((cum->fregno == FP_ARG_MIN_REG)
11401 ? CALL_V4_SET_FP_ARGS
11402 : CALL_V4_CLEAR_FP_ARGS));
11403
11404 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
11405 }
11406
11407 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11408
11409 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11410 {
11411 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
11412 if (rslt != NULL_RTX)
11413 return rslt;
11414 /* Else fall through to usual handling. */
11415 }
11416
11417 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11418 {
11419 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11420 rtx r, off;
11421 int i, k = 0;
11422
11423 /* Do we also need to pass this argument in the parameter save area?
11424 Library support functions for IEEE 128-bit are assumed to not need the
11425 value passed both in GPRs and in vector registers. */
11426 if (TARGET_64BIT && !cum->prototype
11427 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11428 {
11429 int align_words = ROUND_UP (cum->words, 2);
11430 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11431 }
11432
11433 /* Describe where this argument goes in the vector registers. */
11434 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11435 {
11436 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11437 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11438 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11439 }
11440
11441 return rs6000_finish_function_arg (mode, rvec, k);
11442 }
11443 else if (TARGET_ALTIVEC_ABI
11444 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11445 || (type && TREE_CODE (type) == VECTOR_TYPE
11446 && int_size_in_bytes (type) == 16)))
11447 {
11448 if (named || abi == ABI_V4)
11449 return NULL_RTX;
11450 else
11451 {
11452 /* Vector parameters to varargs functions under AIX or Darwin
11453 get passed in memory and possibly also in GPRs. */
11454 int align, align_words, n_words;
11455 machine_mode part_mode;
11456
11457 /* Vector parameters must be 16-byte aligned. In 32-bit
11458 mode this means we need to take into account the offset
11459 to the parameter save area. In 64-bit mode, they just
11460 have to start on an even word, since the parameter save
11461 area is 16-byte aligned. */
11462 if (TARGET_32BIT)
11463 align = -(rs6000_parm_offset () + cum->words) & 3;
11464 else
11465 align = cum->words & 1;
11466 align_words = cum->words + align;
11467
11468 /* Out of registers? Memory, then. */
11469 if (align_words >= GP_ARG_NUM_REG)
11470 return NULL_RTX;
11471
11472 if (TARGET_32BIT && TARGET_POWERPC64)
11473 return rs6000_mixed_function_arg (mode, type, align_words);
11474
11475 /* The vector value goes in GPRs. Only the part of the
11476 value in GPRs is reported here. */
11477 part_mode = mode;
11478 n_words = rs6000_arg_size (mode, type);
11479 if (align_words + n_words > GP_ARG_NUM_REG)
11480 /* Fortunately, there are only two possibilities, the value
11481 is either wholly in GPRs or half in GPRs and half not. */
11482 part_mode = DImode;
11483
11484 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11485 }
11486 }
11487
11488 else if (abi == ABI_V4)
11489 {
11490 if (abi_v4_pass_in_fpr (mode, named))
11491 {
11492 /* _Decimal128 must use an even/odd register pair. This assumes
11493 that the register number is odd when fregno is odd. */
11494 if (mode == TDmode && (cum->fregno % 2) == 1)
11495 cum->fregno++;
11496
11497 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11498 <= FP_ARG_V4_MAX_REG)
11499 return gen_rtx_REG (mode, cum->fregno);
11500 else
11501 return NULL_RTX;
11502 }
11503 else
11504 {
11505 int n_words = rs6000_arg_size (mode, type);
11506 int gregno = cum->sysv_gregno;
11507
11508 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11509 As does any other 2 word item such as complex int due to a
11510 historical mistake. */
11511 if (n_words == 2)
11512 gregno += (1 - gregno) & 1;
11513
11514 /* Multi-reg args are not split between registers and stack. */
11515 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11516 return NULL_RTX;
11517
11518 if (TARGET_32BIT && TARGET_POWERPC64)
11519 return rs6000_mixed_function_arg (mode, type,
11520 gregno - GP_ARG_MIN_REG);
11521 return gen_rtx_REG (mode, gregno);
11522 }
11523 }
11524 else
11525 {
11526 int align_words = rs6000_parm_start (mode, type, cum->words);
11527
11528 /* _Decimal128 must be passed in an even/odd float register pair.
11529 This assumes that the register number is odd when fregno is odd. */
11530 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11531 cum->fregno++;
11532
11533 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11534 && !(TARGET_AIX && !TARGET_ELF
11535 && type != NULL && AGGREGATE_TYPE_P (type)))
11536 {
11537 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11538 rtx r, off;
11539 int i, k = 0;
11540 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11541 int fpr_words;
11542
11543 /* Do we also need to pass this argument in the parameter
11544 save area? */
11545 if (type && (cum->nargs_prototype <= 0
11546 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11547 && TARGET_XL_COMPAT
11548 && align_words >= GP_ARG_NUM_REG)))
11549 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11550
11551 /* Describe where this argument goes in the fprs. */
11552 for (i = 0; i < n_elts
11553 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11554 {
11555 /* Check if the argument is split over registers and memory.
11556 This can only ever happen for long double or _Decimal128;
11557 complex types are handled via split_complex_arg. */
11558 machine_mode fmode = elt_mode;
11559 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11560 {
11561 gcc_assert (FLOAT128_2REG_P (fmode));
11562 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11563 }
11564
11565 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11566 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11567 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11568 }
11569
11570 /* If there were not enough FPRs to hold the argument, the rest
11571 usually goes into memory. However, if the current position
11572 is still within the register parameter area, a portion may
11573 actually have to go into GPRs.
11574
11575 Note that it may happen that the portion of the argument
11576 passed in the first "half" of the first GPR was already
11577 passed in the last FPR as well.
11578
11579 For unnamed arguments, we already set up GPRs to cover the
11580 whole argument in rs6000_psave_function_arg, so there is
11581 nothing further to do at this point. */
11582 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11583 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11584 && cum->nargs_prototype > 0)
11585 {
11586 static bool warned;
11587
11588 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11589 int n_words = rs6000_arg_size (mode, type);
11590
11591 align_words += fpr_words;
11592 n_words -= fpr_words;
11593
11594 do
11595 {
11596 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11597 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11598 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11599 }
11600 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11601
11602 if (!warned && warn_psabi)
11603 {
11604 warned = true;
11605 inform (input_location,
11606 "the ABI of passing homogeneous %<float%> aggregates"
11607 " has changed in GCC 5");
11608 }
11609 }
11610
11611 return rs6000_finish_function_arg (mode, rvec, k);
11612 }
11613 else if (align_words < GP_ARG_NUM_REG)
11614 {
11615 if (TARGET_32BIT && TARGET_POWERPC64)
11616 return rs6000_mixed_function_arg (mode, type, align_words);
11617
11618 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11619 }
11620 else
11621 return NULL_RTX;
11622 }
11623 }
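
/* For example (a sketch, not an exhaustive description): under the
64-bit ELFv2 ABI a named, prototyped "double" with FPRs still free
comes back as a single (reg:DF fN), while the same argument in an
unprototyped or varargs call comes back as a PARALLEL that also
describes the GPR and/or memory copy in the save area. */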
11624 \f
11625 /* For an arg passed partly in registers and partly in memory, this is
11626 the number of bytes passed in registers. For args passed entirely in
11627 registers or entirely in memory, zero. When an arg is described by a
11628 PARALLEL, perhaps using more than one register type, this function
11629 returns the number of bytes used by the first element of the PARALLEL. */
11630
11631 static int
11632 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11633 tree type, bool named)
11634 {
11635 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11636 bool passed_in_gprs = true;
11637 int ret = 0;
11638 int align_words;
11639 machine_mode elt_mode;
11640 int n_elts;
11641
11642 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11643
11644 if (DEFAULT_ABI == ABI_V4)
11645 return 0;
11646
11647 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11648 {
11649 /* If we are passing this arg in the fixed parameter save area (gprs or
11650 memory) as well as VRs, we do not use the partial bytes mechanism;
11651 instead, rs6000_function_arg will return a PARALLEL including a memory
11652 element as necessary. Library support functions for IEEE 128-bit are
11653 assumed to not need the value passed both in GPRs and in vector
11654 registers. */
11655 if (TARGET_64BIT && !cum->prototype
11656 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11657 return 0;
11658
11659 /* Otherwise, we pass in VRs only. Check for partial copies. */
11660 passed_in_gprs = false;
11661 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11662 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11663 }
11664
11665 /* In this complicated case we just disable the partial_nregs code. */
11666 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11667 return 0;
11668
11669 align_words = rs6000_parm_start (mode, type, cum->words);
11670
11671 if (USE_FP_FOR_ARG_P (cum, elt_mode)
11672 && !(TARGET_AIX && !TARGET_ELF
11673 && type != NULL && AGGREGATE_TYPE_P (type)))
11674 {
11675 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11676
11677 /* If we are passing this arg in the fixed parameter save area
11678 (gprs or memory) as well as FPRs, we do not use the partial
11679 bytes mechanism; instead, rs6000_function_arg will return a
11680 PARALLEL including a memory element as necessary. */
11681 if (type
11682 && (cum->nargs_prototype <= 0
11683 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11684 && TARGET_XL_COMPAT
11685 && align_words >= GP_ARG_NUM_REG)))
11686 return 0;
11687
11688 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11689 passed_in_gprs = false;
11690 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11691 {
11692 /* Compute number of bytes / words passed in FPRs. If there
11693 is still space available in the register parameter area
11694 *after* that amount, a part of the argument will be passed
11695 in GPRs. In that case, the total amount passed in any
11696 registers is equal to the amount that would have been passed
11697 in GPRs if everything were passed there, so we fall back to
11698 the GPR code below to compute the appropriate value. */
11699 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11700 * MIN (8, GET_MODE_SIZE (elt_mode)));
11701 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11702
11703 if (align_words + fpr_words < GP_ARG_NUM_REG)
11704 passed_in_gprs = true;
11705 else
11706 ret = fpr;
11707 }
11708 }
11709
11710 if (passed_in_gprs
11711 && align_words < GP_ARG_NUM_REG
11712 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11713 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11714
11715 if (ret != 0 && TARGET_DEBUG_ARG)
11716 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11717
11718 return ret;
11719 }
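
/* E.g. a 16-byte struct that starts in the last 64-bit GPR
(ALIGN_WORDS == GP_ARG_NUM_REG - 1) returns 8: one doubleword
travels in the register, the rest in memory. */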
11720 \f
11721 /* A C expression that indicates when an argument must be passed by
11722 reference. If nonzero for an argument, a copy of that argument is
11723 made in memory and a pointer to the argument is passed instead of
11724 the argument itself. The pointer is passed in whatever way is
11725 appropriate for passing a pointer to that type.
11726
11727 Under V.4, aggregates and long double are passed by reference.
11728
11729 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11730 reference unless the AltiVec vector extension ABI is in force.
11731
11732 As an extension to all ABIs, variable sized types are passed by
11733 reference. */
11734
11735 static bool
11736 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11737 machine_mode mode, const_tree type,
11738 bool named ATTRIBUTE_UNUSED)
11739 {
11740 if (!type)
11741 return 0;
11742
11743 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11744 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11745 {
11746 if (TARGET_DEBUG_ARG)
11747 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11748 return 1;
11749 }
11750
11751 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11752 {
11753 if (TARGET_DEBUG_ARG)
11754 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11755 return 1;
11756 }
11757
11758 if (int_size_in_bytes (type) < 0)
11759 {
11760 if (TARGET_DEBUG_ARG)
11761 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11762 return 1;
11763 }
11764
11765 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11766 modes only exist for GCC vector types if -maltivec. */
11767 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11768 {
11769 if (TARGET_DEBUG_ARG)
11770 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11771 return 1;
11772 }
11773
11774 /* Pass synthetic vectors in memory. */
11775 if (TREE_CODE (type) == VECTOR_TYPE
11776 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11777 {
11778 static bool warned_for_pass_big_vectors = false;
11779 if (TARGET_DEBUG_ARG)
11780 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11781 if (!warned_for_pass_big_vectors)
11782 {
11783 warning (OPT_Wpsabi, "GCC vector passed by reference: "
11784 "non-standard ABI extension with no compatibility "
11785 "guarantee");
11786 warned_for_pass_big_vectors = true;
11787 }
11788 return 1;
11789 }
11790
11791 return 0;
11792 }
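
/* For instance, under ABI_V4 "struct { int x; }" is passed by
reference, and on any ABI a 32-byte GCC vector such as
"int v __attribute__ ((vector_size (32)))" is passed by reference,
with a -Wpsabi warning the first time. */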
11793
11794 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11795 already processed. Return true if the parameter must be passed
11796 (fully or partially) on the stack. */
11797
11798 static bool
11799 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11800 {
11801 machine_mode mode;
11802 int unsignedp;
11803 rtx entry_parm;
11804
11805 /* Catch errors. */
11806 if (type == NULL || type == error_mark_node)
11807 return true;
11808
11809 /* Handle types with no storage requirement. */
11810 if (TYPE_MODE (type) == VOIDmode)
11811 return false;
11812
11813 /* Handle complex types; both like components are checked, advancing ARGS_SO_FAR past each. */
11814 if (TREE_CODE (type) == COMPLEX_TYPE)
11815 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11816 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11817
11818 /* Handle transparent aggregates. */
11819 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11820 && TYPE_TRANSPARENT_AGGR (type))
11821 type = TREE_TYPE (first_field (type));
11822
11823 /* See if this arg was passed by invisible reference. */
11824 if (pass_by_reference (get_cumulative_args (args_so_far),
11825 TYPE_MODE (type), type, true))
11826 type = build_pointer_type (type);
11827
11828 /* Find mode as it is passed by the ABI. */
11829 unsignedp = TYPE_UNSIGNED (type);
11830 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11831
11832 /* If we must pass in stack, we need a stack. */
11833 if (rs6000_must_pass_in_stack (mode, type))
11834 return true;
11835
11836 /* If there is no incoming register, we need a stack. */
11837 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11838 if (entry_parm == NULL)
11839 return true;
11840
11841 /* Likewise if we need to pass both in registers and on the stack. */
11842 if (GET_CODE (entry_parm) == PARALLEL
11843 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11844 return true;
11845
11846 /* Also true if we're partially in registers and partially not. */
11847 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11848 return true;
11849
11850 /* Update info on where next arg arrives in registers. */
11851 rs6000_function_arg_advance (args_so_far, mode, type, true);
11852 return false;
11853 }
11854
11855 /* Return true if FUN has no prototype, has a variable argument
11856 list, or passes any parameter in memory. */
11857
11858 static bool
11859 rs6000_function_parms_need_stack (tree fun, bool incoming)
11860 {
11861 tree fntype, result;
11862 CUMULATIVE_ARGS args_so_far_v;
11863 cumulative_args_t args_so_far;
11864
11865 if (!fun)
11866 /* Must be a libcall; libcalls only use reg parms. */
11867 return false;
11868
11869 fntype = fun;
11870 if (!TYPE_P (fun))
11871 fntype = TREE_TYPE (fun);
11872
11873 /* Varargs functions need the parameter save area. */
11874 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11875 return true;
11876
11877 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11878 args_so_far = pack_cumulative_args (&args_so_far_v);
11879
11880 /* When incoming, we will have been passed the function decl.
11881 It is necessary to use the decl to handle K&R style functions,
11882 where TYPE_ARG_TYPES may not be available. */
11883 if (incoming)
11884 {
11885 gcc_assert (DECL_P (fun));
11886 result = DECL_RESULT (fun);
11887 }
11888 else
11889 result = TREE_TYPE (fntype);
11890
11891 if (result && aggregate_value_p (result, fntype))
11892 {
11893 if (!TYPE_P (result))
11894 result = TREE_TYPE (result);
11895 result = build_pointer_type (result);
11896 rs6000_parm_needs_stack (args_so_far, result);
11897 }
11898
11899 if (incoming)
11900 {
11901 tree parm;
11902
11903 for (parm = DECL_ARGUMENTS (fun);
11904 parm && parm != void_list_node;
11905 parm = TREE_CHAIN (parm))
11906 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11907 return true;
11908 }
11909 else
11910 {
11911 function_args_iterator args_iter;
11912 tree arg_type;
11913
11914 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11915 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11916 return true;
11917 }
11918
11919 return false;
11920 }
11921
11922 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11923 usually a constant depending on the ABI. However, in the ELFv2 ABI
11924 the register parameter area is optional when calling a function that
11925 has a prototype in scope, has no variable argument list, and passes
11926 all parameters in registers. */
11927
11928 int
11929 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11930 {
11931 int reg_parm_stack_space;
11932
11933 switch (DEFAULT_ABI)
11934 {
11935 default:
11936 reg_parm_stack_space = 0;
11937 break;
11938
11939 case ABI_AIX:
11940 case ABI_DARWIN:
11941 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11942 break;
11943
11944 case ABI_ELFv2:
11945 /* ??? Recomputing this every time is a bit expensive. Is there
11946 a place to cache this information? */
11947 if (rs6000_function_parms_need_stack (fun, incoming))
11948 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11949 else
11950 reg_parm_stack_space = 0;
11951 break;
11952 }
11953
11954 return reg_parm_stack_space;
11955 }
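
/* E.g. 64 bytes for 64-bit AIX, or for ELFv2 when parameters must go
on the stack; 32 bytes for 32-bit Darwin; and 0 for an ELFv2 call
to a prototyped function whose arguments all travel in registers. */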
11956
11957 static void
11958 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11959 {
11960 int i;
11961 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11962
11963 if (nregs == 0)
11964 return;
11965
11966 for (i = 0; i < nregs; i++)
11967 {
11968 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11969 if (reload_completed)
11970 {
11971 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11972 tem = NULL_RTX;
11973 else
11974 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11975 i * GET_MODE_SIZE (reg_mode));
11976 }
11977 else
11978 tem = replace_equiv_address (tem, XEXP (tem, 0));
11979
11980 gcc_assert (tem);
11981
11982 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11983 }
11984 }
11985 \f
11986 /* Perform any actions needed for a function that is receiving a
11987 variable number of arguments.
11988
11989 CUM is as above.
11990
11991 MODE and TYPE are the mode and type of the current parameter.
11992
11993 PRETEND_SIZE is a variable that should be set to the amount of stack
11994 that must be pushed by the prolog to pretend that our caller pushed
11995 it.
11996
11997 Normally, this hook will push all remaining incoming registers on the
11998 stack and set PRETEND_SIZE to the length of the registers pushed. */
11999
12000 static void
12001 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12002 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12003 int no_rtl)
12004 {
12005 CUMULATIVE_ARGS next_cum;
12006 int reg_size = TARGET_32BIT ? 4 : 8;
12007 rtx save_area = NULL_RTX, mem;
12008 int first_reg_offset;
12009 alias_set_type set;
12010
12011 /* Skip the last named argument. */
12012 next_cum = *get_cumulative_args (cum);
12013 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12014
12015 if (DEFAULT_ABI == ABI_V4)
12016 {
12017 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12018
12019 if (! no_rtl)
12020 {
12021 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12022 HOST_WIDE_INT offset = 0;
12023
12024 /* Try to optimize the size of the varargs save area.
12025 The ABI requires that ap.reg_save_area is doubleword
12026 aligned, but we don't need to allocate space for all
12027 the bytes, only those into which we will actually save
12028 anything. */
12029 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12030 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12031 if (TARGET_HARD_FLOAT
12032 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12033 && cfun->va_list_fpr_size)
12034 {
12035 if (gpr_reg_num)
12036 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12037 * UNITS_PER_FP_WORD;
12038 if (cfun->va_list_fpr_size
12039 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12040 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12041 else
12042 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12043 * UNITS_PER_FP_WORD;
12044 }
12045 if (gpr_reg_num)
12046 {
12047 offset = -((first_reg_offset * reg_size) & ~7);
12048 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12049 {
12050 gpr_reg_num = cfun->va_list_gpr_size;
12051 if (reg_size == 4 && (first_reg_offset & 1))
12052 gpr_reg_num++;
12053 }
12054 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12055 }
12056 else if (fpr_size)
12057 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12058 * UNITS_PER_FP_WORD
12059 - (int) (GP_ARG_NUM_REG * reg_size);
12060
12061 if (gpr_size + fpr_size)
12062 {
12063 rtx reg_save_area
12064 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12065 gcc_assert (MEM_P (reg_save_area));
12066 reg_save_area = XEXP (reg_save_area, 0);
12067 if (GET_CODE (reg_save_area) == PLUS)
12068 {
12069 gcc_assert (XEXP (reg_save_area, 0)
12070 == virtual_stack_vars_rtx);
12071 gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
12072 offset += INTVAL (XEXP (reg_save_area, 1));
12073 }
12074 else
12075 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12076 }
12077
12078 cfun->machine->varargs_save_offset = offset;
12079 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12080 }
12081 }
12082 else
12083 {
12084 first_reg_offset = next_cum.words;
12085 save_area = crtl->args.internal_arg_pointer;
12086
12087 if (targetm.calls.must_pass_in_stack (mode, type))
12088 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12089 }
12090
12091 set = get_varargs_alias_set ();
12092 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12093 && cfun->va_list_gpr_size)
12094 {
12095 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12096
12097 if (va_list_gpr_counter_field)
12098 /* V4 va_list_gpr_size counts number of registers needed. */
12099 n_gpr = cfun->va_list_gpr_size;
12100 else
12101 /* char * va_list instead counts number of bytes needed. */
12102 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12103
12104 if (nregs > n_gpr)
12105 nregs = n_gpr;
12106
12107 mem = gen_rtx_MEM (BLKmode,
12108 plus_constant (Pmode, save_area,
12109 first_reg_offset * reg_size));
12110 MEM_NOTRAP_P (mem) = 1;
12111 set_mem_alias_set (mem, set);
12112 set_mem_align (mem, BITS_PER_WORD);
12113
12114 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12115 nregs);
12116 }
12117
12118 /* Save FP registers if needed. */
12119 if (DEFAULT_ABI == ABI_V4
12120 && TARGET_HARD_FLOAT
12121 && ! no_rtl
12122 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12123 && cfun->va_list_fpr_size)
12124 {
12125 int fregno = next_cum.fregno, nregs;
12126 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12127 rtx lab = gen_label_rtx ();
12128 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12129 * UNITS_PER_FP_WORD);
12130
12131 emit_jump_insn
12132 (gen_rtx_SET (pc_rtx,
12133 gen_rtx_IF_THEN_ELSE (VOIDmode,
12134 gen_rtx_NE (VOIDmode, cr1,
12135 const0_rtx),
12136 gen_rtx_LABEL_REF (VOIDmode, lab),
12137 pc_rtx)));
12138
12139 for (nregs = 0;
12140 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12141 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12142 {
12143 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12144 plus_constant (Pmode, save_area, off));
12145 MEM_NOTRAP_P (mem) = 1;
12146 set_mem_alias_set (mem, set);
12147 set_mem_align (mem, GET_MODE_ALIGNMENT (
12148 TARGET_HARD_FLOAT ? DFmode : SFmode));
12149 emit_move_insn (mem, gen_rtx_REG (
12150 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12151 }
12152
12153 emit_label (lab);
12154 }
12155 }
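
/* Sketch for "int f (int a, ...)" under ABI_V4 with hard float:
after skipping the named "a", r4 through r10 are dumped into the
GPR part of the save area, and f1 through f8 are saved behind the
CR1 branch above, per the V.4 convention that tells callees
whether any FP args arrived in registers. */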
12156
12157 /* Create the va_list data type. */
12158
12159 static tree
12160 rs6000_build_builtin_va_list (void)
12161 {
12162 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12163
12164 /* For AIX, prefer 'char *' because that's what the system
12165 header files like. */
12166 if (DEFAULT_ABI != ABI_V4)
12167 return build_pointer_type (char_type_node);
12168
12169 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12170 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12171 get_identifier ("__va_list_tag"), record);
12172
12173 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12174 unsigned_char_type_node);
12175 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12176 unsigned_char_type_node);
12177 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12178 every user file. */
12179 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12180 get_identifier ("reserved"), short_unsigned_type_node);
12181 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12182 get_identifier ("overflow_arg_area"),
12183 ptr_type_node);
12184 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12185 get_identifier ("reg_save_area"),
12186 ptr_type_node);
12187
12188 va_list_gpr_counter_field = f_gpr;
12189 va_list_fpr_counter_field = f_fpr;
12190
12191 DECL_FIELD_CONTEXT (f_gpr) = record;
12192 DECL_FIELD_CONTEXT (f_fpr) = record;
12193 DECL_FIELD_CONTEXT (f_res) = record;
12194 DECL_FIELD_CONTEXT (f_ovf) = record;
12195 DECL_FIELD_CONTEXT (f_sav) = record;
12196
12197 TYPE_STUB_DECL (record) = type_decl;
12198 TYPE_NAME (record) = type_decl;
12199 TYPE_FIELDS (record) = f_gpr;
12200 DECL_CHAIN (f_gpr) = f_fpr;
12201 DECL_CHAIN (f_fpr) = f_res;
12202 DECL_CHAIN (f_res) = f_ovf;
12203 DECL_CHAIN (f_ovf) = f_sav;
12204
12205 layout_type (record);
12206
12207 /* The correct type is an array type of one element. */
12208 return build_array_type (record, build_index_type (size_zero_node));
12209 }
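/* For reference, the record built above corresponds roughly to the
   following C declaration (a sketch of the V4 layout only; the real
   type is always the tree constructed above):

	typedef struct __va_list_tag
	  {
	    unsigned char gpr;		   GP registers consumed so far
	    unsigned char fpr;		   FP registers consumed so far
	    unsigned short reserved;	   padding, named for -Wpadded
	    void *overflow_arg_area;	   next argument on the stack
	    void *reg_save_area;	   base of the register save area
	  } __gnuc_va_list[1];
*/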
12210
12211 /* Implement va_start. */
12212
12213 static void
12214 rs6000_va_start (tree valist, rtx nextarg)
12215 {
12216 HOST_WIDE_INT words, n_gpr, n_fpr;
12217 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12218 tree gpr, fpr, ovf, sav, t;
12219
12220 /* Only SVR4 needs something special. */
12221 if (DEFAULT_ABI != ABI_V4)
12222 {
12223 std_expand_builtin_va_start (valist, nextarg);
12224 return;
12225 }
12226
12227 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12228 f_fpr = DECL_CHAIN (f_gpr);
12229 f_res = DECL_CHAIN (f_fpr);
12230 f_ovf = DECL_CHAIN (f_res);
12231 f_sav = DECL_CHAIN (f_ovf);
12232
12233 valist = build_simple_mem_ref (valist);
12234 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12235 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12236 f_fpr, NULL_TREE);
12237 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12238 f_ovf, NULL_TREE);
12239 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12240 f_sav, NULL_TREE);
12241
12242 /* Count number of gp and fp argument registers used. */
12243 words = crtl->args.info.words;
12244 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
12245 GP_ARG_NUM_REG);
12246 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
12247 FP_ARG_NUM_REG);
12248
12249 if (TARGET_DEBUG_ARG)
12250 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
12251 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
12252 words, n_gpr, n_fpr);
12253
12254 if (cfun->va_list_gpr_size)
12255 {
12256 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12257 build_int_cst (NULL_TREE, n_gpr));
12258 TREE_SIDE_EFFECTS (t) = 1;
12259 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12260 }
12261
12262 if (cfun->va_list_fpr_size)
12263 {
12264 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12265 build_int_cst (NULL_TREE, n_fpr));
12266 TREE_SIDE_EFFECTS (t) = 1;
12267 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12268
12269 #ifdef HAVE_AS_GNU_ATTRIBUTE
12270 if (call_ABI_of_interest (cfun->decl))
12271 rs6000_passes_float = true;
12272 #endif
12273 }
12274
12275 /* Find the overflow area. */
12276 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
12277 if (words != 0)
12278 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
12279 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12280 TREE_SIDE_EFFECTS (t) = 1;
12281 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12282
12283 /* If there were no va_arg invocations, don't set up the register
12284 save area. */
12285 if (!cfun->va_list_gpr_size
12286 && !cfun->va_list_fpr_size
12287 && n_gpr < GP_ARG_NUM_REG
12288 && n_fpr < FP_ARG_V4_MAX_REG)
12289 return;
12290
12291 /* Find the register save area. */
12292 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
12293 if (cfun->machine->varargs_save_offset)
12294 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
12295 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12296 TREE_SIDE_EFFECTS (t) = 1;
12297 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12298 }
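/* In effect, the trees emitted above initialize a V4 va_list as if by
   the following sketch, where WORDS, N_GPR and N_FPR are the counts
   computed at compile time:

	ap->gpr = N_GPR;
	ap->fpr = N_FPR;
	ap->overflow_arg_area = incoming_arg_ptr + WORDS * MIN_UNITS_PER_WORD;
	ap->reg_save_area = frame_base + varargs_save_offset;

   The counter assignments are skipped when the corresponding va_list
   size is zero, and the register save pointer is only set up when some
   va_arg actually needs it.  */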
12299
12300 /* Implement va_arg. */
12301
12302 static tree
12303 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12304 gimple_seq *post_p)
12305 {
12306 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12307 tree gpr, fpr, ovf, sav, reg, t, u;
12308 int size, rsize, n_reg, sav_ofs, sav_scale;
12309 tree lab_false, lab_over, addr;
12310 int align;
12311 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
12312 int regalign = 0;
12313 gimple *stmt;
12314
12315 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12316 {
12317 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
12318 return build_va_arg_indirect_ref (t);
12319 }
12320
12321 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
12322 earlier version of gcc, with the property that it always applied alignment
12323 adjustments to the va-args (even for zero-sized types). The cheapest way
12324 to deal with this is to replicate the effect of the part of
12325 std_gimplify_va_arg_expr that carries out the align adjust, for the case
12326 of relevance.
12327 We don't need to check for pass-by-reference because of the test above.
12328 We can return a simplified answer, since we know there's no offset to add. */
12329
12330 if (((TARGET_MACHO
12331 && rs6000_darwin64_abi)
12332 || DEFAULT_ABI == ABI_ELFv2
12333 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
12334 && integer_zerop (TYPE_SIZE (type)))
12335 {
12336 unsigned HOST_WIDE_INT align, boundary;
12337 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
12338 align = PARM_BOUNDARY / BITS_PER_UNIT;
12339 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
12340 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
12341 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
12342 boundary /= BITS_PER_UNIT;
12343 if (boundary > align)
12344 {
12345 tree t;
12346 /* This updates arg ptr by the amount that would be necessary
12347 to align the zero-sized (but not zero-alignment) item. */
12348 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12349 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
12350 gimplify_and_add (t, pre_p);
12351
12352 t = fold_convert (sizetype, valist_tmp);
12353 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
12354 fold_convert (TREE_TYPE (valist),
12355 fold_build2 (BIT_AND_EXPR, sizetype, t,
12356 size_int (-boundary))));
12357 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
12358 gimplify_and_add (t, pre_p);
12359 }
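/* The two assignments above implement the usual round-up idiom
   (ptr + boundary - 1) & -boundary; e.g. with boundary == 16 a
   pointer of 0x1004 becomes 0x1013 after the add and 0x1010 after
   the mask.  */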
12360 /* Since it is zero-sized there's no increment for the item itself. */
12361 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
12362 return build_va_arg_indirect_ref (valist_tmp);
12363 }
12364
12365 if (DEFAULT_ABI != ABI_V4)
12366 {
12367 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
12368 {
12369 tree elem_type = TREE_TYPE (type);
12370 machine_mode elem_mode = TYPE_MODE (elem_type);
12371 int elem_size = GET_MODE_SIZE (elem_mode);
12372
12373 if (elem_size < UNITS_PER_WORD)
12374 {
12375 tree real_part, imag_part;
12376 gimple_seq post = NULL;
12377
12378 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12379 &post);
12380 /* Copy the value into a temporary, lest the formal temporary
12381 be reused out from under us. */
12382 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
12383 gimple_seq_add_seq (pre_p, post);
12384
12385 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
12386 post_p);
12387
12388 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
12389 }
12390 }
12391
12392 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
12393 }
12394
12395 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12396 f_fpr = DECL_CHAIN (f_gpr);
12397 f_res = DECL_CHAIN (f_fpr);
12398 f_ovf = DECL_CHAIN (f_res);
12399 f_sav = DECL_CHAIN (f_ovf);
12400
12401 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12402 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12403 f_fpr, NULL_TREE);
12404 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12405 f_ovf, NULL_TREE);
12406 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12407 f_sav, NULL_TREE);
12408
12409 size = int_size_in_bytes (type);
12410 rsize = (size + 3) / 4;
12411 int pad = 4 * rsize - size;
12412 align = 1;
12413
12414 machine_mode mode = TYPE_MODE (type);
12415 if (abi_v4_pass_in_fpr (mode, false))
12416 {
12417 /* FP args go in FP registers, if present. */
12418 reg = fpr;
12419 n_reg = (size + 7) / 8;
12420 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
12421 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
12422 if (mode != SFmode && mode != SDmode)
12423 align = 8;
12424 }
12425 else
12426 {
12427 /* Otherwise into GP registers. */
12428 reg = gpr;
12429 n_reg = rsize;
12430 sav_ofs = 0;
12431 sav_scale = 4;
12432 if (n_reg == 2)
12433 align = 8;
12434 }
12435
12436 /* Pull the value out of the saved registers.... */
12437
12438 lab_over = NULL;
12439 addr = create_tmp_var (ptr_type_node, "addr");
12440
12441 /* AltiVec vectors never go in registers when -mabi=altivec. */
12442 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12443 align = 16;
12444 else
12445 {
12446 lab_false = create_artificial_label (input_location);
12447 lab_over = create_artificial_label (input_location);
12448
12449 /* Long long is aligned in the registers.  So is any other 2-GPR
12450 item, such as complex int, due to a historical mistake.  */
12451 u = reg;
12452 if (n_reg == 2 && reg == gpr)
12453 {
12454 regalign = 1;
12455 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12456 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12457 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12458 unshare_expr (reg), u);
12459 }
12460 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12461 reg number is 0 for f1, so we want to make it odd. */
12462 else if (reg == fpr && mode == TDmode)
12463 {
12464 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12465 build_int_cst (TREE_TYPE (reg), 1));
12466 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12467 }
12468
12469 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12470 t = build2 (GE_EXPR, boolean_type_node, u, t);
12471 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12472 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12473 gimplify_and_add (t, pre_p);
12474
12475 t = sav;
12476 if (sav_ofs)
12477 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12478
12479 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12480 build_int_cst (TREE_TYPE (reg), n_reg));
12481 u = fold_convert (sizetype, u);
12482 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12483 t = fold_build_pointer_plus (t, u);
12484
12485 /* _Decimal32 varargs are located in the second word of the 64-bit
12486 FP register for 32-bit binaries. */
12487 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
12488 t = fold_build_pointer_plus_hwi (t, size);
12489
12490 /* Args are passed right-aligned. */
12491 if (BYTES_BIG_ENDIAN)
12492 t = fold_build_pointer_plus_hwi (t, pad);
12493
12494 gimplify_assign (addr, t, pre_p);
12495
12496 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12497
12498 stmt = gimple_build_label (lab_false);
12499 gimple_seq_add_stmt (pre_p, stmt);
12500
12501 if ((n_reg == 2 && !regalign) || n_reg > 2)
12502 {
12503 /* Ensure that we don't find any more args in regs.
12504 Alignment has already been taken care of for the special cases.  */
12505 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12506 }
12507 }
12508
12509 /* ... otherwise out of the overflow area. */
12510
12511 /* Care for on-stack alignment if needed. */
12512 t = ovf;
12513 if (align != 1)
12514 {
12515 t = fold_build_pointer_plus_hwi (t, align - 1);
12516 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12517 build_int_cst (TREE_TYPE (t), -align));
12518 }
12519
12520 /* Args are passed right-aligned. */
12521 if (BYTES_BIG_ENDIAN)
12522 t = fold_build_pointer_plus_hwi (t, pad);
12523
12524 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12525
12526 gimplify_assign (unshare_expr (addr), t, pre_p);
12527
12528 t = fold_build_pointer_plus_hwi (t, size);
12529 gimplify_assign (unshare_expr (ovf), t, pre_p);
12530
12531 if (lab_over)
12532 {
12533 stmt = gimple_build_label (lab_over);
12534 gimple_seq_add_stmt (pre_p, stmt);
12535 }
12536
12537 if (STRICT_ALIGNMENT
12538 && (TYPE_ALIGN (type)
12539 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12540 {
12541 /* The value (of type complex double, for example) may not be
12542 aligned in memory in the saved registers, so copy via a
12543 temporary. (This is the same code as used for SPARC.) */
12544 tree tmp = create_tmp_var (type, "va_arg_tmp");
12545 tree dest_addr = build_fold_addr_expr (tmp);
12546
12547 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12548 3, dest_addr, addr, size_int (rsize * 4));
12549 TREE_ADDRESSABLE (tmp) = 1;
12550
12551 gimplify_and_add (copy, pre_p);
12552 addr = dest_addr;
12553 }
12554
12555 addr = fold_convert (ptrtype, addr);
12556 return build_va_arg_indirect_ref (addr);
12557 }
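/* Worked example for the V4 code above (a sketch, assuming hard float):
   va_arg (ap, double) takes the FPR path with size == 8, n_reg == 1,
   sav_ofs == 32 (skipping the eight 4-byte GPR slots) and
   sav_scale == 8, so the register slot address works out to

	reg_save_area + 32 + fpr++ * 8

   and the overflow path is used once all eight argument FPRs have been
   consumed.  */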
12558
12559 /* Builtins. */
12560
12561 static void
12562 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12563 {
12564 tree t;
12565 unsigned classify = rs6000_builtin_info[(int)code].attr;
12566 const char *attr_string = "";
12567
12568 gcc_assert (name != NULL);
12569 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
12570
12571 if (rs6000_builtin_decls[(int)code])
12572 fatal_error (input_location,
12573 "internal error: builtin function %qs already processed",
12574 name);
12575
12576 rs6000_builtin_decls[(int)code] = t =
12577 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12578
12579 /* Set any special attributes. */
12580 if ((classify & RS6000_BTC_CONST) != 0)
12581 {
12582 /* const function, function only depends on the inputs. */
12583 TREE_READONLY (t) = 1;
12584 TREE_NOTHROW (t) = 1;
12585 attr_string = ", const";
12586 }
12587 else if ((classify & RS6000_BTC_PURE) != 0)
12588 {
12589 /* pure function, function can read global memory, but does not set any
12590 external state. */
12591 DECL_PURE_P (t) = 1;
12592 TREE_NOTHROW (t) = 1;
12593 attr_string = ", pure";
12594 }
12595 else if ((classify & RS6000_BTC_FP) != 0)
12596 {
12597 /* Function is a math function.  If -frounding-math is in effect, treat
12598 the function as not reading global memory, but allow it to have arbitrary
12599 side effects.  If it is off, then assume the function is a const function.
12600 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12601 builtin-attrs.def that is used for the math functions. */
12602 TREE_NOTHROW (t) = 1;
12603 if (flag_rounding_math)
12604 {
12605 DECL_PURE_P (t) = 1;
12606 DECL_IS_NOVOPS (t) = 1;
12607 attr_string = ", fp, pure";
12608 }
12609 else
12610 {
12611 TREE_READONLY (t) = 1;
12612 attr_string = ", fp, const";
12613 }
12614 }
12615 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12616 gcc_unreachable ();
12617
12618 if (TARGET_DEBUG_BUILTIN)
12619 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12620 (int)code, name, attr_string);
12621 }
12622
12623 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12624
12625 #undef RS6000_BUILTIN_0
12626 #undef RS6000_BUILTIN_1
12627 #undef RS6000_BUILTIN_2
12628 #undef RS6000_BUILTIN_3
12629 #undef RS6000_BUILTIN_A
12630 #undef RS6000_BUILTIN_D
12631 #undef RS6000_BUILTIN_H
12632 #undef RS6000_BUILTIN_P
12633 #undef RS6000_BUILTIN_X
12634
12635 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12636 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12637 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12638 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12639 { MASK, ICODE, NAME, ENUM },
12640
12641 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12642 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12643 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12644 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12645 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12646
12647 static const struct builtin_description bdesc_3arg[] =
12648 {
12649 #include "rs6000-builtin.def"
12650 };
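/* The #undef/#define blocks above and below are the classic X-macro
   technique: rs6000-builtin.def lists every builtin exactly once, and
   each inclusion redefines only the macro of interest so that just the
   matching entries expand into the table.  As a hypothetical sketch, a
   def-file line of the form

	RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
			  MASK, ATTR, ICODE)

   expands here to { MASK, ICODE, "__builtin_altivec_vmaddfp",
   ALTIVEC_BUILTIN_VMADDFP }, and to nothing in every other table.  */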
12651
12652 /* DST operations: void foo (void *, const int, const char). */
12653
12654 #undef RS6000_BUILTIN_0
12655 #undef RS6000_BUILTIN_1
12656 #undef RS6000_BUILTIN_2
12657 #undef RS6000_BUILTIN_3
12658 #undef RS6000_BUILTIN_A
12659 #undef RS6000_BUILTIN_D
12660 #undef RS6000_BUILTIN_H
12661 #undef RS6000_BUILTIN_P
12662 #undef RS6000_BUILTIN_X
12663
12664 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12665 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12666 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12667 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12668 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12669 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12670 { MASK, ICODE, NAME, ENUM },
12671
12672 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12673 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12674 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12675
12676 static const struct builtin_description bdesc_dst[] =
12677 {
12678 #include "rs6000-builtin.def"
12679 };
12680
12681 /* Simple binary operations: VECc = foo (VECa, VECb). */
12682
12683 #undef RS6000_BUILTIN_0
12684 #undef RS6000_BUILTIN_1
12685 #undef RS6000_BUILTIN_2
12686 #undef RS6000_BUILTIN_3
12687 #undef RS6000_BUILTIN_A
12688 #undef RS6000_BUILTIN_D
12689 #undef RS6000_BUILTIN_H
12690 #undef RS6000_BUILTIN_P
12691 #undef RS6000_BUILTIN_X
12692
12693 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12694 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12695 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12696 { MASK, ICODE, NAME, ENUM },
12697
12698 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12699 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12700 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12701 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12702 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12703 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12704
12705 static const struct builtin_description bdesc_2arg[] =
12706 {
12707 #include "rs6000-builtin.def"
12708 };
12709
12710 #undef RS6000_BUILTIN_0
12711 #undef RS6000_BUILTIN_1
12712 #undef RS6000_BUILTIN_2
12713 #undef RS6000_BUILTIN_3
12714 #undef RS6000_BUILTIN_A
12715 #undef RS6000_BUILTIN_D
12716 #undef RS6000_BUILTIN_H
12717 #undef RS6000_BUILTIN_P
12718 #undef RS6000_BUILTIN_X
12719
12720 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12721 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12722 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12723 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12724 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12725 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12726 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12727 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12728 { MASK, ICODE, NAME, ENUM },
12729
12730 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12731
12732 /* AltiVec predicates. */
12733
12734 static const struct builtin_description bdesc_altivec_preds[] =
12735 {
12736 #include "rs6000-builtin.def"
12737 };
12738
12739 /* ABS* operations. */
12740
12741 #undef RS6000_BUILTIN_0
12742 #undef RS6000_BUILTIN_1
12743 #undef RS6000_BUILTIN_2
12744 #undef RS6000_BUILTIN_3
12745 #undef RS6000_BUILTIN_A
12746 #undef RS6000_BUILTIN_D
12747 #undef RS6000_BUILTIN_H
12748 #undef RS6000_BUILTIN_P
12749 #undef RS6000_BUILTIN_X
12750
12751 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12752 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12753 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12754 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12755 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12756 { MASK, ICODE, NAME, ENUM },
12757
12758 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12759 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12760 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12761 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12762
12763 static const struct builtin_description bdesc_abs[] =
12764 {
12765 #include "rs6000-builtin.def"
12766 };
12767
12768 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12769 foo (VECa). */
12770
12771 #undef RS6000_BUILTIN_0
12772 #undef RS6000_BUILTIN_1
12773 #undef RS6000_BUILTIN_2
12774 #undef RS6000_BUILTIN_3
12775 #undef RS6000_BUILTIN_A
12776 #undef RS6000_BUILTIN_D
12777 #undef RS6000_BUILTIN_H
12778 #undef RS6000_BUILTIN_P
12779 #undef RS6000_BUILTIN_X
12780
12781 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12782 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12783 { MASK, ICODE, NAME, ENUM },
12784
12785 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12786 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12787 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12788 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12789 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12790 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12791 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12792
12793 static const struct builtin_description bdesc_1arg[] =
12794 {
12795 #include "rs6000-builtin.def"
12796 };
12797
12798 /* Simple no-argument operations: result = __builtin_darn_32 ().  */
12799
12800 #undef RS6000_BUILTIN_0
12801 #undef RS6000_BUILTIN_1
12802 #undef RS6000_BUILTIN_2
12803 #undef RS6000_BUILTIN_3
12804 #undef RS6000_BUILTIN_A
12805 #undef RS6000_BUILTIN_D
12806 #undef RS6000_BUILTIN_H
12807 #undef RS6000_BUILTIN_P
12808 #undef RS6000_BUILTIN_X
12809
12810 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
12811 { MASK, ICODE, NAME, ENUM },
12812
12813 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12814 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12815 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12816 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12817 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12818 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12819 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12820 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12821
12822 static const struct builtin_description bdesc_0arg[] =
12823 {
12824 #include "rs6000-builtin.def"
12825 };
12826
12827 /* HTM builtins. */
12828 #undef RS6000_BUILTIN_0
12829 #undef RS6000_BUILTIN_1
12830 #undef RS6000_BUILTIN_2
12831 #undef RS6000_BUILTIN_3
12832 #undef RS6000_BUILTIN_A
12833 #undef RS6000_BUILTIN_D
12834 #undef RS6000_BUILTIN_H
12835 #undef RS6000_BUILTIN_P
12836 #undef RS6000_BUILTIN_X
12837
12838 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
12839 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12840 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12841 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12842 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12843 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12844 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12845 { MASK, ICODE, NAME, ENUM },
12846
12847 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12848 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12849
12850 static const struct builtin_description bdesc_htm[] =
12851 {
12852 #include "rs6000-builtin.def"
12853 };
12854
12855 #undef RS6000_BUILTIN_0
12856 #undef RS6000_BUILTIN_1
12857 #undef RS6000_BUILTIN_2
12858 #undef RS6000_BUILTIN_3
12859 #undef RS6000_BUILTIN_A
12860 #undef RS6000_BUILTIN_D
12861 #undef RS6000_BUILTIN_H
12862 #undef RS6000_BUILTIN_P
12863
12864 /* Return true if a builtin function is overloaded. */
12865 bool
12866 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12867 {
12868 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12869 }
12870
12871 const char *
12872 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
12873 {
12874 return rs6000_builtin_info[(int)fncode].name;
12875 }
12876
12877 /* Expand an expression EXP that calls a builtin without arguments. */
12878 static rtx
12879 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12880 {
12881 rtx pat;
12882 machine_mode tmode = insn_data[icode].operand[0].mode;
12883
12884 if (icode == CODE_FOR_nothing)
12885 /* Builtin not supported on this processor. */
12886 return 0;
12887
12888 if (icode == CODE_FOR_rs6000_mffsl
12889 && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12890 {
12891 error ("%<__builtin_mffsl%> not supported with %<-msoft-float%>");
12892 return const0_rtx;
12893 }
12894
12895 if (target == 0
12896 || GET_MODE (target) != tmode
12897 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12898 target = gen_reg_rtx (tmode);
12899
12900 pat = GEN_FCN (icode) (target);
12901 if (! pat)
12902 return 0;
12903 emit_insn (pat);
12904
12905 return target;
12906 }
12907
12908
12909 static rtx
12910 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12911 {
12912 rtx pat;
12913 tree arg0 = CALL_EXPR_ARG (exp, 0);
12914 tree arg1 = CALL_EXPR_ARG (exp, 1);
12915 rtx op0 = expand_normal (arg0);
12916 rtx op1 = expand_normal (arg1);
12917 machine_mode mode0 = insn_data[icode].operand[0].mode;
12918 machine_mode mode1 = insn_data[icode].operand[1].mode;
12919
12920 if (icode == CODE_FOR_nothing)
12921 /* Builtin not supported on this processor. */
12922 return 0;
12923
12924 /* If we got invalid arguments bail out before generating bad rtl. */
12925 if (arg0 == error_mark_node || arg1 == error_mark_node)
12926 return const0_rtx;
12927
12928 if (!CONST_INT_P (op0)
12929 || INTVAL (op0) > 255
12930 || INTVAL (op0) < 0)
12931 {
12932 error ("argument 1 must be an 8-bit field value");
12933 return const0_rtx;
12934 }
12935
12936 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12937 op0 = copy_to_mode_reg (mode0, op0);
12938
12939 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12940 op1 = copy_to_mode_reg (mode1, op1);
12941
12942 pat = GEN_FCN (icode) (op0, op1);
12943 if (!pat)
12944 return const0_rtx;
12945 emit_insn (pat);
12946
12947 return NULL_RTX;
12948 }
12949
12950 static rtx
12951 rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp)
12952 {
12953 rtx pat;
12954 tree arg0 = CALL_EXPR_ARG (exp, 0);
12955 rtx op0 = expand_normal (arg0);
12956
12957 if (icode == CODE_FOR_nothing)
12958 /* Builtin not supported on this processor. */
12959 return 0;
12960
12961 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
12962 {
12963 error ("%<__builtin_mtfsb0%> and %<__builtin_mtfsb1%> not supported with "
12964 "%<-msoft-float%>");
12965 return const0_rtx;
12966 }
12967
12968 /* If we got invalid arguments bail out before generating bad rtl. */
12969 if (arg0 == error_mark_node)
12970 return const0_rtx;
12971
12972 /* Only allow bit numbers 0 to 31. */
12973 if (!u5bit_cint_operand (op0, VOIDmode))
12974 {
12975 error ("argument must be a constant between 0 and 31");
12976 return const0_rtx;
12977 }
12978
12979 pat = GEN_FCN (icode) (op0);
12980 if (!pat)
12981 return const0_rtx;
12982 emit_insn (pat);
12983
12984 return NULL_RTX;
12985 }
12986
12987 static rtx
12988 rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp)
12989 {
12990 rtx pat;
12991 tree arg0 = CALL_EXPR_ARG (exp, 0);
12992 rtx op0 = expand_normal (arg0);
12993 machine_mode mode0 = insn_data[icode].operand[0].mode;
12994
12995 if (icode == CODE_FOR_nothing)
12996 /* Builtin not supported on this processor. */
12997 return 0;
12998
12999 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13000 {
13001 error ("%<__builtin_set_fpscr_rn%> not supported with %<-msoft-float%>");
13002 return const0_rtx;
13003 }
13004
13005 /* If we got invalid arguments bail out before generating bad rtl. */
13006 if (arg0 == error_mark_node)
13007 return const0_rtx;
13008
13009 /* If the argument is a constant, check the range.  The argument can
13010 only be a 2-bit value.  Unfortunately, we can't check the range of
13011 the value at compile time if the argument is a variable.  The least
13012 significant two bits of the argument, regardless of type, are used
13013 to set the rounding mode.  All other bits are ignored. */
13014 if (CONST_INT_P (op0) && !const_0_to_3_operand (op0, VOIDmode))
13015 {
13016 error ("argument must be a value between 0 and 3");
13017 return const0_rtx;
13018 }
13019
13020 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13021 op0 = copy_to_mode_reg (mode0, op0);
13022
13023 pat = GEN_FCN (icode) (op0);
13024 if (!pat)
13025 return const0_rtx;
13026 emit_insn (pat);
13027
13028 return NULL_RTX;
13029 }
13030 static rtx
13031 rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp)
13032 {
13033 rtx pat;
13034 tree arg0 = CALL_EXPR_ARG (exp, 0);
13035 rtx op0 = expand_normal (arg0);
13036 machine_mode mode0 = insn_data[icode].operand[0].mode;
13037
13038 if (TARGET_32BIT)
13039 /* Builtin not supported in 32-bit mode. */
13040 fatal_error (input_location,
13041 "%<__builtin_set_fpscr_drn%> is not supported "
13042 "in 32-bit mode");
13043
13044 if (rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
13045 {
13046 error ("%<__builtin_set_fpscr_drn%> not supported with %<-msoft-float%>");
13047 return const0_rtx;
13048 }
13049
13050 if (icode == CODE_FOR_nothing)
13051 /* Builtin not supported on this processor. */
13052 return 0;
13053
13054 /* If we got invalid arguments bail out before generating bad rtl. */
13055 if (arg0 == error_mark_node)
13056 return const0_rtx;
13057
13058 /* If the argument is a constant, check the range.  The argument can
13059 only be a 3-bit value.  Unfortunately, we can't check the range of
13060 the value at compile time if the argument is a variable.  The least
13061 significant three bits of the argument, regardless of type, are used
13062 to set the decimal rounding mode.  All other bits are ignored.  */
13063 if (CONST_INT_P (op0) && !const_0_to_7_operand (op0, VOIDmode))
13064 {
13065 error ("argument must be a value between 0 and 7");
13066 return const0_rtx;
13067 }
13068
13069 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13070 op0 = copy_to_mode_reg (mode0, op0);
13071
13072 pat = GEN_FCN (icode) (op0);
13073 if (! pat)
13074 return const0_rtx;
13075 emit_insn (pat);
13076
13077 return NULL_RTX;
13078 }
13079
13080 static rtx
13081 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13082 {
13083 rtx pat;
13084 tree arg0 = CALL_EXPR_ARG (exp, 0);
13085 rtx op0 = expand_normal (arg0);
13086 machine_mode tmode = insn_data[icode].operand[0].mode;
13087 machine_mode mode0 = insn_data[icode].operand[1].mode;
13088
13089 if (icode == CODE_FOR_nothing)
13090 /* Builtin not supported on this processor. */
13091 return 0;
13092
13093 /* If we got invalid arguments bail out before generating bad rtl. */
13094 if (arg0 == error_mark_node)
13095 return const0_rtx;
13096
13097 if (icode == CODE_FOR_altivec_vspltisb
13098 || icode == CODE_FOR_altivec_vspltish
13099 || icode == CODE_FOR_altivec_vspltisw)
13100 {
13101 /* Only allow 5-bit *signed* literals. */
13102 if (!CONST_INT_P (op0)
13103 || INTVAL (op0) > 15
13104 || INTVAL (op0) < -16)
13105 {
13106 error ("argument 1 must be a 5-bit signed literal");
13107 return CONST0_RTX (tmode);
13108 }
13109 }
13110
13111 if (target == 0
13112 || GET_MODE (target) != tmode
13113 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13114 target = gen_reg_rtx (tmode);
13115
13116 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13117 op0 = copy_to_mode_reg (mode0, op0);
13118
13119 pat = GEN_FCN (icode) (target, op0);
13120 if (! pat)
13121 return 0;
13122 emit_insn (pat);
13123
13124 return target;
13125 }
13126
13127 static rtx
13128 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13129 {
13130 rtx pat, scratch1, scratch2;
13131 tree arg0 = CALL_EXPR_ARG (exp, 0);
13132 rtx op0 = expand_normal (arg0);
13133 machine_mode tmode = insn_data[icode].operand[0].mode;
13134 machine_mode mode0 = insn_data[icode].operand[1].mode;
13135
13136 /* If we have invalid arguments, bail out before generating bad rtl. */
13137 if (arg0 == error_mark_node)
13138 return const0_rtx;
13139
13140 if (target == 0
13141 || GET_MODE (target) != tmode
13142 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13143 target = gen_reg_rtx (tmode);
13144
13145 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13146 op0 = copy_to_mode_reg (mode0, op0);
13147
13148 scratch1 = gen_reg_rtx (mode0);
13149 scratch2 = gen_reg_rtx (mode0);
13150
13151 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13152 if (! pat)
13153 return 0;
13154 emit_insn (pat);
13155
13156 return target;
13157 }
13158
13159 static rtx
13160 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13161 {
13162 rtx pat;
13163 tree arg0 = CALL_EXPR_ARG (exp, 0);
13164 tree arg1 = CALL_EXPR_ARG (exp, 1);
13165 rtx op0 = expand_normal (arg0);
13166 rtx op1 = expand_normal (arg1);
13167 machine_mode tmode = insn_data[icode].operand[0].mode;
13168 machine_mode mode0 = insn_data[icode].operand[1].mode;
13169 machine_mode mode1 = insn_data[icode].operand[2].mode;
13170
13171 if (icode == CODE_FOR_nothing)
13172 /* Builtin not supported on this processor. */
13173 return 0;
13174
13175 /* If we got invalid arguments bail out before generating bad rtl. */
13176 if (arg0 == error_mark_node || arg1 == error_mark_node)
13177 return const0_rtx;
13178
13179 if (icode == CODE_FOR_unpackv1ti
13180 || icode == CODE_FOR_unpackkf
13181 || icode == CODE_FOR_unpacktf
13182 || icode == CODE_FOR_unpackif
13183 || icode == CODE_FOR_unpacktd)
13184 {
13185 /* Only allow 1-bit unsigned literals. */
13186 STRIP_NOPS (arg1);
13187 if (TREE_CODE (arg1) != INTEGER_CST
13188 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13189 {
13190 error ("argument 2 must be a 1-bit unsigned literal");
13191 return CONST0_RTX (tmode);
13192 }
13193 }
13194 else if (icode == CODE_FOR_altivec_vspltw)
13195 {
13196 /* Only allow 2-bit unsigned literals. */
13197 STRIP_NOPS (arg1);
13198 if (TREE_CODE (arg1) != INTEGER_CST
13199 || TREE_INT_CST_LOW (arg1) & ~3)
13200 {
13201 error ("argument 2 must be a 2-bit unsigned literal");
13202 return CONST0_RTX (tmode);
13203 }
13204 }
13205 else if (icode == CODE_FOR_altivec_vsplth)
13206 {
13207 /* Only allow 3-bit unsigned literals. */
13208 STRIP_NOPS (arg1);
13209 if (TREE_CODE (arg1) != INTEGER_CST
13210 || TREE_INT_CST_LOW (arg1) & ~7)
13211 {
13212 error ("argument 2 must be a 3-bit unsigned literal");
13213 return CONST0_RTX (tmode);
13214 }
13215 }
13216 else if (icode == CODE_FOR_altivec_vspltb)
13217 {
13218 /* Only allow 4-bit unsigned literals. */
13219 STRIP_NOPS (arg1);
13220 if (TREE_CODE (arg1) != INTEGER_CST
13221 || TREE_INT_CST_LOW (arg1) & ~15)
13222 {
13223 error ("argument 2 must be a 4-bit unsigned literal");
13224 return CONST0_RTX (tmode);
13225 }
13226 }
13227 else if (icode == CODE_FOR_altivec_vcfux
13228 || icode == CODE_FOR_altivec_vcfsx
13229 || icode == CODE_FOR_altivec_vctsxs
13230 || icode == CODE_FOR_altivec_vctuxs)
13231 {
13232 /* Only allow 5-bit unsigned literals. */
13233 STRIP_NOPS (arg1);
13234 if (TREE_CODE (arg1) != INTEGER_CST
13235 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13236 {
13237 error ("argument 2 must be a 5-bit unsigned literal");
13238 return CONST0_RTX (tmode);
13239 }
13240 }
13241 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13242 || icode == CODE_FOR_dfptstsfi_lt_dd
13243 || icode == CODE_FOR_dfptstsfi_gt_dd
13244 || icode == CODE_FOR_dfptstsfi_unordered_dd
13245 || icode == CODE_FOR_dfptstsfi_eq_td
13246 || icode == CODE_FOR_dfptstsfi_lt_td
13247 || icode == CODE_FOR_dfptstsfi_gt_td
13248 || icode == CODE_FOR_dfptstsfi_unordered_td)
13249 {
13250 /* Only allow 6-bit unsigned literals. */
13251 STRIP_NOPS (arg0);
13252 if (TREE_CODE (arg0) != INTEGER_CST
13253 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13254 {
13255 error ("argument 1 must be a 6-bit unsigned literal");
13256 return CONST0_RTX (tmode);
13257 }
13258 }
13259 else if (icode == CODE_FOR_xststdcqp_kf
13260 || icode == CODE_FOR_xststdcqp_tf
13261 || icode == CODE_FOR_xststdcdp
13262 || icode == CODE_FOR_xststdcsp
13263 || icode == CODE_FOR_xvtstdcdp
13264 || icode == CODE_FOR_xvtstdcsp)
13265 {
13266 /* Only allow 7-bit unsigned literals. */
13267 STRIP_NOPS (arg1);
13268 if (TREE_CODE (arg1) != INTEGER_CST
13269 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13270 {
13271 error ("argument 2 must be a 7-bit unsigned literal");
13272 return CONST0_RTX (tmode);
13273 }
13274 }
13275
13276 if (target == 0
13277 || GET_MODE (target) != tmode
13278 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13279 target = gen_reg_rtx (tmode);
13280
13281 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13282 op0 = copy_to_mode_reg (mode0, op0);
13283 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13284 op1 = copy_to_mode_reg (mode1, op1);
13285
13286 pat = GEN_FCN (icode) (target, op0, op1);
13287 if (! pat)
13288 return 0;
13289 emit_insn (pat);
13290
13291 return target;
13292 }
13293
13294 static rtx
13295 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13296 {
13297 rtx pat, scratch;
13298 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13299 tree arg0 = CALL_EXPR_ARG (exp, 1);
13300 tree arg1 = CALL_EXPR_ARG (exp, 2);
13301 rtx op0 = expand_normal (arg0);
13302 rtx op1 = expand_normal (arg1);
13303 machine_mode tmode = SImode;
13304 machine_mode mode0 = insn_data[icode].operand[1].mode;
13305 machine_mode mode1 = insn_data[icode].operand[2].mode;
13306 int cr6_form_int;
13307
13308 if (TREE_CODE (cr6_form) != INTEGER_CST)
13309 {
13310 error ("argument 1 of %qs must be a constant",
13311 "__builtin_altivec_predicate");
13312 return const0_rtx;
13313 }
13314 else
13315 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13316
13317 gcc_assert (mode0 == mode1);
13318
13319 /* If we have invalid arguments, bail out before generating bad rtl. */
13320 if (arg0 == error_mark_node || arg1 == error_mark_node)
13321 return const0_rtx;
13322
13323 if (target == 0
13324 || GET_MODE (target) != tmode
13325 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13326 target = gen_reg_rtx (tmode);
13327
13328 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13329 op0 = copy_to_mode_reg (mode0, op0);
13330 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13331 op1 = copy_to_mode_reg (mode1, op1);
13332
13333 /* Note that for many of the relevant operations (e.g. cmpne or
13334 cmpeq) with float or double operands, it makes more sense for the
13335 mode of the allocated scratch register to be a vector-of-integer
13336 mode.  But the choice to copy the mode of operand 0 was made
13337 long ago and there are no plans to change it. */
13338 scratch = gen_reg_rtx (mode0);
13339
13340 pat = GEN_FCN (icode) (scratch, op0, op1);
13341 if (! pat)
13342 return 0;
13343 emit_insn (pat);
13344
13345 /* The vec_any* and vec_all* predicates use the same opcodes for two
13346 different operations, but the bits in CR6 will be different
13347 depending on what information we want. So we have to play tricks
13348 with CR6 to get the right bits out.
13349
13350 If you think this is disgusting, look at the specs for the
13351 AltiVec predicates. */
13352
13353 switch (cr6_form_int)
13354 {
13355 case 0:
13356 emit_insn (gen_cr6_test_for_zero (target));
13357 break;
13358 case 1:
13359 emit_insn (gen_cr6_test_for_zero_reverse (target));
13360 break;
13361 case 2:
13362 emit_insn (gen_cr6_test_for_lt (target));
13363 break;
13364 case 3:
13365 emit_insn (gen_cr6_test_for_lt_reverse (target));
13366 break;
13367 default:
13368 error ("argument 1 of %qs is out of range",
13369 "__builtin_altivec_predicate");
13370 break;
13371 }
13372
13373 return target;
13374 }
13375
13376 rtx
13377 swap_endian_selector_for_mode (machine_mode mode)
13378 {
13379 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13380 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13381 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13382 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13383
13384 unsigned int *swaparray, i;
13385 rtx perm[16];
13386
13387 switch (mode)
13388 {
13389 case E_V1TImode:
13390 swaparray = swap1;
13391 break;
13392 case E_V2DFmode:
13393 case E_V2DImode:
13394 swaparray = swap2;
13395 break;
13396 case E_V4SFmode:
13397 case E_V4SImode:
13398 swaparray = swap4;
13399 break;
13400 case E_V8HImode:
13401 swaparray = swap8;
13402 break;
13403 default:
13404 gcc_unreachable ();
13405 }
13406
13407 for (i = 0; i < 16; ++i)
13408 perm[i] = GEN_INT (swaparray[i]);
13409
13410 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13411 gen_rtvec_v (16, perm)));
13412 }
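/* Example: for V4SImode the constant built above is the vperm control
   vector {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, which reverses
   the bytes within each of the four 32-bit lanes and so converts every
   lane between big- and little-endian order.  */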
13413
13414 static rtx
13415 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
13416 {
13417 rtx pat, addr;
13418 tree arg0 = CALL_EXPR_ARG (exp, 0);
13419 tree arg1 = CALL_EXPR_ARG (exp, 1);
13420 machine_mode tmode = insn_data[icode].operand[0].mode;
13421 machine_mode mode0 = Pmode;
13422 machine_mode mode1 = Pmode;
13423 rtx op0 = expand_normal (arg0);
13424 rtx op1 = expand_normal (arg1);
13425
13426 if (icode == CODE_FOR_nothing)
13427 /* Builtin not supported on this processor. */
13428 return 0;
13429
13430 /* If we got invalid arguments bail out before generating bad rtl. */
13431 if (arg0 == error_mark_node || arg1 == error_mark_node)
13432 return const0_rtx;
13433
13434 if (target == 0
13435 || GET_MODE (target) != tmode
13436 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13437 target = gen_reg_rtx (tmode);
13438
13439 op1 = copy_to_mode_reg (mode1, op1);
13440
13441 /* For LVX, express the RTL accurately by ANDing the address with -16.
13442 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
13443 so the raw address is fine. */
13444 if (icode == CODE_FOR_altivec_lvx_v1ti
13445 || icode == CODE_FOR_altivec_lvx_v2df
13446 || icode == CODE_FOR_altivec_lvx_v2di
13447 || icode == CODE_FOR_altivec_lvx_v4sf
13448 || icode == CODE_FOR_altivec_lvx_v4si
13449 || icode == CODE_FOR_altivec_lvx_v8hi
13450 || icode == CODE_FOR_altivec_lvx_v16qi)
13451 {
13452 rtx rawaddr;
13453 if (op0 == const0_rtx)
13454 rawaddr = op1;
13455 else
13456 {
13457 op0 = copy_to_mode_reg (mode0, op0);
13458 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
13459 }
13460 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13461 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
13462
13463 emit_insn (gen_rtx_SET (target, addr));
13464 }
13465 else
13466 {
13467 if (op0 == const0_rtx)
13468 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13469 else
13470 {
13471 op0 = copy_to_mode_reg (mode0, op0);
13472 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
13473 gen_rtx_PLUS (Pmode, op1, op0));
13474 }
13475
13476 pat = GEN_FCN (icode) (target, addr);
13477 if (! pat)
13478 return 0;
13479 emit_insn (pat);
13480 }
13481
13482 return target;
13483 }
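/* The AND with -16 above models what the lvx instruction itself does:
   the hardware ignores the low four address bits, so e.g. a raw
   address of 0x1007 fetches the 16-byte block at 0x1000.  */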
13484
13485 static rtx
13486 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
13487 {
13488 rtx pat;
13489 tree arg0 = CALL_EXPR_ARG (exp, 0);
13490 tree arg1 = CALL_EXPR_ARG (exp, 1);
13491 tree arg2 = CALL_EXPR_ARG (exp, 2);
13492 rtx op0 = expand_normal (arg0);
13493 rtx op1 = expand_normal (arg1);
13494 rtx op2 = expand_normal (arg2);
13495 machine_mode mode0 = insn_data[icode].operand[0].mode;
13496 machine_mode mode1 = insn_data[icode].operand[1].mode;
13497 machine_mode mode2 = insn_data[icode].operand[2].mode;
13498
13499 if (icode == CODE_FOR_nothing)
13500 /* Builtin not supported on this processor. */
13501 return NULL_RTX;
13502
13503 /* If we got invalid arguments bail out before generating bad rtl. */
13504 if (arg0 == error_mark_node
13505 || arg1 == error_mark_node
13506 || arg2 == error_mark_node)
13507 return NULL_RTX;
13508
13509 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13510 op0 = copy_to_mode_reg (mode0, op0);
13511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13512 op1 = copy_to_mode_reg (mode1, op1);
13513 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13514 op2 = copy_to_mode_reg (mode2, op2);
13515
13516 pat = GEN_FCN (icode) (op0, op1, op2);
13517 if (pat)
13518 emit_insn (pat);
13519
13520 return NULL_RTX;
13521 }
13522
13523 static rtx
13524 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13525 {
13526 tree arg0 = CALL_EXPR_ARG (exp, 0);
13527 tree arg1 = CALL_EXPR_ARG (exp, 1);
13528 tree arg2 = CALL_EXPR_ARG (exp, 2);
13529 rtx op0 = expand_normal (arg0);
13530 rtx op1 = expand_normal (arg1);
13531 rtx op2 = expand_normal (arg2);
13532 rtx pat, addr, rawaddr;
13533 machine_mode tmode = insn_data[icode].operand[0].mode;
13534 machine_mode smode = insn_data[icode].operand[1].mode;
13535 machine_mode mode1 = Pmode;
13536 machine_mode mode2 = Pmode;
13537
13538 /* Invalid arguments.  Bail out before doing anything stupid!  */
13539 if (arg0 == error_mark_node
13540 || arg1 == error_mark_node
13541 || arg2 == error_mark_node)
13542 return const0_rtx;
13543
13544 op2 = copy_to_mode_reg (mode2, op2);
13545
13546 /* For STVX, express the RTL accurately by ANDing the address with -16.
13547 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
13548 so the raw address is fine. */
13549 if (icode == CODE_FOR_altivec_stvx_v2df
13550 || icode == CODE_FOR_altivec_stvx_v2di
13551 || icode == CODE_FOR_altivec_stvx_v4sf
13552 || icode == CODE_FOR_altivec_stvx_v4si
13553 || icode == CODE_FOR_altivec_stvx_v8hi
13554 || icode == CODE_FOR_altivec_stvx_v16qi)
13555 {
13556 if (op1 == const0_rtx)
13557 rawaddr = op2;
13558 else
13559 {
13560 op1 = copy_to_mode_reg (mode1, op1);
13561 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
13562 }
13563
13564 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
13565 addr = gen_rtx_MEM (tmode, addr);
13566
13567 op0 = copy_to_mode_reg (tmode, op0);
13568
13569 emit_insn (gen_rtx_SET (addr, op0));
13570 }
13571 else
13572 {
13573 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13574 op0 = copy_to_mode_reg (smode, op0);
13575
13576 if (op1 == const0_rtx)
13577 addr = gen_rtx_MEM (tmode, op2);
13578 else
13579 {
13580 op1 = copy_to_mode_reg (mode1, op1);
13581 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
13582 }
13583
13584 pat = GEN_FCN (icode) (addr, op0);
13585 if (pat)
13586 emit_insn (pat);
13587 }
13588
13589 return NULL_RTX;
13590 }
13591
13592 /* Return the appropriate SPR number associated with the given builtin. */
13593 static inline HOST_WIDE_INT
13594 htm_spr_num (enum rs6000_builtins code)
13595 {
13596 if (code == HTM_BUILTIN_GET_TFHAR
13597 || code == HTM_BUILTIN_SET_TFHAR)
13598 return TFHAR_SPR;
13599 else if (code == HTM_BUILTIN_GET_TFIAR
13600 || code == HTM_BUILTIN_SET_TFIAR)
13601 return TFIAR_SPR;
13602 else if (code == HTM_BUILTIN_GET_TEXASR
13603 || code == HTM_BUILTIN_SET_TEXASR)
13604 return TEXASR_SPR;
13605 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13606 || code == HTM_BUILTIN_SET_TEXASRU);
13607 return TEXASRU_SPR;
13608 }
13609
13610 /* Return the correct ICODE value depending on whether we are
13611 setting or reading the HTM SPRs. */
13612 static inline enum insn_code
13613 rs6000_htm_spr_icode (bool nonvoid)
13614 {
13615 if (nonvoid)
13616 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13617 else
13618 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13619 }
13620
13621 /* Expand the HTM builtin in EXP and store the result in TARGET.
13622 Store true in *EXPANDEDP if we found a builtin to expand. */
13623 static rtx
13624 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13625 {
13626 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13627 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13628 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13629 const struct builtin_description *d;
13630 size_t i;
13631
13632 *expandedp = true;
13633
13634 if (!TARGET_POWERPC64
13635 && (fcode == HTM_BUILTIN_TABORTDC
13636 || fcode == HTM_BUILTIN_TABORTDCI))
13637 {
13638 size_t uns_fcode = (size_t)fcode;
13639 const char *name = rs6000_builtin_info[uns_fcode].name;
13640 error ("builtin %qs is only valid in 64-bit mode", name);
13641 return const0_rtx;
13642 }
13643
13644 /* Expand the HTM builtins. */
13645 d = bdesc_htm;
13646 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13647 if (d->code == fcode)
13648 {
13649 rtx op[MAX_HTM_OPERANDS], pat;
13650 int nopnds = 0;
13651 tree arg;
13652 call_expr_arg_iterator iter;
13653 unsigned attr = rs6000_builtin_info[fcode].attr;
13654 enum insn_code icode = d->icode;
13655 const struct insn_operand_data *insn_op;
13656 bool uses_spr = (attr & RS6000_BTC_SPR);
13657 rtx cr = NULL_RTX;
13658
13659 if (uses_spr)
13660 icode = rs6000_htm_spr_icode (nonvoid);
13661 insn_op = &insn_data[icode].operand[0];
13662
13663 if (nonvoid)
13664 {
13665 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
13666 if (!target
13667 || GET_MODE (target) != tmode
13668 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13669 target = gen_reg_rtx (tmode);
13670 if (uses_spr)
13671 op[nopnds++] = target;
13672 }
13673
13674 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13675 {
13676 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13677 return const0_rtx;
13678
13679 insn_op = &insn_data[icode].operand[nopnds];
13680
13681 op[nopnds] = expand_normal (arg);
13682
13683 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13684 {
13685 if (!strcmp (insn_op->constraint, "n"))
13686 {
13687 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13688 if (!CONST_INT_P (op[nopnds]))
13689 error ("argument %d must be an unsigned literal", arg_num);
13690 else
13691 error ("argument %d is an unsigned literal that is "
13692 "out of range", arg_num);
13693 return const0_rtx;
13694 }
13695 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13696 }
13697
13698 nopnds++;
13699 }
13700
13701 /* Handle the builtins for extended mnemonics. These accept
13702 no arguments, but map to builtins that take arguments. */
13703 switch (fcode)
13704 {
13705 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13706 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13707 op[nopnds++] = GEN_INT (1);
13708 if (flag_checking)
13709 attr |= RS6000_BTC_UNARY;
13710 break;
13711 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13712 op[nopnds++] = GEN_INT (0);
13713 if (flag_checking)
13714 attr |= RS6000_BTC_UNARY;
13715 break;
13716 default:
13717 break;
13718 }
13719
13720 /* If this builtin accesses SPRs, then pass in the appropriate
13721 SPR number and SPR regno as the last two operands. */
13722 if (uses_spr)
13723 {
13724 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13725 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13726 }
13727 /* If this builtin accesses a CR, then pass in a scratch
13728 CR as the last operand. */
13729 else if (attr & RS6000_BTC_CR)
13730 	  {
13731 	    cr = gen_reg_rtx (CCmode);
13732 	    op[nopnds++] = cr;
13733 	  }
13734 if (flag_checking)
13735 {
13736 int expected_nopnds = 0;
13737 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13738 expected_nopnds = 1;
13739 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13740 expected_nopnds = 2;
13741 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13742 expected_nopnds = 3;
13743 if (!(attr & RS6000_BTC_VOID))
13744 expected_nopnds += 1;
13745 if (uses_spr)
13746 expected_nopnds += 1;
13747
13748 gcc_assert (nopnds == expected_nopnds
13749 && nopnds <= MAX_HTM_OPERANDS);
13750 }
13751
13752 switch (nopnds)
13753 {
13754 case 1:
13755 pat = GEN_FCN (icode) (op[0]);
13756 break;
13757 case 2:
13758 pat = GEN_FCN (icode) (op[0], op[1]);
13759 break;
13760 case 3:
13761 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13762 break;
13763 case 4:
13764 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13765 break;
13766 default:
13767 gcc_unreachable ();
13768 }
13769 if (!pat)
13770 return NULL_RTX;
13771 emit_insn (pat);
13772
13773 if (attr & RS6000_BTC_CR)
13774 {
13775 if (fcode == HTM_BUILTIN_TBEGIN)
13776 {
13777 /* Emit code to set TARGET to true or false depending on
13778 whether the tbegin. instruction succeeded or failed
13779 to start a transaction. We do this by placing the 1's
13780 complement of CR's EQ bit into TARGET. */
13781 rtx scratch = gen_reg_rtx (SImode);
13782 emit_insn (gen_rtx_SET (scratch,
13783 gen_rtx_EQ (SImode, cr,
13784 const0_rtx)));
13785 emit_insn (gen_rtx_SET (target,
13786 gen_rtx_XOR (SImode, scratch,
13787 GEN_INT (1))));
13788 }
13789 else
13790 {
13791 /* Emit code to copy the 4-bit condition register field
13792 CR into the least significant end of register TARGET. */
13793 rtx scratch1 = gen_reg_rtx (SImode);
13794 rtx scratch2 = gen_reg_rtx (SImode);
13795 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13796 emit_insn (gen_movcc (subreg, cr));
13797 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13798 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13799 }
13800 }
13801
13802 if (nonvoid)
13803 return target;
13804 return const0_rtx;
13805 }
13806
13807 *expandedp = false;
13808 return NULL_RTX;
13809 }
13810
13811 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
13812
13813 static rtx
13814 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
13815 rtx target)
13816 {
13817 /* __builtin_cpu_init () is a nop, so expand to nothing. */
13818 if (fcode == RS6000_BUILTIN_CPU_INIT)
13819 return const0_rtx;
13820
13821 if (target == 0 || GET_MODE (target) != SImode)
13822 target = gen_reg_rtx (SImode);
13823
13824 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
13825 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
13826 /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back
13827 to a STRING_CST. */
13828 if (TREE_CODE (arg) == ARRAY_REF
13829 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
13830 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
13831 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
13832 arg = TREE_OPERAND (arg, 0);
13833
13834 if (TREE_CODE (arg) != STRING_CST)
13835 {
13836 error ("builtin %qs only accepts a string argument",
13837 rs6000_builtin_info[(size_t) fcode].name);
13838 return const0_rtx;
13839 }
13840
13841 if (fcode == RS6000_BUILTIN_CPU_IS)
13842 {
13843 const char *cpu = TREE_STRING_POINTER (arg);
13844 rtx cpuid = NULL_RTX;
13845 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
13846 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
13847 {
13848 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
13849 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
13850 break;
13851 }
13852 if (cpuid == NULL_RTX)
13853 {
13854 /* Invalid CPU argument. */
13855 error ("cpu %qs is an invalid argument to builtin %qs",
13856 cpu, rs6000_builtin_info[(size_t) fcode].name);
13857 return const0_rtx;
13858 }
13859
13860 rtx platform = gen_reg_rtx (SImode);
13861 rtx tcbmem = gen_const_mem (SImode,
13862 gen_rtx_PLUS (Pmode,
13863 gen_rtx_REG (Pmode, TLS_REGNUM),
13864 GEN_INT (TCB_PLATFORM_OFFSET)));
13865 emit_move_insn (platform, tcbmem);
13866 emit_insn (gen_eqsi3 (target, platform, cpuid));
13867 }
13868 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
13869 {
13870 const char *hwcap = TREE_STRING_POINTER (arg);
13871 rtx mask = NULL_RTX;
13872 int hwcap_offset;
13873 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
13874 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
13875 {
13876 mask = GEN_INT (cpu_supports_info[i].mask);
13877 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
13878 break;
13879 }
13880 if (mask == NULL_RTX)
13881 {
13882 /* Invalid HWCAP argument. */
13883 error ("%s %qs is an invalid argument to builtin %qs",
13884 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
13885 return const0_rtx;
13886 }
13887
13888 rtx tcb_hwcap = gen_reg_rtx (SImode);
13889 rtx tcbmem = gen_const_mem (SImode,
13890 gen_rtx_PLUS (Pmode,
13891 gen_rtx_REG (Pmode, TLS_REGNUM),
13892 GEN_INT (hwcap_offset)));
13893 emit_move_insn (tcb_hwcap, tcbmem);
13894 rtx scratch1 = gen_reg_rtx (SImode);
13895 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
13896 rtx scratch2 = gen_reg_rtx (SImode);
13897 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
13898 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
13899 }
13900 else
13901 gcc_unreachable ();
13902
13903 /* Record that we have expanded a CPU builtin, so that we can later
13904 emit a reference to the special symbol exported by LIBC to ensure we
13905 do not link against an old LIBC that doesn't support this feature. */
13906 cpu_builtin_p = true;
13907
13908 #else
13909 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
13910 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
13911
13912 /* For old LIBCs, always return FALSE. */
13913 emit_move_insn (target, GEN_INT (0));
13914 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
13915
13916 return target;
13917 }
13918
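/* Illustrative usage of the CPU builtins expanded above (example only,
   not part of GCC itself; do_power9_path/do_vsx_path are hypothetical
   user functions):

     if (__builtin_cpu_is ("power9"))
       do_power9_path ();
     else if (__builtin_cpu_supports ("vsx"))
       do_vsx_path ();

   The string arguments are matched against cpu_is_info[] and
   cpu_supports_info[] at compile time; the generated code then compares
   the platform/hwcap words read from the TCB, as emitted above.  */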
13919 static rtx
13920 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13921 {
13922 rtx pat;
13923 tree arg0 = CALL_EXPR_ARG (exp, 0);
13924 tree arg1 = CALL_EXPR_ARG (exp, 1);
13925 tree arg2 = CALL_EXPR_ARG (exp, 2);
13926 rtx op0 = expand_normal (arg0);
13927 rtx op1 = expand_normal (arg1);
13928 rtx op2 = expand_normal (arg2);
13929 machine_mode tmode = insn_data[icode].operand[0].mode;
13930 machine_mode mode0 = insn_data[icode].operand[1].mode;
13931 machine_mode mode1 = insn_data[icode].operand[2].mode;
13932 machine_mode mode2 = insn_data[icode].operand[3].mode;
13933
13934 if (icode == CODE_FOR_nothing)
13935 /* Builtin not supported on this processor. */
13936 return 0;
13937
13938 /* If we got invalid arguments bail out before generating bad rtl. */
13939 if (arg0 == error_mark_node
13940 || arg1 == error_mark_node
13941 || arg2 == error_mark_node)
13942 return const0_rtx;
13943
13944   /* Check and prepare the argument depending on the instruction code.
13945
13946      Note that a switch statement instead of this sequence of tests
13947      would be incorrect, as many of the CODE_FOR values may equal
13948      CODE_FOR_nothing on a given target, which would yield multiple
13949      case labels with identical values.  (We never reach this point
13950      at run time for such a code anyway, having returned above.)  */
13951 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13952 || icode == CODE_FOR_altivec_vsldoi_v2df
13953 || icode == CODE_FOR_altivec_vsldoi_v4si
13954 || icode == CODE_FOR_altivec_vsldoi_v8hi
13955 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13956 {
13957 /* Only allow 4-bit unsigned literals. */
13958 STRIP_NOPS (arg2);
13959 if (TREE_CODE (arg2) != INTEGER_CST
13960 || TREE_INT_CST_LOW (arg2) & ~0xf)
13961 {
13962 error ("argument 3 must be a 4-bit unsigned literal");
13963 return CONST0_RTX (tmode);
13964 }
13965 }
13966 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13967 || icode == CODE_FOR_vsx_xxpermdi_v2di
13968 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
13969 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
13970 || icode == CODE_FOR_vsx_xxpermdi_v1ti
13971 || icode == CODE_FOR_vsx_xxpermdi_v4sf
13972 || icode == CODE_FOR_vsx_xxpermdi_v4si
13973 || icode == CODE_FOR_vsx_xxpermdi_v8hi
13974 || icode == CODE_FOR_vsx_xxpermdi_v16qi
13975 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13976 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13977 || icode == CODE_FOR_vsx_xxsldwi_v4si
13978 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13979 || icode == CODE_FOR_vsx_xxsldwi_v2di
13980 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13981 {
13982 /* Only allow 2-bit unsigned literals. */
13983 STRIP_NOPS (arg2);
13984 if (TREE_CODE (arg2) != INTEGER_CST
13985 || TREE_INT_CST_LOW (arg2) & ~0x3)
13986 {
13987 error ("argument 3 must be a 2-bit unsigned literal");
13988 return CONST0_RTX (tmode);
13989 }
13990 }
13991 else if (icode == CODE_FOR_vsx_set_v2df
13992 || icode == CODE_FOR_vsx_set_v2di
13993 || icode == CODE_FOR_bcdadd
13994 || icode == CODE_FOR_bcdadd_lt
13995 || icode == CODE_FOR_bcdadd_eq
13996 || icode == CODE_FOR_bcdadd_gt
13997 || icode == CODE_FOR_bcdsub
13998 || icode == CODE_FOR_bcdsub_lt
13999 || icode == CODE_FOR_bcdsub_eq
14000 || icode == CODE_FOR_bcdsub_gt)
14001 {
14002 /* Only allow 1-bit unsigned literals. */
14003 STRIP_NOPS (arg2);
14004 if (TREE_CODE (arg2) != INTEGER_CST
14005 || TREE_INT_CST_LOW (arg2) & ~0x1)
14006 {
14007 error ("argument 3 must be a 1-bit unsigned literal");
14008 return CONST0_RTX (tmode);
14009 }
14010 }
14011 else if (icode == CODE_FOR_dfp_ddedpd_dd
14012 || icode == CODE_FOR_dfp_ddedpd_td)
14013 {
14014 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14015 STRIP_NOPS (arg0);
14016 if (TREE_CODE (arg0) != INTEGER_CST
14017 	  || TREE_INT_CST_LOW (arg0) & ~0x3)
14018 {
14019 error ("argument 1 must be 0 or 2");
14020 return CONST0_RTX (tmode);
14021 }
14022 }
14023 else if (icode == CODE_FOR_dfp_denbcd_dd
14024 || icode == CODE_FOR_dfp_denbcd_td)
14025 {
14026 /* Only allow 1-bit unsigned literals. */
14027 STRIP_NOPS (arg0);
14028 if (TREE_CODE (arg0) != INTEGER_CST
14029 || TREE_INT_CST_LOW (arg0) & ~0x1)
14030 {
14031 error ("argument 1 must be a 1-bit unsigned literal");
14032 return CONST0_RTX (tmode);
14033 }
14034 }
14035 else if (icode == CODE_FOR_dfp_dscli_dd
14036 || icode == CODE_FOR_dfp_dscli_td
14037 || icode == CODE_FOR_dfp_dscri_dd
14038 || icode == CODE_FOR_dfp_dscri_td)
14039 {
14040 /* Only allow 6-bit unsigned literals. */
14041 STRIP_NOPS (arg1);
14042 if (TREE_CODE (arg1) != INTEGER_CST
14043 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14044 {
14045 error ("argument 2 must be a 6-bit unsigned literal");
14046 return CONST0_RTX (tmode);
14047 }
14048 }
14049 else if (icode == CODE_FOR_crypto_vshasigmaw
14050 || icode == CODE_FOR_crypto_vshasigmad)
14051 {
14052 /* Check whether the 2nd and 3rd arguments are integer constants and in
14053 range and prepare arguments. */
14054 STRIP_NOPS (arg1);
14055 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14056 {
14057 error ("argument 2 must be 0 or 1");
14058 return CONST0_RTX (tmode);
14059 }
14060
14061 STRIP_NOPS (arg2);
14062 if (TREE_CODE (arg2) != INTEGER_CST
14063 || wi::geu_p (wi::to_wide (arg2), 16))
14064 {
14065 error ("argument 3 must be in the range [0, 15]");
14066 return CONST0_RTX (tmode);
14067 }
14068 }
14069
14070 if (target == 0
14071 || GET_MODE (target) != tmode
14072 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14073 target = gen_reg_rtx (tmode);
14074
14075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14076 op0 = copy_to_mode_reg (mode0, op0);
14077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14078 op1 = copy_to_mode_reg (mode1, op1);
14079 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14080 op2 = copy_to_mode_reg (mode2, op2);
14081
14082 pat = GEN_FCN (icode) (target, op0, op1, op2);
14083 if (! pat)
14084 return 0;
14085 emit_insn (pat);
14086
14087 return target;
14088 }
14089
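/* As an illustration of the literal checks above (example only): the
   vsldoi-based intrinsics, reachable via vec_sld, require a 4-bit
   compile-time constant as the third argument:

     r = vec_sld (a, b, 3);   // accepted: 3 fits in 4 bits
     r = vec_sld (a, b, n);   // rejected: "argument 3 must be a
                              //  4-bit unsigned literal"

   where a, b and r are AltiVec vectors and n is a non-constant int.  */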
14090
14091 /* Expand the dst builtins. */
14092 static rtx
14093 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14094 bool *expandedp)
14095 {
14096 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14097 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14098 tree arg0, arg1, arg2;
14099 machine_mode mode0, mode1;
14100 rtx pat, op0, op1, op2;
14101 const struct builtin_description *d;
14102 size_t i;
14103
14104 *expandedp = false;
14105
14106 /* Handle DST variants. */
14107 d = bdesc_dst;
14108 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14109 if (d->code == fcode)
14110 {
14111 arg0 = CALL_EXPR_ARG (exp, 0);
14112 arg1 = CALL_EXPR_ARG (exp, 1);
14113 arg2 = CALL_EXPR_ARG (exp, 2);
14114 op0 = expand_normal (arg0);
14115 op1 = expand_normal (arg1);
14116 op2 = expand_normal (arg2);
14117 mode0 = insn_data[d->icode].operand[0].mode;
14118 mode1 = insn_data[d->icode].operand[1].mode;
14119
14120 /* Invalid arguments, bail out before generating bad rtl. */
14121 if (arg0 == error_mark_node
14122 || arg1 == error_mark_node
14123 || arg2 == error_mark_node)
14124 return const0_rtx;
14125
14126 *expandedp = true;
14127 STRIP_NOPS (arg2);
14128 if (TREE_CODE (arg2) != INTEGER_CST
14129 || TREE_INT_CST_LOW (arg2) & ~0x3)
14130 {
14131 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14132 return const0_rtx;
14133 }
14134
14135 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14136 op0 = copy_to_mode_reg (Pmode, op0);
14137 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14138 op1 = copy_to_mode_reg (mode1, op1);
14139
14140 pat = GEN_FCN (d->icode) (op0, op1, op2);
14141 if (pat != 0)
14142 emit_insn (pat);
14143
14144 return NULL_RTX;
14145 }
14146
14147 return NULL_RTX;
14148 }
14149
14150 /* Expand vec_init builtin. */
14151 static rtx
14152 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14153 {
14154 machine_mode tmode = TYPE_MODE (type);
14155 machine_mode inner_mode = GET_MODE_INNER (tmode);
14156 int i, n_elt = GET_MODE_NUNITS (tmode);
14157
14158 gcc_assert (VECTOR_MODE_P (tmode));
14159 gcc_assert (n_elt == call_expr_nargs (exp));
14160
14161 if (!target || !register_operand (target, tmode))
14162 target = gen_reg_rtx (tmode);
14163
14164   /* If we have a vector composed of a single element, such as V1TImode, do
14165 the initialization directly. */
14166 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14167 {
14168 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14169 emit_move_insn (target, gen_lowpart (tmode, x));
14170 }
14171 else
14172 {
14173 rtvec v = rtvec_alloc (n_elt);
14174
14175 for (i = 0; i < n_elt; ++i)
14176 {
14177 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14178 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14179 }
14180
14181 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14182 }
14183
14184 return target;
14185 }
14186
14187 /* Return the integer constant in ARG. Constrain it to be in the range
14188 of the subparts of VEC_TYPE; issue an error if not. */
14189
14190 static int
14191 get_element_number (tree vec_type, tree arg)
14192 {
14193 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14194
14195 if (!tree_fits_uhwi_p (arg)
14196 || (elt = tree_to_uhwi (arg), elt > max))
14197 {
14198 error ("selector must be an integer constant in the range [0, %wi]", max);
14199 return 0;
14200 }
14201
14202 return elt;
14203 }
14204
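/* For example, with a V8HI vector type, get_element_number above has
   max == 7, so a constant selector of 9 is rejected with "selector must
   be an integer constant in the range [0, 7]".  */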
14205 /* Expand vec_set builtin. */
14206 static rtx
14207 altivec_expand_vec_set_builtin (tree exp)
14208 {
14209 machine_mode tmode, mode1;
14210 tree arg0, arg1, arg2;
14211 int elt;
14212 rtx op0, op1;
14213
14214 arg0 = CALL_EXPR_ARG (exp, 0);
14215 arg1 = CALL_EXPR_ARG (exp, 1);
14216 arg2 = CALL_EXPR_ARG (exp, 2);
14217
14218 tmode = TYPE_MODE (TREE_TYPE (arg0));
14219 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14220 gcc_assert (VECTOR_MODE_P (tmode));
14221
14222 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14223 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14224 elt = get_element_number (TREE_TYPE (arg0), arg2);
14225
14226 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14227 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14228
14229 op0 = force_reg (tmode, op0);
14230 op1 = force_reg (mode1, op1);
14231
14232 rs6000_expand_vector_set (op0, op1, elt);
14233
14234 return op0;
14235 }
14236
14237 /* Expand vec_ext builtin. */
14238 static rtx
14239 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14240 {
14241 machine_mode tmode, mode0;
14242 tree arg0, arg1;
14243 rtx op0;
14244 rtx op1;
14245
14246 arg0 = CALL_EXPR_ARG (exp, 0);
14247 arg1 = CALL_EXPR_ARG (exp, 1);
14248
14249 op0 = expand_normal (arg0);
14250 op1 = expand_normal (arg1);
14251
14252 if (TREE_CODE (arg1) == INTEGER_CST)
14253 {
14254 unsigned HOST_WIDE_INT elt;
14255 unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
14256 unsigned int truncated_selector;
14257       /* Even if !tree_fits_uhwi_p (arg1), TREE_INT_CST_LOW (arg1) returns
14258 	 the low-order bits of the INTEGER_CST, used here for modulo indexing.  */
14259 elt = TREE_INT_CST_LOW (arg1);
14260 truncated_selector = elt % size;
14261 op1 = GEN_INT (truncated_selector);
14262 }
14263
14264 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14265 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14266 gcc_assert (VECTOR_MODE_P (mode0));
14267
14268 op0 = force_reg (mode0, op0);
14269
14270 if (optimize || !target || !register_operand (target, tmode))
14271 target = gen_reg_rtx (tmode);
14272
14273 rs6000_expand_vector_extract (target, op0, op1);
14274
14275 return target;
14276 }
14277
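/* Note on the constant-selector truncation above (illustrative): for a
   V4SI input, a constant index of 5 is reduced to 5 % 4 == 1, so
   vec_extract (v, 5) selects element 1 rather than being rejected.  */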
14278 /* Expand the builtin in EXP and store the result in TARGET. Store
14279 true in *EXPANDEDP if we found a builtin to expand. */
14280 static rtx
14281 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14282 {
14283 const struct builtin_description *d;
14284 size_t i;
14285 enum insn_code icode;
14286 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14287 tree arg0, arg1, arg2;
14288 rtx op0, pat;
14289 machine_mode tmode, mode0;
14290 enum rs6000_builtins fcode
14291 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14292
14293 if (rs6000_overloaded_builtin_p (fcode))
14294 {
14295 *expandedp = true;
14296 error ("unresolved overload for Altivec builtin %qF", fndecl);
14297
14298 /* Given it is invalid, just generate a normal call. */
14299 return expand_call (exp, target, false);
14300 }
14301
14302 target = altivec_expand_dst_builtin (exp, target, expandedp);
14303 if (*expandedp)
14304 return target;
14305
14306 *expandedp = true;
14307
14308 switch (fcode)
14309 {
14310 case ALTIVEC_BUILTIN_STVX_V2DF:
14311 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14312 case ALTIVEC_BUILTIN_STVX_V2DI:
14313 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14314 case ALTIVEC_BUILTIN_STVX_V4SF:
14315 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14316 case ALTIVEC_BUILTIN_STVX:
14317 case ALTIVEC_BUILTIN_STVX_V4SI:
14318 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14319 case ALTIVEC_BUILTIN_STVX_V8HI:
14320 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14321 case ALTIVEC_BUILTIN_STVX_V16QI:
14322 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14323 case ALTIVEC_BUILTIN_STVEBX:
14324 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14325 case ALTIVEC_BUILTIN_STVEHX:
14326 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14327 case ALTIVEC_BUILTIN_STVEWX:
14328 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14329 case ALTIVEC_BUILTIN_STVXL_V2DF:
14330 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14331 case ALTIVEC_BUILTIN_STVXL_V2DI:
14332 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14333 case ALTIVEC_BUILTIN_STVXL_V4SF:
14334 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14335 case ALTIVEC_BUILTIN_STVXL:
14336 case ALTIVEC_BUILTIN_STVXL_V4SI:
14337 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14338 case ALTIVEC_BUILTIN_STVXL_V8HI:
14339 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14340 case ALTIVEC_BUILTIN_STVXL_V16QI:
14341 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14342
14343 case ALTIVEC_BUILTIN_STVLX:
14344 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14345 case ALTIVEC_BUILTIN_STVLXL:
14346 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14347 case ALTIVEC_BUILTIN_STVRX:
14348 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14349 case ALTIVEC_BUILTIN_STVRXL:
14350 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14351
14352 case P9V_BUILTIN_STXVL:
14353 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14354
14355 case P9V_BUILTIN_XST_LEN_R:
14356 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14357
14358 case VSX_BUILTIN_STXVD2X_V1TI:
14359 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14360 case VSX_BUILTIN_STXVD2X_V2DF:
14361 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14362 case VSX_BUILTIN_STXVD2X_V2DI:
14363 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14364 case VSX_BUILTIN_STXVW4X_V4SF:
14365 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14366 case VSX_BUILTIN_STXVW4X_V4SI:
14367 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14368 case VSX_BUILTIN_STXVW4X_V8HI:
14369 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14370 case VSX_BUILTIN_STXVW4X_V16QI:
14371 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14372
14373 /* For the following on big endian, it's ok to use any appropriate
14374 unaligned-supporting store, so use a generic expander. For
14375 little-endian, the exact element-reversing instruction must
14376 be used. */
14377 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14378 {
14379 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14380 : CODE_FOR_vsx_st_elemrev_v1ti);
14381 return altivec_expand_stv_builtin (code, exp);
14382 }
14383 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14384 {
14385 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14386 : CODE_FOR_vsx_st_elemrev_v2df);
14387 return altivec_expand_stv_builtin (code, exp);
14388 }
14389 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14390 {
14391 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14392 : CODE_FOR_vsx_st_elemrev_v2di);
14393 return altivec_expand_stv_builtin (code, exp);
14394 }
14395 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14396 {
14397 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14398 : CODE_FOR_vsx_st_elemrev_v4sf);
14399 return altivec_expand_stv_builtin (code, exp);
14400 }
14401 case VSX_BUILTIN_ST_ELEMREV_V4SI:
14402 {
14403 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
14404 : CODE_FOR_vsx_st_elemrev_v4si);
14405 return altivec_expand_stv_builtin (code, exp);
14406 }
14407 case VSX_BUILTIN_ST_ELEMREV_V8HI:
14408 {
14409 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
14410 : CODE_FOR_vsx_st_elemrev_v8hi);
14411 return altivec_expand_stv_builtin (code, exp);
14412 }
14413 case VSX_BUILTIN_ST_ELEMREV_V16QI:
14414 {
14415 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
14416 : CODE_FOR_vsx_st_elemrev_v16qi);
14417 return altivec_expand_stv_builtin (code, exp);
14418 }
14419
14420 case ALTIVEC_BUILTIN_MFVSCR:
14421 icode = CODE_FOR_altivec_mfvscr;
14422 tmode = insn_data[icode].operand[0].mode;
14423
14424 if (target == 0
14425 || GET_MODE (target) != tmode
14426 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14427 target = gen_reg_rtx (tmode);
14428
14429 pat = GEN_FCN (icode) (target);
14430 if (! pat)
14431 return 0;
14432 emit_insn (pat);
14433 return target;
14434
14435 case ALTIVEC_BUILTIN_MTVSCR:
14436 icode = CODE_FOR_altivec_mtvscr;
14437 arg0 = CALL_EXPR_ARG (exp, 0);
14438 op0 = expand_normal (arg0);
14439 mode0 = insn_data[icode].operand[0].mode;
14440
14441 /* If we got invalid arguments bail out before generating bad rtl. */
14442 if (arg0 == error_mark_node)
14443 return const0_rtx;
14444
14445 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14446 op0 = copy_to_mode_reg (mode0, op0);
14447
14448 pat = GEN_FCN (icode) (op0);
14449 if (pat)
14450 emit_insn (pat);
14451 return NULL_RTX;
14452
14453 case ALTIVEC_BUILTIN_DSSALL:
14454 emit_insn (gen_altivec_dssall ());
14455 return NULL_RTX;
14456
14457 case ALTIVEC_BUILTIN_DSS:
14458 icode = CODE_FOR_altivec_dss;
14459 arg0 = CALL_EXPR_ARG (exp, 0);
14460 STRIP_NOPS (arg0);
14461 op0 = expand_normal (arg0);
14462 mode0 = insn_data[icode].operand[0].mode;
14463
14464 /* If we got invalid arguments bail out before generating bad rtl. */
14465 if (arg0 == error_mark_node)
14466 return const0_rtx;
14467
14468 if (TREE_CODE (arg0) != INTEGER_CST
14469 || TREE_INT_CST_LOW (arg0) & ~0x3)
14470 {
14471 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
14472 return const0_rtx;
14473 }
14474
14475 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14476 op0 = copy_to_mode_reg (mode0, op0);
14477
14478 emit_insn (gen_altivec_dss (op0));
14479 return NULL_RTX;
14480
14481 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
14482 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
14483 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
14484 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
14485 case VSX_BUILTIN_VEC_INIT_V2DF:
14486 case VSX_BUILTIN_VEC_INIT_V2DI:
14487 case VSX_BUILTIN_VEC_INIT_V1TI:
14488 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14489
14490 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14491 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14492 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14493 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14494 case VSX_BUILTIN_VEC_SET_V2DF:
14495 case VSX_BUILTIN_VEC_SET_V2DI:
14496 case VSX_BUILTIN_VEC_SET_V1TI:
14497 return altivec_expand_vec_set_builtin (exp);
14498
14499 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14500 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14501 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14502 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14503 case VSX_BUILTIN_VEC_EXT_V2DF:
14504 case VSX_BUILTIN_VEC_EXT_V2DI:
14505 case VSX_BUILTIN_VEC_EXT_V1TI:
14506 return altivec_expand_vec_ext_builtin (exp, target);
14507
14508 case P9V_BUILTIN_VEC_EXTRACT4B:
14509 arg1 = CALL_EXPR_ARG (exp, 1);
14510 STRIP_NOPS (arg1);
14511
14512 /* Generate a normal call if it is invalid. */
14513 if (arg1 == error_mark_node)
14514 return expand_call (exp, target, false);
14515
14516 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
14517 {
14518 	  error ("second argument to %qs must be in the range [0, 12]", "vec_vextract4b");
14519 return expand_call (exp, target, false);
14520 }
14521 break;
14522
14523 case P9V_BUILTIN_VEC_INSERT4B:
14524 arg2 = CALL_EXPR_ARG (exp, 2);
14525 STRIP_NOPS (arg2);
14526
14527 /* Generate a normal call if it is invalid. */
14528 if (arg2 == error_mark_node)
14529 return expand_call (exp, target, false);
14530
14531 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
14532 {
14533 	  error ("third argument to %qs must be in the range [0, 12]", "vec_vinsert4b");
14534 return expand_call (exp, target, false);
14535 }
14536 break;
14537
14538     default:
14539       /* Fall through to the table-driven expanders below.  */
14540       break;
14541 }
14542
14543 /* Expand abs* operations. */
14544 d = bdesc_abs;
14545 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14546 if (d->code == fcode)
14547 return altivec_expand_abs_builtin (d->icode, exp, target);
14548
14549 /* Expand the AltiVec predicates. */
14550 d = bdesc_altivec_preds;
14551 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14552 if (d->code == fcode)
14553 return altivec_expand_predicate_builtin (d->icode, exp, target);
14554
14555   /* The LV* builtins are funky; they were initialized differently.  */
14556 switch (fcode)
14557 {
14558 case ALTIVEC_BUILTIN_LVSL:
14559 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14560 exp, target, false);
14561 case ALTIVEC_BUILTIN_LVSR:
14562 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14563 exp, target, false);
14564 case ALTIVEC_BUILTIN_LVEBX:
14565 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14566 exp, target, false);
14567 case ALTIVEC_BUILTIN_LVEHX:
14568 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14569 exp, target, false);
14570 case ALTIVEC_BUILTIN_LVEWX:
14571 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14572 exp, target, false);
14573 case ALTIVEC_BUILTIN_LVXL_V2DF:
14574 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14575 exp, target, false);
14576 case ALTIVEC_BUILTIN_LVXL_V2DI:
14577 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14578 exp, target, false);
14579 case ALTIVEC_BUILTIN_LVXL_V4SF:
14580 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14581 exp, target, false);
14582 case ALTIVEC_BUILTIN_LVXL:
14583 case ALTIVEC_BUILTIN_LVXL_V4SI:
14584 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14585 exp, target, false);
14586 case ALTIVEC_BUILTIN_LVXL_V8HI:
14587 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14588 exp, target, false);
14589 case ALTIVEC_BUILTIN_LVXL_V16QI:
14590 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14591 exp, target, false);
14592 case ALTIVEC_BUILTIN_LVX_V1TI:
14593 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
14594 exp, target, false);
14595 case ALTIVEC_BUILTIN_LVX_V2DF:
14596 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14597 exp, target, false);
14598 case ALTIVEC_BUILTIN_LVX_V2DI:
14599 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14600 exp, target, false);
14601 case ALTIVEC_BUILTIN_LVX_V4SF:
14602 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14603 exp, target, false);
14604 case ALTIVEC_BUILTIN_LVX:
14605 case ALTIVEC_BUILTIN_LVX_V4SI:
14606 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14607 exp, target, false);
14608 case ALTIVEC_BUILTIN_LVX_V8HI:
14609 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14610 exp, target, false);
14611 case ALTIVEC_BUILTIN_LVX_V16QI:
14612 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14613 exp, target, false);
14614 case ALTIVEC_BUILTIN_LVLX:
14615 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14616 exp, target, true);
14617 case ALTIVEC_BUILTIN_LVLXL:
14618 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14619 exp, target, true);
14620 case ALTIVEC_BUILTIN_LVRX:
14621 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14622 exp, target, true);
14623 case ALTIVEC_BUILTIN_LVRXL:
14624 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14625 exp, target, true);
14626 case VSX_BUILTIN_LXVD2X_V1TI:
14627 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14628 exp, target, false);
14629 case VSX_BUILTIN_LXVD2X_V2DF:
14630 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14631 exp, target, false);
14632 case VSX_BUILTIN_LXVD2X_V2DI:
14633 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14634 exp, target, false);
14635 case VSX_BUILTIN_LXVW4X_V4SF:
14636 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14637 exp, target, false);
14638 case VSX_BUILTIN_LXVW4X_V4SI:
14639 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14640 exp, target, false);
14641 case VSX_BUILTIN_LXVW4X_V8HI:
14642 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14643 exp, target, false);
14644 case VSX_BUILTIN_LXVW4X_V16QI:
14645 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14646 exp, target, false);
14647 /* For the following on big endian, it's ok to use any appropriate
14648 unaligned-supporting load, so use a generic expander. For
14649 little-endian, the exact element-reversing instruction must
14650 be used. */
14651 case VSX_BUILTIN_LD_ELEMREV_V2DF:
14652 {
14653 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
14654 : CODE_FOR_vsx_ld_elemrev_v2df);
14655 return altivec_expand_lv_builtin (code, exp, target, false);
14656 }
14657 case VSX_BUILTIN_LD_ELEMREV_V1TI:
14658 {
14659 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
14660 : CODE_FOR_vsx_ld_elemrev_v1ti);
14661 return altivec_expand_lv_builtin (code, exp, target, false);
14662 }
14663 case VSX_BUILTIN_LD_ELEMREV_V2DI:
14664 {
14665 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
14666 : CODE_FOR_vsx_ld_elemrev_v2di);
14667 return altivec_expand_lv_builtin (code, exp, target, false);
14668 }
14669 case VSX_BUILTIN_LD_ELEMREV_V4SF:
14670 {
14671 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
14672 : CODE_FOR_vsx_ld_elemrev_v4sf);
14673 return altivec_expand_lv_builtin (code, exp, target, false);
14674 }
14675 case VSX_BUILTIN_LD_ELEMREV_V4SI:
14676 {
14677 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
14678 : CODE_FOR_vsx_ld_elemrev_v4si);
14679 return altivec_expand_lv_builtin (code, exp, target, false);
14680 }
14681 case VSX_BUILTIN_LD_ELEMREV_V8HI:
14682 {
14683 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
14684 : CODE_FOR_vsx_ld_elemrev_v8hi);
14685 return altivec_expand_lv_builtin (code, exp, target, false);
14686 }
14687 case VSX_BUILTIN_LD_ELEMREV_V16QI:
14688 {
14689 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
14690 : CODE_FOR_vsx_ld_elemrev_v16qi);
14691 return altivec_expand_lv_builtin (code, exp, target, false);
14692 }
14693       break;
14694     default:
14695       /* Fall through to the code after the switch.  */
14696       break;
14697 }
14698
14699 *expandedp = false;
14700 return NULL_RTX;
14701 }
14702
14703 /* Check whether a builtin function is supported in this target
14704 configuration. */
14705 bool
14706 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
14707 {
14708   HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
14709
14710   /* The builtin is supported only if every feature bit it requires
14711      is enabled in the current rs6000_builtin_mask.  */
14712   return (fnmask & rs6000_builtin_mask) == fnmask;
14713 }
14714
14715 /* Raise an error message for a builtin function that is called without the
14716 appropriate target options being set. */
14717
14718 static void
14719 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14720 {
14721 size_t uns_fncode = (size_t) fncode;
14722 const char *name = rs6000_builtin_info[uns_fncode].name;
14723 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14724
14725 gcc_assert (name != NULL);
14726 if ((fnmask & RS6000_BTM_CELL) != 0)
14727 error ("builtin function %qs is only valid for the cell processor", name);
14728 else if ((fnmask & RS6000_BTM_VSX) != 0)
14729 error ("builtin function %qs requires the %qs option", name, "-mvsx");
14730 else if ((fnmask & RS6000_BTM_HTM) != 0)
14731 error ("builtin function %qs requires the %qs option", name, "-mhtm");
14732 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14733 error ("builtin function %qs requires the %qs option", name, "-maltivec");
14734 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14735 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14736 error ("builtin function %qs requires the %qs and %qs options",
14737 name, "-mhard-dfp", "-mpower8-vector");
14738 else if ((fnmask & RS6000_BTM_DFP) != 0)
14739 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
14740 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14741 error ("builtin function %qs requires the %qs option", name,
14742 "-mpower8-vector");
14743 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14744 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
14745 error ("builtin function %qs requires the %qs and %qs options",
14746 name, "-mcpu=power9", "-m64");
14747 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
14748 error ("builtin function %qs requires the %qs option", name,
14749 "-mcpu=power9");
14750 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14751 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
14752 error ("builtin function %qs requires the %qs and %qs options",
14753 name, "-mcpu=power9", "-m64");
14754 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
14755 error ("builtin function %qs requires the %qs option", name,
14756 "-mcpu=power9");
14757 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
14758 {
14759 if (!TARGET_HARD_FLOAT)
14760 error ("builtin function %qs requires the %qs option", name,
14761 "-mhard-float");
14762 else
14763 error ("builtin function %qs requires the %qs option", name,
14764 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
14765 }
14766 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14767 error ("builtin function %qs requires the %qs option", name,
14768 "-mhard-float");
14769 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
14770 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
14771 name);
14772 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
14773 error ("builtin function %qs requires the %qs option", name,
14774 "%<-mfloat128%>");
14775 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14776 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
14777     error ("builtin function %qs requires the %qs (or newer) and "
14778 "%qs or %qs options",
14779 name, "-mcpu=power7", "-m64", "-mpowerpc64");
14780 else
14781 error ("builtin function %qs is not supported with the current options",
14782 name);
14783 }
14784
14785 /* Target hook for early folding of built-ins, shamelessly stolen
14786 from ia64.c. */
14787
14788 static tree
14789 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
14790 int n_args ATTRIBUTE_UNUSED,
14791 tree *args ATTRIBUTE_UNUSED,
14792 bool ignore ATTRIBUTE_UNUSED)
14793 {
14794 #ifdef SUBTARGET_FOLD_BUILTIN
14795 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
14796 #else
14797 return NULL_TREE;
14798 #endif
14799 }
14800
14801 /* Helper function to sort out which built-ins may be valid without having
14802 a LHS. */
14803 static bool
14804 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
14805 {
14806 switch (fn_code)
14807 {
14808 case ALTIVEC_BUILTIN_STVX_V16QI:
14809 case ALTIVEC_BUILTIN_STVX_V8HI:
14810 case ALTIVEC_BUILTIN_STVX_V4SI:
14811 case ALTIVEC_BUILTIN_STVX_V4SF:
14812 case ALTIVEC_BUILTIN_STVX_V2DI:
14813 case ALTIVEC_BUILTIN_STVX_V2DF:
14814 case VSX_BUILTIN_STXVW4X_V16QI:
14815 case VSX_BUILTIN_STXVW4X_V8HI:
14816 case VSX_BUILTIN_STXVW4X_V4SF:
14817 case VSX_BUILTIN_STXVW4X_V4SI:
14818 case VSX_BUILTIN_STXVD2X_V2DF:
14819 case VSX_BUILTIN_STXVD2X_V2DI:
14820 return true;
14821 default:
14822 return false;
14823 }
14824 }
14825
14826 /* Helper function to handle the gimple folding of a vector compare
14827 operation. This sets up true/false vectors, and uses the
14828 VEC_COND_EXPR operation.
14829 CODE indicates which comparison is to be made. (EQ, GT, ...).
14830 TYPE indicates the type of the result. */
14831 static tree
14832 fold_build_vec_cmp (tree_code code, tree type,
14833 tree arg0, tree arg1)
14834 {
14835 tree cmp_type = build_same_sized_truth_vector_type (type);
14836 tree zero_vec = build_zero_cst (type);
14837 tree minus_one_vec = build_minus_one_cst (type);
14838 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
14839 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
14840 }
14841
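/* Illustrative GIMPLE produced by fold_build_vec_cmp for a V4SI
   comparison (SSA names are made up):

     cmp_1 = arg0_2 == arg1_3;
     lhs_4 = VEC_COND_EXPR <cmp_1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>;

   "True" lanes become all-ones and "false" lanes all-zeros, matching
   the AltiVec comparison convention.  */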
14842 /* Helper function to handle the in-between steps for the
14843 vector compare built-ins. */
14844 static void
14845 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
14846 {
14847 tree arg0 = gimple_call_arg (stmt, 0);
14848 tree arg1 = gimple_call_arg (stmt, 1);
14849 tree lhs = gimple_call_lhs (stmt);
14850 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
14851 gimple *g = gimple_build_assign (lhs, cmp);
14852 gimple_set_location (g, gimple_location (stmt));
14853 gsi_replace (gsi, g, true);
14854 }
14855
14856 /* Helper function to map V2DF and V4SF types to their
14857 integral equivalents (V2DI and V4SI). */
14858 tree map_to_integral_tree_type (tree input_tree_type)
14859 {
14860 if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type)))
14861 return input_tree_type;
14862 else
14863 {
14864 if (types_compatible_p (TREE_TYPE (input_tree_type),
14865 TREE_TYPE (V2DF_type_node)))
14866 return V2DI_type_node;
14867 else if (types_compatible_p (TREE_TYPE (input_tree_type),
14868 TREE_TYPE (V4SF_type_node)))
14869 return V4SI_type_node;
14870 else
14871 gcc_unreachable ();
14872 }
14873 }
14874
14875 /* Helper function to handle the vector merge[hl] built-ins.  The only
14876    implementation difference between the h and l versions is in the values
14877    used when building the permute vector for the high-word versus low-word
14878    merge; the variance is keyed off the use_high parameter.  */
14879 static void
14880 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
14881 {
14882 tree arg0 = gimple_call_arg (stmt, 0);
14883 tree arg1 = gimple_call_arg (stmt, 1);
14884 tree lhs = gimple_call_lhs (stmt);
14885 tree lhs_type = TREE_TYPE (lhs);
14886 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14887 int midpoint = n_elts / 2;
14888 int offset = 0;
14889
14890 if (use_high == 1)
14891 offset = midpoint;
14892
14893 /* The permute_type will match the lhs for integral types. For double and
14894 float types, the permute type needs to map to the V2 or V4 type that
14895 matches size. */
14896 tree permute_type;
14897 permute_type = map_to_integral_tree_type (lhs_type);
14898 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14899
14900 for (int i = 0; i < midpoint; i++)
14901 {
14902 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14903 offset + i));
14904 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14905 offset + n_elts + i));
14906 }
14907
14908 tree permute = elts.build ();
14909
14910 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14911 gimple_set_location (g, gimple_location (stmt));
14912 gsi_replace (gsi, g, true);
14913 }
14914
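/* Worked example for the loop above with a V4SI lhs (n_elts == 4,
   midpoint == 2): use_high == 0 builds the permute vector
   { 0, 4, 1, 5 }, interleaving the first halves of arg0 and arg1,
   while use_high == 1 (offset == 2) builds { 2, 6, 3, 7 },
   interleaving the second halves.  */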
14915 /* Helper function to handle the vector merge[eo] built-ins. */
14916 static void
14917 fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd)
14918 {
14919 tree arg0 = gimple_call_arg (stmt, 0);
14920 tree arg1 = gimple_call_arg (stmt, 1);
14921 tree lhs = gimple_call_lhs (stmt);
14922 tree lhs_type = TREE_TYPE (lhs);
14923 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
14924
14925 /* The permute_type will match the lhs for integral types. For double and
14926 float types, the permute type needs to map to the V2 or V4 type that
14927 matches size. */
14928 tree permute_type;
14929 permute_type = map_to_integral_tree_type (lhs_type);
14930
14931 tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1);
14932
14933 /* Build the permute vector. */
14934 for (int i = 0; i < n_elts / 2; i++)
14935 {
14936 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14937 2*i + use_odd));
14938 elts.safe_push (build_int_cst (TREE_TYPE (permute_type),
14939 2*i + use_odd + n_elts));
14940 }
14941
14942 tree permute = elts.build ();
14943
14944 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
14945 gimple_set_location (g, gimple_location (stmt));
14946 gsi_replace (gsi, g, true);
14947 }
14948
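/* Worked example for a V4SI lhs: use_odd == 0 builds { 0, 4, 2, 6 },
   selecting the even elements of each input, while use_odd == 1 builds
   { 1, 5, 3, 7 }, selecting the odd elements.  */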
14949 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
14950 a constant, use rs6000_fold_builtin.) */
14951
14952 bool
14953 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
14954 {
14955 gimple *stmt = gsi_stmt (*gsi);
14956 tree fndecl = gimple_call_fndecl (stmt);
14957 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
14958 enum rs6000_builtins fn_code
14959 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14960 tree arg0, arg1, lhs, temp;
14961 enum tree_code bcode;
14962 gimple *g;
14963
14964 size_t uns_fncode = (size_t) fn_code;
14965 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
14966 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
14967 const char *fn_name2 = (icode != CODE_FOR_nothing)
14968 ? get_insn_name ((int) icode)
14969 : "nothing";
14970
14971 if (TARGET_DEBUG_BUILTIN)
14972 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
14973 fn_code, fn_name1, fn_name2);
14974
14975 if (!rs6000_fold_gimple)
14976 return false;
14977
14978 /* Prevent gimple folding for code that does not have a LHS, unless it is
14979 allowed per the rs6000_builtin_valid_without_lhs helper function. */
14980 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
14981 return false;
14982
14983 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
14984 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
14985 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
14986 if (!func_valid_p)
14987 return false;
14988
14989 switch (fn_code)
14990 {
14991 /* Flavors of vec_add. We deliberately don't expand
14992 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
14993 TImode, resulting in much poorer code generation. */
14994 case ALTIVEC_BUILTIN_VADDUBM:
14995 case ALTIVEC_BUILTIN_VADDUHM:
14996 case ALTIVEC_BUILTIN_VADDUWM:
14997 case P8V_BUILTIN_VADDUDM:
14998 case ALTIVEC_BUILTIN_VADDFP:
14999 case VSX_BUILTIN_XVADDDP:
15000 bcode = PLUS_EXPR;
15001 do_binary:
15002 arg0 = gimple_call_arg (stmt, 0);
15003 arg1 = gimple_call_arg (stmt, 1);
15004 lhs = gimple_call_lhs (stmt);
15005 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs)))
15006 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs))))
15007 {
15008 /* Ensure the binary operation is performed in a type
15009 	     that wraps if it is an integral type.  */
15010 gimple_seq stmts = NULL;
15011 tree type = unsigned_type_for (TREE_TYPE (lhs));
15012 tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15013 type, arg0);
15014 tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15015 type, arg1);
15016 tree res = gimple_build (&stmts, gimple_location (stmt), bcode,
15017 type, uarg0, uarg1);
15018 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15019 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR,
15020 build1 (VIEW_CONVERT_EXPR,
15021 TREE_TYPE (lhs), res));
15022 gsi_replace (gsi, g, true);
15023 return true;
15024 }
15025 g = gimple_build_assign (lhs, bcode, arg0, arg1);
15026 gimple_set_location (g, gimple_location (stmt));
15027 gsi_replace (gsi, g, true);
15028 return true;
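      /* Illustrative GIMPLE for the signed-integral rewrite above, e.g.
	 vec_add on vector signed int (SSA names are made up):

	   u0_1 = VIEW_CONVERT_EXPR<vector unsigned int>(arg0);
	   u1_2 = VIEW_CONVERT_EXPR<vector unsigned int>(arg1);
	   t_3  = u0_1 + u1_2;
	   lhs  = VIEW_CONVERT_EXPR<vector signed int>(t_3);

	 Doing the arithmetic in the unsigned type makes the lane-wise
	 wrap-around well defined.  */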
15029 /* Flavors of vec_sub. We deliberately don't expand
15030 P8V_BUILTIN_VSUBUQM. */
15031 case ALTIVEC_BUILTIN_VSUBUBM:
15032 case ALTIVEC_BUILTIN_VSUBUHM:
15033 case ALTIVEC_BUILTIN_VSUBUWM:
15034 case P8V_BUILTIN_VSUBUDM:
15035 case ALTIVEC_BUILTIN_VSUBFP:
15036 case VSX_BUILTIN_XVSUBDP:
15037 bcode = MINUS_EXPR;
15038 goto do_binary;
15039 case VSX_BUILTIN_XVMULSP:
15040 case VSX_BUILTIN_XVMULDP:
15041 arg0 = gimple_call_arg (stmt, 0);
15042 arg1 = gimple_call_arg (stmt, 1);
15043 lhs = gimple_call_lhs (stmt);
15044 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15045 gimple_set_location (g, gimple_location (stmt));
15046 gsi_replace (gsi, g, true);
15047 return true;
15048 /* Even element flavors of vec_mul (signed). */
15049 case ALTIVEC_BUILTIN_VMULESB:
15050 case ALTIVEC_BUILTIN_VMULESH:
15051 case P8V_BUILTIN_VMULESW:
15052 /* Even element flavors of vec_mul (unsigned). */
15053 case ALTIVEC_BUILTIN_VMULEUB:
15054 case ALTIVEC_BUILTIN_VMULEUH:
15055 case P8V_BUILTIN_VMULEUW:
15056 arg0 = gimple_call_arg (stmt, 0);
15057 arg1 = gimple_call_arg (stmt, 1);
15058 lhs = gimple_call_lhs (stmt);
15059 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15060 gimple_set_location (g, gimple_location (stmt));
15061 gsi_replace (gsi, g, true);
15062 return true;
15063 /* Odd element flavors of vec_mul (signed). */
15064 case ALTIVEC_BUILTIN_VMULOSB:
15065 case ALTIVEC_BUILTIN_VMULOSH:
15066 case P8V_BUILTIN_VMULOSW:
15067 /* Odd element flavors of vec_mul (unsigned). */
15068 case ALTIVEC_BUILTIN_VMULOUB:
15069 case ALTIVEC_BUILTIN_VMULOUH:
15070 case P8V_BUILTIN_VMULOUW:
15071 arg0 = gimple_call_arg (stmt, 0);
15072 arg1 = gimple_call_arg (stmt, 1);
15073 lhs = gimple_call_lhs (stmt);
15074 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15075 gimple_set_location (g, gimple_location (stmt));
15076 gsi_replace (gsi, g, true);
15077 return true;
15078 /* Flavors of vec_div (Integer). */
15079 case VSX_BUILTIN_DIV_V2DI:
15080 case VSX_BUILTIN_UDIV_V2DI:
15081 arg0 = gimple_call_arg (stmt, 0);
15082 arg1 = gimple_call_arg (stmt, 1);
15083 lhs = gimple_call_lhs (stmt);
15084 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15085 gimple_set_location (g, gimple_location (stmt));
15086 gsi_replace (gsi, g, true);
15087 return true;
15088 /* Flavors of vec_div (Float). */
15089 case VSX_BUILTIN_XVDIVSP:
15090 case VSX_BUILTIN_XVDIVDP:
15091 arg0 = gimple_call_arg (stmt, 0);
15092 arg1 = gimple_call_arg (stmt, 1);
15093 lhs = gimple_call_lhs (stmt);
15094 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15095 gimple_set_location (g, gimple_location (stmt));
15096 gsi_replace (gsi, g, true);
15097 return true;
15098 /* Flavors of vec_and. */
15099 case ALTIVEC_BUILTIN_VAND:
15100 arg0 = gimple_call_arg (stmt, 0);
15101 arg1 = gimple_call_arg (stmt, 1);
15102 lhs = gimple_call_lhs (stmt);
15103 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15104 gimple_set_location (g, gimple_location (stmt));
15105 gsi_replace (gsi, g, true);
15106 return true;
15107 /* Flavors of vec_andc. */
15108 case ALTIVEC_BUILTIN_VANDC:
15109 arg0 = gimple_call_arg (stmt, 0);
15110 arg1 = gimple_call_arg (stmt, 1);
15111 lhs = gimple_call_lhs (stmt);
15112 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15113 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15114 gimple_set_location (g, gimple_location (stmt));
15115 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15116 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15117 gimple_set_location (g, gimple_location (stmt));
15118 gsi_replace (gsi, g, true);
15119 return true;
15120 /* Flavors of vec_nand. */
15121 case P8V_BUILTIN_VEC_NAND:
15122 case P8V_BUILTIN_NAND_V16QI:
15123 case P8V_BUILTIN_NAND_V8HI:
15124 case P8V_BUILTIN_NAND_V4SI:
15125 case P8V_BUILTIN_NAND_V4SF:
15126 case P8V_BUILTIN_NAND_V2DF:
15127 case P8V_BUILTIN_NAND_V2DI:
15128 arg0 = gimple_call_arg (stmt, 0);
15129 arg1 = gimple_call_arg (stmt, 1);
15130 lhs = gimple_call_lhs (stmt);
15131 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15132 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15133 gimple_set_location (g, gimple_location (stmt));
15134 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15135 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15136 gimple_set_location (g, gimple_location (stmt));
15137 gsi_replace (gsi, g, true);
15138 return true;
15139 /* Flavors of vec_or. */
15140 case ALTIVEC_BUILTIN_VOR:
15141 arg0 = gimple_call_arg (stmt, 0);
15142 arg1 = gimple_call_arg (stmt, 1);
15143 lhs = gimple_call_lhs (stmt);
15144 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15145 gimple_set_location (g, gimple_location (stmt));
15146 gsi_replace (gsi, g, true);
15147 return true;
15148 /* flavors of vec_orc. */
15149 case P8V_BUILTIN_ORC_V16QI:
15150 case P8V_BUILTIN_ORC_V8HI:
15151 case P8V_BUILTIN_ORC_V4SI:
15152 case P8V_BUILTIN_ORC_V4SF:
15153 case P8V_BUILTIN_ORC_V2DF:
15154 case P8V_BUILTIN_ORC_V2DI:
15155 arg0 = gimple_call_arg (stmt, 0);
15156 arg1 = gimple_call_arg (stmt, 1);
15157 lhs = gimple_call_lhs (stmt);
15158 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15159 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15160 gimple_set_location (g, gimple_location (stmt));
15161 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15162 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15163 gimple_set_location (g, gimple_location (stmt));
15164 gsi_replace (gsi, g, true);
15165 return true;
15166 /* Flavors of vec_xor. */
15167 case ALTIVEC_BUILTIN_VXOR:
15168 arg0 = gimple_call_arg (stmt, 0);
15169 arg1 = gimple_call_arg (stmt, 1);
15170 lhs = gimple_call_lhs (stmt);
15171 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15172 gimple_set_location (g, gimple_location (stmt));
15173 gsi_replace (gsi, g, true);
15174 return true;
15175 /* Flavors of vec_nor. */
15176 case ALTIVEC_BUILTIN_VNOR:
15177 arg0 = gimple_call_arg (stmt, 0);
15178 arg1 = gimple_call_arg (stmt, 1);
15179 lhs = gimple_call_lhs (stmt);
15180 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15181 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15182 gimple_set_location (g, gimple_location (stmt));
15183 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15184 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15185 gimple_set_location (g, gimple_location (stmt));
15186 gsi_replace (gsi, g, true);
15187 return true;
15188 /* flavors of vec_abs. */
15189 case ALTIVEC_BUILTIN_ABS_V16QI:
15190 case ALTIVEC_BUILTIN_ABS_V8HI:
15191 case ALTIVEC_BUILTIN_ABS_V4SI:
15192 case ALTIVEC_BUILTIN_ABS_V4SF:
15193 case P8V_BUILTIN_ABS_V2DI:
15194 case VSX_BUILTIN_XVABSDP:
15195 arg0 = gimple_call_arg (stmt, 0);
15196 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15197 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15198 return false;
15199 lhs = gimple_call_lhs (stmt);
15200 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15201 gimple_set_location (g, gimple_location (stmt));
15202 gsi_replace (gsi, g, true);
15203 return true;
15204 /* flavors of vec_min. */
15205 case VSX_BUILTIN_XVMINDP:
15206 case P8V_BUILTIN_VMINSD:
15207 case P8V_BUILTIN_VMINUD:
15208 case ALTIVEC_BUILTIN_VMINSB:
15209 case ALTIVEC_BUILTIN_VMINSH:
15210 case ALTIVEC_BUILTIN_VMINSW:
15211 case ALTIVEC_BUILTIN_VMINUB:
15212 case ALTIVEC_BUILTIN_VMINUH:
15213 case ALTIVEC_BUILTIN_VMINUW:
15214 case ALTIVEC_BUILTIN_VMINFP:
15215 arg0 = gimple_call_arg (stmt, 0);
15216 arg1 = gimple_call_arg (stmt, 1);
15217 lhs = gimple_call_lhs (stmt);
15218 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15219 gimple_set_location (g, gimple_location (stmt));
15220 gsi_replace (gsi, g, true);
15221 return true;
15222 /* flavors of vec_max. */
15223 case VSX_BUILTIN_XVMAXDP:
15224 case P8V_BUILTIN_VMAXSD:
15225 case P8V_BUILTIN_VMAXUD:
15226 case ALTIVEC_BUILTIN_VMAXSB:
15227 case ALTIVEC_BUILTIN_VMAXSH:
15228 case ALTIVEC_BUILTIN_VMAXSW:
15229 case ALTIVEC_BUILTIN_VMAXUB:
15230 case ALTIVEC_BUILTIN_VMAXUH:
15231 case ALTIVEC_BUILTIN_VMAXUW:
15232 case ALTIVEC_BUILTIN_VMAXFP:
15233 arg0 = gimple_call_arg (stmt, 0);
15234 arg1 = gimple_call_arg (stmt, 1);
15235 lhs = gimple_call_lhs (stmt);
15236 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15237 gimple_set_location (g, gimple_location (stmt));
15238 gsi_replace (gsi, g, true);
15239 return true;
15240 /* Flavors of vec_eqv. */
15241 case P8V_BUILTIN_EQV_V16QI:
15242 case P8V_BUILTIN_EQV_V8HI:
15243 case P8V_BUILTIN_EQV_V4SI:
15244 case P8V_BUILTIN_EQV_V4SF:
15245 case P8V_BUILTIN_EQV_V2DF:
15246 case P8V_BUILTIN_EQV_V2DI:
15247 arg0 = gimple_call_arg (stmt, 0);
15248 arg1 = gimple_call_arg (stmt, 1);
15249 lhs = gimple_call_lhs (stmt);
15250 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15251 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15252 gimple_set_location (g, gimple_location (stmt));
15253 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15254 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15255 gimple_set_location (g, gimple_location (stmt));
15256 gsi_replace (gsi, g, true);
15257 return true;
15258 /* Flavors of vec_rotate_left. */
15259 case ALTIVEC_BUILTIN_VRLB:
15260 case ALTIVEC_BUILTIN_VRLH:
15261 case ALTIVEC_BUILTIN_VRLW:
15262 case P8V_BUILTIN_VRLD:
15263 arg0 = gimple_call_arg (stmt, 0);
15264 arg1 = gimple_call_arg (stmt, 1);
15265 lhs = gimple_call_lhs (stmt);
15266 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15267 gimple_set_location (g, gimple_location (stmt));
15268 gsi_replace (gsi, g, true);
15269 return true;
15270 /* Flavors of vector shift right algebraic.
15271 vec_sra{b,h,w} -> vsra{b,h,w}. */
15272 case ALTIVEC_BUILTIN_VSRAB:
15273 case ALTIVEC_BUILTIN_VSRAH:
15274 case ALTIVEC_BUILTIN_VSRAW:
15275 case P8V_BUILTIN_VSRAD:
15276 {
15277 arg0 = gimple_call_arg (stmt, 0);
15278 arg1 = gimple_call_arg (stmt, 1);
15279 lhs = gimple_call_lhs (stmt);
15280 tree arg1_type = TREE_TYPE (arg1);
15281 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15282 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15283 location_t loc = gimple_location (stmt);
15284 	/* Force arg1 into the valid range for the arg0 element type.  */
15285 /* Build a vector consisting of the max valid bit-size values. */
15286 int n_elts = VECTOR_CST_NELTS (arg1);
15287 tree element_size = build_int_cst (unsigned_element_type,
15288 128 / n_elts);
15289 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15290 for (int i = 0; i < n_elts; i++)
15291 elts.safe_push (element_size);
15292 tree modulo_tree = elts.build ();
15293 /* Modulo the provided shift value against that vector. */
15294 gimple_seq stmts = NULL;
15295 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15296 unsigned_arg1_type, arg1);
15297 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15298 unsigned_arg1_type, unsigned_arg1,
15299 modulo_tree);
15300 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15301 /* And finally, do the shift. */
15302 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
15303 gimple_set_location (g, loc);
15304 gsi_replace (gsi, g, true);
15305 return true;
15306 }
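      /* Example of the modulo rewrite above: for a V4SI shift the
	 element size is 128 / 4 == 32 bits, so a lane shift count of 35
	 is reduced to 35 % 32 == 3 before the arithmetic right shift is
	 emitted.  */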
15307 /* Flavors of vector shift left.
15308 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15309 case ALTIVEC_BUILTIN_VSLB:
15310 case ALTIVEC_BUILTIN_VSLH:
15311 case ALTIVEC_BUILTIN_VSLW:
15312 case P8V_BUILTIN_VSLD:
15313 {
15314 location_t loc;
15315 gimple_seq stmts = NULL;
15316 arg0 = gimple_call_arg (stmt, 0);
15317 tree arg0_type = TREE_TYPE (arg0);
15318 if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type))
15319 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type)))
15320 return false;
15321 arg1 = gimple_call_arg (stmt, 1);
15322 tree arg1_type = TREE_TYPE (arg1);
15323 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15324 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15325 loc = gimple_location (stmt);
15326 lhs = gimple_call_lhs (stmt);
15327 	/* Force arg1 into the valid range for the arg0 element type.  */
15328 /* Build a vector consisting of the max valid bit-size values. */
15329 int n_elts = VECTOR_CST_NELTS (arg1);
15330 int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type))
15331 * BITS_PER_UNIT;
15332 tree element_size = build_int_cst (unsigned_element_type,
15333 tree_size_in_bits / n_elts);
15334 tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1);
15335 for (int i = 0; i < n_elts; i++)
15336 elts.safe_push (element_size);
15337 tree modulo_tree = elts.build ();
15338 /* Modulo the provided shift value against that vector. */
15339 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15340 unsigned_arg1_type, arg1);
15341 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15342 unsigned_arg1_type, unsigned_arg1,
15343 modulo_tree);
15344 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15345 /* And finally, do the shift. */
15346 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1);
15347 gimple_set_location (g, gimple_location (stmt));
15348 gsi_replace (gsi, g, true);
15349 return true;
15350 }
15351 /* Flavors of vector shift right. */
15352 case ALTIVEC_BUILTIN_VSRB:
15353 case ALTIVEC_BUILTIN_VSRH:
15354 case ALTIVEC_BUILTIN_VSRW:
15355 case P8V_BUILTIN_VSRD:
15356 {
15357 arg0 = gimple_call_arg (stmt, 0);
15358 arg1 = gimple_call_arg (stmt, 1);
15359 lhs = gimple_call_lhs (stmt);
15360 tree arg1_type = TREE_TYPE (arg1);
15361 tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
15362 tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
15363 location_t loc = gimple_location (stmt);
15364 gimple_seq stmts = NULL;
15365 /* Convert arg0 to unsigned. */
15366 tree arg0_unsigned
15367 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15368 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15369 	/* Force arg1 into the valid range for the arg0 element type.  */
15370 /* Build a vector consisting of the max valid bit-size values. */
15371 int n_elts = VECTOR_CST_NELTS (arg1);
15372 tree element_size = build_int_cst (unsigned_element_type,
15373 128 / n_elts);
15374 tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
15375 for (int i = 0; i < n_elts; i++)
15376 elts.safe_push (element_size);
15377 tree modulo_tree = elts.build ();
15378 /* Modulo the provided shift value against that vector. */
15379 tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15380 unsigned_arg1_type, arg1);
15381 tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
15382 unsigned_arg1_type, unsigned_arg1,
15383 modulo_tree);
15384 /* Do the shift. */
15385 tree res
15386 = gimple_build (&stmts, RSHIFT_EXPR,
15387 TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1);
15388 /* Convert result back to the lhs type. */
15389 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15390 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15391 update_call_from_tree (gsi, res);
15392 return true;
15393 }
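      /* Note the contrast with the VSRA case above: arg0 is first
	 view-converted to its unsigned type, so the RSHIFT_EXPR built
	 here is a logical (zero-filling) shift rather than an
	 arithmetic one.  */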
15394 /* Vector loads. */
15395 case ALTIVEC_BUILTIN_LVX_V16QI:
15396 case ALTIVEC_BUILTIN_LVX_V8HI:
15397 case ALTIVEC_BUILTIN_LVX_V4SI:
15398 case ALTIVEC_BUILTIN_LVX_V4SF:
15399 case ALTIVEC_BUILTIN_LVX_V2DI:
15400 case ALTIVEC_BUILTIN_LVX_V2DF:
15401 case ALTIVEC_BUILTIN_LVX_V1TI:
15402 {
15403 arg0 = gimple_call_arg (stmt, 0); // offset
15404 arg1 = gimple_call_arg (stmt, 1); // address
15405 lhs = gimple_call_lhs (stmt);
15406 location_t loc = gimple_location (stmt);
15407 /* Since arg1 may be cast to a different type, just use ptr_type_node
15408 here instead of trying to enforce TBAA on pointer types. */
15409 tree arg1_type = ptr_type_node;
15410 tree lhs_type = TREE_TYPE (lhs);
15411 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15412 the tree using the value from arg0. The resulting type will match
15413 the type of arg1. */
15414 gimple_seq stmts = NULL;
15415 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15416 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15417 arg1_type, arg1, temp_offset);
15418 /* Mask off any lower bits from the address. */
15419 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15420 arg1_type, temp_addr,
15421 build_int_cst (arg1_type, -16));
15422 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15423 if (!is_gimple_mem_ref_addr (aligned_addr))
15424 {
15425 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15426 gimple *g = gimple_build_assign (t, aligned_addr);
15427 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15428 aligned_addr = t;
15429 }
15430 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15431 take an offset, but since we've already incorporated the offset
15432 above, here we just pass in a zero. */
15433 gimple *g
15434 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15435 build_int_cst (arg1_type, 0)));
15436 gimple_set_location (g, loc);
15437 gsi_replace (gsi, g, true);
15438 return true;
15439 }
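/* Illustrative example (not from the surrounding code): the BIT_AND_EXPR
   with -16 above gives lvx its architected behavior of ignoring the low
   four address bits:

     #include <altivec.h>
     vector signed int
     ld (long off, const vector signed int *p)
     {
       return vec_ld (off, p);
     }

   loads 16 bytes from ((char *) p + off) rounded down to a 16-byte
   boundary.  */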
15440 /* Vector stores. */
15441 case ALTIVEC_BUILTIN_STVX_V16QI:
15442 case ALTIVEC_BUILTIN_STVX_V8HI:
15443 case ALTIVEC_BUILTIN_STVX_V4SI:
15444 case ALTIVEC_BUILTIN_STVX_V4SF:
15445 case ALTIVEC_BUILTIN_STVX_V2DI:
15446 case ALTIVEC_BUILTIN_STVX_V2DF:
15447 {
15448 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15449 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15450 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15451 location_t loc = gimple_location (stmt);
15452 tree arg0_type = TREE_TYPE (arg0);
15453 /* Use ptr_type_node (no TBAA) for the arg2_type.
15454 FIXME: (Richard) "A proper fix would be to transition this type as
15455 seen from the frontend to GIMPLE, for example in a similar way we
15456 do for MEM_REFs by piggy-backing that on an extra argument, a
15457 constant zero pointer of the alias pointer type to use (which would
15458 also serve as a type indicator of the store itself). I'd use a
15459 target specific internal function for this (not sure if we can have
15460 those target specific, but I guess if it's folded away then that's
15461 fine) and get away with the overload set." */
15462 tree arg2_type = ptr_type_node;
15463 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15464 the tree using the value from arg1. The resulting type will match
15465 the type of arg2. */
15466 gimple_seq stmts = NULL;
15467 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15468 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15469 arg2_type, arg2, temp_offset);
15470 /* Mask off any lower bits from the address. */
15471 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15472 arg2_type, temp_addr,
15473 build_int_cst (arg2_type, -16));
15474 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15475 if (!is_gimple_mem_ref_addr (aligned_addr))
15476 {
15477 tree t = make_ssa_name (TREE_TYPE (aligned_addr));
15478 gimple *g = gimple_build_assign (t, aligned_addr);
15479 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15480 aligned_addr = t;
15481 }
15482 /* The desired gimple result should be similar to:
15483 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15484 gimple *g
15485 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15486 build_int_cst (arg2_type, 0)), arg0);
15487 gimple_set_location (g, loc);
15488 gsi_replace (gsi, g, true);
15489 return true;
15490 }
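/* Illustrative example (not from the surrounding code): the store fold
   mirrors the load case:

     #include <altivec.h>
     void
     st (vector signed int v, long off, vector signed int *p)
     {
       vec_st (v, off, p);
     }

   stores v through a MEM_REF whose address is ((char *) p + off) rounded
   down to a 16-byte boundary.  */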
15491
15492 /* Unaligned vector loads. */
15493 case VSX_BUILTIN_LXVW4X_V16QI:
15494 case VSX_BUILTIN_LXVW4X_V8HI:
15495 case VSX_BUILTIN_LXVW4X_V4SF:
15496 case VSX_BUILTIN_LXVW4X_V4SI:
15497 case VSX_BUILTIN_LXVD2X_V2DF:
15498 case VSX_BUILTIN_LXVD2X_V2DI:
15499 {
15500 arg0 = gimple_call_arg (stmt, 0); // offset
15501 arg1 = gimple_call_arg (stmt, 1); // address
15502 lhs = gimple_call_lhs (stmt);
15503 location_t loc = gimple_location (stmt);
15504 /* Since arg1 may be cast to a different type, just use ptr_type_node
15505 here instead of trying to enforce TBAA on pointer types. */
15506 tree arg1_type = ptr_type_node;
15507 tree lhs_type = TREE_TYPE (lhs);
15508 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15509 required alignment on Power is 4 bytes regardless of data type. */
15510 tree align_ltype = build_aligned_type (lhs_type, 4);
15511 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15512 the tree using the value from arg0. The resulting type will match
15513 the type of arg1. */
15514 gimple_seq stmts = NULL;
15515 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15516 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15517 arg1_type, arg1, temp_offset);
15518 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15519 if (!is_gimple_mem_ref_addr (temp_addr))
15520 {
15521 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15522 gimple *g = gimple_build_assign (t, temp_addr);
15523 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15524 temp_addr = t;
15525 }
15526 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15527 take an offset, but since we've already incorporated the offset
15528 above, here we just pass in a zero. */
15529 gimple *g;
15530 g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
15531 build_int_cst (arg1_type, 0)));
15532 gimple_set_location (g, loc);
15533 gsi_replace (gsi, g, true);
15534 return true;
15535 }
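/* Illustrative example (not from the surrounding code): unlike lvx, the
   unaligned folds keep the address intact and only record the 4-byte
   alignment on the MEM_REF type:

     #include <altivec.h>
     vector signed int
     ldu (long off, const vector signed int *p)
     {
       return vec_vsx_ld (off, p);   // no 16-byte address masking
     }
*/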
15536
15537 /* Unaligned vector stores. */
15538 case VSX_BUILTIN_STXVW4X_V16QI:
15539 case VSX_BUILTIN_STXVW4X_V8HI:
15540 case VSX_BUILTIN_STXVW4X_V4SF:
15541 case VSX_BUILTIN_STXVW4X_V4SI:
15542 case VSX_BUILTIN_STXVD2X_V2DF:
15543 case VSX_BUILTIN_STXVD2X_V2DI:
15544 {
15545 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15546 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15547 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15548 location_t loc = gimple_location (stmt);
15549 tree arg0_type = TREE_TYPE (arg0);
15550 /* Use ptr_type_node (no TBAA) for the arg2_type. */
15551 tree arg2_type = ptr_type_node;
15552 /* In GIMPLE the type of the MEM_REF specifies the alignment. The
15553 required alignment on Power is 4 bytes regardless of data type. */
15554 tree align_stype = build_aligned_type (arg0_type, 4);
15555 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15556 the tree using the value from arg1. */
15557 gimple_seq stmts = NULL;
15558 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15559 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15560 arg2_type, arg2, temp_offset);
15561 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15562 if (!is_gimple_mem_ref_addr (temp_addr))
15563 {
15564 tree t = make_ssa_name (TREE_TYPE (temp_addr));
15565 gimple *g = gimple_build_assign (t, temp_addr);
15566 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15567 temp_addr = t;
15568 }
15569 gimple *g;
15570 g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
15571 build_int_cst (arg2_type, 0)), arg0);
15572 gimple_set_location (g, loc);
15573 gsi_replace (gsi, g, true);
15574 return true;
15575 }
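/* Illustrative example (not from the surrounding code):

     #include <altivec.h>
     void
     stu (vector signed int v, long off, vector signed int *p)
     {
       vec_vsx_st (v, off, p);   // unaligned; MEM_REF carries 4-byte alignment
     }

   The address-masking step of the stvx fold is simply absent here.  */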
15576
15577 /* Vector Fused multiply-add (fma). */
15578 case ALTIVEC_BUILTIN_VMADDFP:
15579 case VSX_BUILTIN_XVMADDDP:
15580 case ALTIVEC_BUILTIN_VMLADDUHM:
15581 {
15582 arg0 = gimple_call_arg (stmt, 0);
15583 arg1 = gimple_call_arg (stmt, 1);
15584 tree arg2 = gimple_call_arg (stmt, 2);
15585 lhs = gimple_call_lhs (stmt);
15586 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15587 gimple_call_set_lhs (g, lhs);
15588 gimple_call_set_nothrow (g, true);
15589 gimple_set_location (g, gimple_location (stmt));
15590 gsi_replace (gsi, g, true);
15591 return true;
15592 }
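/* Illustrative example (not from the surrounding code): folding to IFN_FMA
   lets the middle end treat the operation generically:

     #include <altivec.h>
     vector float
     madd (vector float a, vector float b, vector float c)
     {
       return vec_madd (a, b, c);
     }

   becomes a call to the internal function .FMA (a, b, c) in GIMPLE dumps,
   which later expands to the target's fused multiply-add patterns.  */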
15593
15594 /* Vector compares: EQ, NE, GE, GT, LE. */
15595 case ALTIVEC_BUILTIN_VCMPEQUB:
15596 case ALTIVEC_BUILTIN_VCMPEQUH:
15597 case ALTIVEC_BUILTIN_VCMPEQUW:
15598 case P8V_BUILTIN_VCMPEQUD:
15599 fold_compare_helper (gsi, EQ_EXPR, stmt);
15600 return true;
15601
15602 case P9V_BUILTIN_CMPNEB:
15603 case P9V_BUILTIN_CMPNEH:
15604 case P9V_BUILTIN_CMPNEW:
15605 fold_compare_helper (gsi, NE_EXPR, stmt);
15606 return true;
15607
15608 case VSX_BUILTIN_CMPGE_16QI:
15609 case VSX_BUILTIN_CMPGE_U16QI:
15610 case VSX_BUILTIN_CMPGE_8HI:
15611 case VSX_BUILTIN_CMPGE_U8HI:
15612 case VSX_BUILTIN_CMPGE_4SI:
15613 case VSX_BUILTIN_CMPGE_U4SI:
15614 case VSX_BUILTIN_CMPGE_2DI:
15615 case VSX_BUILTIN_CMPGE_U2DI:
15616 fold_compare_helper (gsi, GE_EXPR, stmt);
15617 return true;
15618
15619 case ALTIVEC_BUILTIN_VCMPGTSB:
15620 case ALTIVEC_BUILTIN_VCMPGTUB:
15621 case ALTIVEC_BUILTIN_VCMPGTSH:
15622 case ALTIVEC_BUILTIN_VCMPGTUH:
15623 case ALTIVEC_BUILTIN_VCMPGTSW:
15624 case ALTIVEC_BUILTIN_VCMPGTUW:
15625 case P8V_BUILTIN_VCMPGTUD:
15626 case P8V_BUILTIN_VCMPGTSD:
15627 fold_compare_helper (gsi, GT_EXPR, stmt);
15628 return true;
15629
15630 case VSX_BUILTIN_CMPLE_16QI:
15631 case VSX_BUILTIN_CMPLE_U16QI:
15632 case VSX_BUILTIN_CMPLE_8HI:
15633 case VSX_BUILTIN_CMPLE_U8HI:
15634 case VSX_BUILTIN_CMPLE_4SI:
15635 case VSX_BUILTIN_CMPLE_U4SI:
15636 case VSX_BUILTIN_CMPLE_2DI:
15637 case VSX_BUILTIN_CMPLE_U2DI:
15638 fold_compare_helper (gsi, LE_EXPR, stmt);
15639 return true;
15640
15641 /* Flavors of vec_splat_[us]{8,16,32}. */
15642 case ALTIVEC_BUILTIN_VSPLTISB:
15643 case ALTIVEC_BUILTIN_VSPLTISH:
15644 case ALTIVEC_BUILTIN_VSPLTISW:
15645 {
15646 arg0 = gimple_call_arg (stmt, 0);
15647 lhs = gimple_call_lhs (stmt);
15648
15649 /* Only fold the vec_splat_*() if the lower bits of arg0 are a
15650 5-bit signed constant in the range -16 to +15. */
15651 if (TREE_CODE (arg0) != INTEGER_CST
15652 || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15))
15653 return false;
15654 gimple_seq stmts = NULL;
15655 location_t loc = gimple_location (stmt);
15656 tree splat_value = gimple_convert (&stmts, loc,
15657 TREE_TYPE (TREE_TYPE (lhs)), arg0);
15658 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15659 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
15660 g = gimple_build_assign (lhs, splat_tree);
15661 gimple_set_location (g, gimple_location (stmt));
15662 gsi_replace (gsi, g, true);
15663 return true;
15664 }
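/* Illustrative example (not from the surrounding code): with an in-range
   literal the call folds to a constant vector:

     #include <altivec.h>
     vector signed int
     five (void)
     {
       return vec_splat_s32 (5);   // folds to { 5, 5, 5, 5 }
     }

   Anything else fails the INTEGER_CST/IN_RANGE test above and is left for
   the expander.  */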
15665
15666 /* Flavors of vec_splat. */
15667 /* a = vec_splat (b, 0x3) becomes a = { b[3], b[3], b[3], ... }; */
15668 case ALTIVEC_BUILTIN_VSPLTB:
15669 case ALTIVEC_BUILTIN_VSPLTH:
15670 case ALTIVEC_BUILTIN_VSPLTW:
15671 case VSX_BUILTIN_XXSPLTD_V2DI:
15672 case VSX_BUILTIN_XXSPLTD_V2DF:
15673 {
15674 arg0 = gimple_call_arg (stmt, 0); /* input vector. */
15675 arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */
15676 /* Only fold the vec_splat () if arg1 is both a constant value and
15677 a valid index into the arg0 vector. */
15678 unsigned int n_elts = VECTOR_CST_NELTS (arg0);
15679 if (TREE_CODE (arg1) != INTEGER_CST
15680 || TREE_INT_CST_LOW (arg1) > (n_elts - 1))
15681 return false;
15682 lhs = gimple_call_lhs (stmt);
15683 tree lhs_type = TREE_TYPE (lhs);
15684 tree arg0_type = TREE_TYPE (arg0);
15685 tree splat;
15686 if (TREE_CODE (arg0) == VECTOR_CST)
15687 splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1));
15688 else
15689 {
15690 /* Determine (in bits) the length and start location of the
15691 splat value for a call to the tree_vec_extract helper. */
15692 int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type))
15693 * BITS_PER_UNIT / n_elts;
15694 int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size;
15695 tree len = build_int_cst (bitsizetype, splat_elem_size);
15696 tree start = build_int_cst (bitsizetype, splat_start_bit);
15697 splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0,
15698 len, start);
15699 }
15700 /* And finally, build the new vector. */
15701 tree splat_tree = build_vector_from_val (lhs_type, splat);
15702 g = gimple_build_assign (lhs, splat_tree);
15703 gimple_set_location (g, gimple_location (stmt));
15704 gsi_replace (gsi, g, true);
15705 return true;
15706 }
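/* Illustrative example (not from the surrounding code): with a constant,
   in-range index the splat becomes a vector built from one element:

     #include <altivec.h>
     vector signed int
     dup2 (vector signed int v)
     {
       return vec_splat (v, 2);   // folds to { v[2], v[2], v[2], v[2] }
     }

   For a VECTOR_CST argument the element is taken directly; otherwise it is
   extracted via the tree_vec_extract helper.  */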
15707
15708 /* vec_mergel (integrals). */
15709 case ALTIVEC_BUILTIN_VMRGLH:
15710 case ALTIVEC_BUILTIN_VMRGLW:
15711 case VSX_BUILTIN_XXMRGLW_4SI:
15712 case ALTIVEC_BUILTIN_VMRGLB:
15713 case VSX_BUILTIN_VEC_MERGEL_V2DI:
15714 case VSX_BUILTIN_XXMRGLW_4SF:
15715 case VSX_BUILTIN_VEC_MERGEL_V2DF:
15716 fold_mergehl_helper (gsi, stmt, 1);
15717 return true;
15718 /* vec_mergeh (integrals). */
15719 case ALTIVEC_BUILTIN_VMRGHH:
15720 case ALTIVEC_BUILTIN_VMRGHW:
15721 case VSX_BUILTIN_XXMRGHW_4SI:
15722 case ALTIVEC_BUILTIN_VMRGHB:
15723 case VSX_BUILTIN_VEC_MERGEH_V2DI:
15724 case VSX_BUILTIN_XXMRGHW_4SF:
15725 case VSX_BUILTIN_VEC_MERGEH_V2DF:
15726 fold_mergehl_helper (gsi, stmt, 0);
15727 return true;
15728
15729 /* Flavors of vec_mergee. */
15730 case P8V_BUILTIN_VMRGEW_V4SI:
15731 case P8V_BUILTIN_VMRGEW_V2DI:
15732 case P8V_BUILTIN_VMRGEW_V4SF:
15733 case P8V_BUILTIN_VMRGEW_V2DF:
15734 fold_mergeeo_helper (gsi, stmt, 0);
15735 return true;
15736 /* Flavors of vec_mergeo. */
15737 case P8V_BUILTIN_VMRGOW_V4SI:
15738 case P8V_BUILTIN_VMRGOW_V2DI:
15739 case P8V_BUILTIN_VMRGOW_V4SF:
15740 case P8V_BUILTIN_VMRGOW_V2DF:
15741 fold_mergeeo_helper (gsi, stmt, 1);
15742 return true;
15743
15744 /* d = vec_pack (a, b) */
15745 case P8V_BUILTIN_VPKUDUM:
15746 case ALTIVEC_BUILTIN_VPKUHUM:
15747 case ALTIVEC_BUILTIN_VPKUWUM:
15748 {
15749 arg0 = gimple_call_arg (stmt, 0);
15750 arg1 = gimple_call_arg (stmt, 1);
15751 lhs = gimple_call_lhs (stmt);
15752 gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
15753 gimple_set_location (g, gimple_location (stmt));
15754 gsi_replace (gsi, g, true);
15755 return true;
15756 }
15757
15758 /* d = vec_unpackh (a) */
15759 /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
15760 in this code is sensitive to endianness, and needs to be inverted to
15761 handle both LE and BE targets. */
15762 case ALTIVEC_BUILTIN_VUPKHSB:
15763 case ALTIVEC_BUILTIN_VUPKHSH:
15764 case P8V_BUILTIN_VUPKHSW:
15765 {
15766 arg0 = gimple_call_arg (stmt, 0);
15767 lhs = gimple_call_lhs (stmt);
15768 if (BYTES_BIG_ENDIAN)
15769 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15770 else
15771 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15772 gimple_set_location (g, gimple_location (stmt));
15773 gsi_replace (gsi, g, true);
15774 return true;
15775 }
15776 /* d = vec_unpackl (a) */
15777 case ALTIVEC_BUILTIN_VUPKLSB:
15778 case ALTIVEC_BUILTIN_VUPKLSH:
15779 case P8V_BUILTIN_VUPKLSW:
15780 {
15781 arg0 = gimple_call_arg (stmt, 0);
15782 lhs = gimple_call_lhs (stmt);
15783 if (BYTES_BIG_ENDIAN)
15784 g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
15785 else
15786 g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
15787 gimple_set_location (g, gimple_location (stmt));
15788 gsi_replace (gsi, g, true);
15789 return true;
15790 }
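/* Illustrative example (not from the surrounding code): the HI/LO swap
   above makes the fold come out the same on both endiannesses, e.g.

     #include <altivec.h>
     vector signed int
     unpackh (vector signed short v)
     {
       return vec_unpackh (v);
     }

   folds to VEC_UNPACK_HI_EXPR on big-endian targets and to
   VEC_UNPACK_LO_EXPR on little-endian targets.  */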
15791 /* There is no gimple type corresponding to pixel, so just return. */
15792 case ALTIVEC_BUILTIN_VUPKHPX:
15793 case ALTIVEC_BUILTIN_VUPKLPX:
15794 return false;
15795
15796 /* vec_perm. */
15797 case ALTIVEC_BUILTIN_VPERM_16QI:
15798 case ALTIVEC_BUILTIN_VPERM_8HI:
15799 case ALTIVEC_BUILTIN_VPERM_4SI:
15800 case ALTIVEC_BUILTIN_VPERM_2DI:
15801 case ALTIVEC_BUILTIN_VPERM_4SF:
15802 case ALTIVEC_BUILTIN_VPERM_2DF:
15803 {
15804 arg0 = gimple_call_arg (stmt, 0);
15805 arg1 = gimple_call_arg (stmt, 1);
15806 tree permute = gimple_call_arg (stmt, 2);
15807 lhs = gimple_call_lhs (stmt);
15808 location_t loc = gimple_location (stmt);
15809 gimple_seq stmts = NULL;
15810 // Convert arg0 and arg1 to match the type of the permute
15811 // for the VEC_PERM_EXPR operation.
15812 tree permute_type = TREE_TYPE (permute);
15813 tree arg0_ptype = gimple_convert (&stmts, loc, permute_type, arg0);
15814 tree arg1_ptype = gimple_convert (&stmts, loc, permute_type, arg1);
15815 tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR,
15816 permute_type, arg0_ptype, arg1_ptype,
15817 permute);
15818 // Convert the result back to the desired lhs type upon completion.
15819 tree temp = gimple_convert (&stmts, loc, TREE_TYPE (lhs), lhs_ptype);
15820 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15821 g = gimple_build_assign (lhs, temp);
15822 gimple_set_location (g, loc);
15823 gsi_replace (gsi, g, true);
15824 return true;
15825 }
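/* Illustrative example (not from the surrounding code): every element
   width funnels through one VEC_PERM_EXPR on the selector's type:

     #include <altivec.h>
     vector signed int
     perm (vector signed int a, vector signed int b,
           vector unsigned char sel)
     {
       return vec_perm (a, b, sel);
     }

   converts a and b to the type of sel, applies VEC_PERM_EXPR, and converts
   the result back to the type of the lhs.  */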
15826
15827 default:
15828 if (TARGET_DEBUG_BUILTIN)
15829 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
15830 fn_code, fn_name1, fn_name2);
15831 break;
15832 }
15833
15834 return false;
15835 }
15836
15837 /* Expand an expression EXP that calls a built-in function,
15838 with result going to TARGET if that's convenient
15839 (and in mode MODE if that's convenient).
15840 SUBTARGET may be used as the target for computing one of EXP's operands.
15841 IGNORE is nonzero if the value is to be ignored. */
15842
15843 static rtx
15844 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15845 machine_mode mode ATTRIBUTE_UNUSED,
15846 int ignore ATTRIBUTE_UNUSED)
15847 {
15848 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15849 enum rs6000_builtins fcode
15850 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
15851 size_t uns_fcode = (size_t)fcode;
15852 const struct builtin_description *d;
15853 size_t i;
15854 rtx ret;
15855 bool success;
15856 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
15857 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
15858 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
15859
15860 /* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
15861 floating point type, depending on whether long double is the IBM extended
15862 double (KFmode) or long double is IEEE 128-bit (TFmode). It is simpler if
15863 we only define one variant of the built-in function, and switch the code
15864 when defining it, rather than defining two built-ins and using the
15865 overload table in rs6000-c.c to switch between the two. If we don't have
15866 the proper assembler, don't do this switch because CODE_FOR_*kf* and
15867 CODE_FOR_*tf* will be CODE_FOR_nothing. */
15868 if (FLOAT128_IEEE_P (TFmode))
15869 switch (icode)
15870 {
15871 default:
15872 break;
15873
15874 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
15875 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
15876 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
15877 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
15878 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
15879 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
15880 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
15881 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
15882 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
15883 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
15884 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
15885 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
15886 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
15887 }
15888
15889 if (TARGET_DEBUG_BUILTIN)
15890 {
15891 const char *name1 = rs6000_builtin_info[uns_fcode].name;
15892 const char *name2 = (icode != CODE_FOR_nothing)
15893 ? get_insn_name ((int) icode)
15894 : "nothing";
15895 const char *name3;
15896
15897 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
15898 {
15899 default: name3 = "unknown"; break;
15900 case RS6000_BTC_SPECIAL: name3 = "special"; break;
15901 case RS6000_BTC_UNARY: name3 = "unary"; break;
15902 case RS6000_BTC_BINARY: name3 = "binary"; break;
15903 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
15904 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
15905 case RS6000_BTC_ABS: name3 = "abs"; break;
15906 case RS6000_BTC_DST: name3 = "dst"; break;
15907 }
15908
15909
15910 fprintf (stderr,
15911 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
15912 (name1) ? name1 : "---", fcode,
15913 (name2) ? name2 : "---", (int) icode,
15914 name3,
15915 func_valid_p ? "" : ", not valid");
15916 }
15917
15918 if (!func_valid_p)
15919 {
15920 rs6000_invalid_builtin (fcode);
15921
15922 /* Given it is invalid, just generate a normal call. */
15923 return expand_call (exp, target, ignore);
15924 }
15925
15926 switch (fcode)
15927 {
15928 case RS6000_BUILTIN_RECIP:
15929 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
15930
15931 case RS6000_BUILTIN_RECIPF:
15932 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
15933
15934 case RS6000_BUILTIN_RSQRTF:
15935 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
15936
15937 case RS6000_BUILTIN_RSQRT:
15938 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
15939
15940 case POWER7_BUILTIN_BPERMD:
15941 return rs6000_expand_binop_builtin (((TARGET_64BIT)
15942 ? CODE_FOR_bpermd_di
15943 : CODE_FOR_bpermd_si), exp, target);
15944
15945 case RS6000_BUILTIN_GET_TB:
15946 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
15947 target);
15948
15949 case RS6000_BUILTIN_MFTB:
15950 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
15951 ? CODE_FOR_rs6000_mftb_di
15952 : CODE_FOR_rs6000_mftb_si),
15953 target);
15954
15955 case RS6000_BUILTIN_MFFS:
15956 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
15957
15958 case RS6000_BUILTIN_MTFSB0:
15959 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
15960
15961 case RS6000_BUILTIN_MTFSB1:
15962 return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
15963
15964 case RS6000_BUILTIN_SET_FPSCR_RN:
15965 return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
15966 exp);
15967
15968 case RS6000_BUILTIN_SET_FPSCR_DRN:
15969 return
15970 rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
15971 exp);
15972
15973 case RS6000_BUILTIN_MFFSL:
15974 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
15975
15976 case RS6000_BUILTIN_MTFSF:
15977 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
15978
15979 case RS6000_BUILTIN_CPU_INIT:
15980 case RS6000_BUILTIN_CPU_IS:
15981 case RS6000_BUILTIN_CPU_SUPPORTS:
15982 return cpu_expand_builtin (fcode, exp, target);
15983
15984 case MISC_BUILTIN_SPEC_BARRIER:
15985 {
15986 emit_insn (gen_speculation_barrier ());
15987 return NULL_RTX;
15988 }
15989
15990 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
15991 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
15992 {
15993 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
15994 : (int) CODE_FOR_altivec_lvsl_direct);
15995 machine_mode tmode = insn_data[icode2].operand[0].mode;
15996 machine_mode mode = insn_data[icode2].operand[1].mode;
15997 tree arg;
15998 rtx op, addr, pat;
15999
16000 gcc_assert (TARGET_ALTIVEC);
16001
16002 arg = CALL_EXPR_ARG (exp, 0);
16003 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16004 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16005 addr = memory_address (mode, op);
16006 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16007 op = addr;
16008 else
16009 {
16010 /* For the load case we need to negate the address. */
16011 op = gen_reg_rtx (GET_MODE (addr));
16012 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16013 }
16014 op = gen_rtx_MEM (mode, op);
16015
16016 if (target == 0
16017 || GET_MODE (target) != tmode
16018 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16019 target = gen_reg_rtx (tmode);
16020
16021 pat = GEN_FCN (icode2) (target, op);
16022 if (!pat)
16023 return 0;
16024 emit_insn (pat);
16025
16026 return target;
16027 }
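/* Note (illustrative, not from the original comments): lvsl/lvsr derive
   their permute mask from the low four bits of the effective address, so
   for addr = 16 * k + r the realignment mask for a load wants 16 - r,
   which is exactly what the low bits of -addr provide; e.g. r == 3 gives
   (-addr) & 15 == 13.  */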
16028
16029 case ALTIVEC_BUILTIN_VCFUX:
16030 case ALTIVEC_BUILTIN_VCFSX:
16031 case ALTIVEC_BUILTIN_VCTUXS:
16032 case ALTIVEC_BUILTIN_VCTSXS:
16033 /* FIXME: There's got to be a nicer way to handle this case than
16034 constructing a new CALL_EXPR. */
16035 if (call_expr_nargs (exp) == 1)
16036 {
16037 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16038 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16039 }
16040 break;
16041
16042 /* For the pack and unpack int128 routines, fix up the builtin so it
16043 uses the correct IBM128 type. */
16044 case MISC_BUILTIN_PACK_IF:
16045 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16046 {
16047 icode = CODE_FOR_packtf;
16048 fcode = MISC_BUILTIN_PACK_TF;
16049 uns_fcode = (size_t)fcode;
16050 }
16051 break;
16052
16053 case MISC_BUILTIN_UNPACK_IF:
16054 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16055 {
16056 icode = CODE_FOR_unpacktf;
16057 fcode = MISC_BUILTIN_UNPACK_TF;
16058 uns_fcode = (size_t)fcode;
16059 }
16060 break;
16061
16062 default:
16063 break;
16064 }
16065
16066 if (TARGET_ALTIVEC)
16067 {
16068 ret = altivec_expand_builtin (exp, target, &success);
16069
16070 if (success)
16071 return ret;
16072 }
16073 if (TARGET_HTM)
16074 {
16075 ret = htm_expand_builtin (exp, target, &success);
16076
16077 if (success)
16078 return ret;
16079 }
16080
16081 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16082 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16083 gcc_assert (attr == RS6000_BTC_UNARY
16084 || attr == RS6000_BTC_BINARY
16085 || attr == RS6000_BTC_TERNARY
16086 || attr == RS6000_BTC_SPECIAL);
16087
16088 /* Handle simple unary operations. */
16089 d = bdesc_1arg;
16090 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16091 if (d->code == fcode)
16092 return rs6000_expand_unop_builtin (icode, exp, target);
16093
16094 /* Handle simple binary operations. */
16095 d = bdesc_2arg;
16096 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16097 if (d->code == fcode)
16098 return rs6000_expand_binop_builtin (icode, exp, target);
16099
16100 /* Handle simple ternary operations. */
16101 d = bdesc_3arg;
16102 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16103 if (d->code == fcode)
16104 return rs6000_expand_ternop_builtin (icode, exp, target);
16105
16106 /* Handle simple no-argument operations. */
16107 d = bdesc_0arg;
16108 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16109 if (d->code == fcode)
16110 return rs6000_expand_zeroop_builtin (icode, target);
16111
16112 gcc_unreachable ();
16113 }
16114
16115 /* Create a builtin vector type with a name, taking care not to give
16116 the canonical type a name. */
16117
16118 static tree
16119 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16120 {
16121 tree result = build_vector_type (elt_type, num_elts);
16122
16123 /* Copy so we don't give the canonical type a name. */
16124 result = build_variant_type_copy (result);
16125
16126 add_builtin_type (name, result);
16127
16128 return result;
16129 }
16130
16131 static void
16132 rs6000_init_builtins (void)
16133 {
16134 tree tdecl;
16135 tree ftype;
16136 machine_mode mode;
16137
16138 if (TARGET_DEBUG_BUILTIN)
16139 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16140 (TARGET_ALTIVEC) ? ", altivec" : "",
16141 (TARGET_VSX) ? ", vsx" : "");
16142
16143 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16144 : "__vector long long",
16145 intDI_type_node, 2);
16146 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16147 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16148 intSI_type_node, 4);
16149 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16150 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16151 intHI_type_node, 8);
16152 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16153 intQI_type_node, 16);
16154
16155 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16156 unsigned_intQI_type_node, 16);
16157 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16158 unsigned_intHI_type_node, 8);
16159 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16160 unsigned_intSI_type_node, 4);
16161 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16162 ? "__vector unsigned long"
16163 : "__vector unsigned long long",
16164 unsigned_intDI_type_node, 2);
16165
16166 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16167
16168 const_str_type_node
16169 = build_pointer_type (build_qualified_type (char_type_node,
16170 TYPE_QUAL_CONST));
16171
16172 /* We use V1TI mode as a special container to hold __int128_t items that
16173 must live in VSX registers. */
16174 if (intTI_type_node)
16175 {
16176 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16177 intTI_type_node, 1);
16178 unsigned_V1TI_type_node
16179 = rs6000_vector_type ("__vector unsigned __int128",
16180 unsigned_intTI_type_node, 1);
16181 }
16182
16183 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16184 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16185 'vector unsigned short'. */
16186
16187 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16188 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16189 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16190 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16191 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16192
16193 long_integer_type_internal_node = long_integer_type_node;
16194 long_unsigned_type_internal_node = long_unsigned_type_node;
16195 long_long_integer_type_internal_node = long_long_integer_type_node;
16196 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16197 intQI_type_internal_node = intQI_type_node;
16198 uintQI_type_internal_node = unsigned_intQI_type_node;
16199 intHI_type_internal_node = intHI_type_node;
16200 uintHI_type_internal_node = unsigned_intHI_type_node;
16201 intSI_type_internal_node = intSI_type_node;
16202 uintSI_type_internal_node = unsigned_intSI_type_node;
16203 intDI_type_internal_node = intDI_type_node;
16204 uintDI_type_internal_node = unsigned_intDI_type_node;
16205 intTI_type_internal_node = intTI_type_node;
16206 uintTI_type_internal_node = unsigned_intTI_type_node;
16207 float_type_internal_node = float_type_node;
16208 double_type_internal_node = double_type_node;
16209 long_double_type_internal_node = long_double_type_node;
16210 dfloat64_type_internal_node = dfloat64_type_node;
16211 dfloat128_type_internal_node = dfloat128_type_node;
16212 void_type_internal_node = void_type_node;
16213
16214 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16215 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16216 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16217 format that uses a pair of doubles, depending on the switches and
16218 defaults.
16219
16220 If we don't support either 128-bit IBM double double or IEEE 128-bit
16221 floating point, we need to make sure the type is non-zero or else the
16222 self-test fails during bootstrap.
16223
16224 Always create __ibm128 as a separate type, even if the current long double
16225 format is IBM extended double.
16226
16227 For IEEE 128-bit floating point, always create the type __ieee128. If the
16228 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16229 __ieee128. */
16230 if (TARGET_FLOAT128_TYPE)
16231 {
16232 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16233 ibm128_float_type_node = long_double_type_node;
16234 else
16235 {
16236 ibm128_float_type_node = make_node (REAL_TYPE);
16237 TYPE_PRECISION (ibm128_float_type_node) = 128;
16238 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16239 layout_type (ibm128_float_type_node);
16240 }
16241
16242 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16243 "__ibm128");
16244
16245 if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
16246 ieee128_float_type_node = long_double_type_node;
16247 else
16248 ieee128_float_type_node = float128_type_node;
16249
16250 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16251 "__ieee128");
16252 }
16253
16254 else
16255 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16256
16257 /* Initialize the modes for builtin_function_type, mapping a machine mode
16258 to its tree type node. */
16259 builtin_mode_to_type[QImode][0] = integer_type_node;
16260 builtin_mode_to_type[HImode][0] = integer_type_node;
16261 builtin_mode_to_type[SImode][0] = intSI_type_node;
16262 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16263 builtin_mode_to_type[DImode][0] = intDI_type_node;
16264 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16265 builtin_mode_to_type[TImode][0] = intTI_type_node;
16266 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16267 builtin_mode_to_type[SFmode][0] = float_type_node;
16268 builtin_mode_to_type[DFmode][0] = double_type_node;
16269 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16270 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16271 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16272 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16273 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16274 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16275 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16276 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16277 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16278 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16279 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16280 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16281 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16282 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16283 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16284 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16285 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16286
16287 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16288 TYPE_NAME (bool_char_type_node) = tdecl;
16289
16290 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16291 TYPE_NAME (bool_short_type_node) = tdecl;
16292
16293 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16294 TYPE_NAME (bool_int_type_node) = tdecl;
16295
16296 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16297 TYPE_NAME (pixel_type_node) = tdecl;
16298
16299 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16300 bool_char_type_node, 16);
16301 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16302 bool_short_type_node, 8);
16303 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16304 bool_int_type_node, 4);
16305 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16306 ? "__vector __bool long"
16307 : "__vector __bool long long",
16308 bool_long_long_type_node, 2);
16309 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16310 pixel_type_node, 8);
16311
16312 /* Create Altivec and VSX builtins on machines with at least the
16313 general purpose extensions (970 and newer) to allow the use of
16314 the target attribute. */
16315 if (TARGET_EXTRA_BUILTINS)
16316 altivec_init_builtins ();
16317 if (TARGET_HTM)
16318 htm_init_builtins ();
16319
16320 if (TARGET_EXTRA_BUILTINS)
16321 rs6000_common_init_builtins ();
16322
16323 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16324 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16325 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16326
16327 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16328 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16329 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16330
16331 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16332 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16333 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16334
16335 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16336 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16337 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16338
16339 mode = (TARGET_64BIT) ? DImode : SImode;
16340 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16341 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16342 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16343
16344 ftype = build_function_type_list (unsigned_intDI_type_node,
16345 NULL_TREE);
16346 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16347
16348 if (TARGET_64BIT)
16349 ftype = build_function_type_list (unsigned_intDI_type_node,
16350 NULL_TREE);
16351 else
16352 ftype = build_function_type_list (unsigned_intSI_type_node,
16353 NULL_TREE);
16354 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16355
16356 ftype = build_function_type_list (double_type_node, NULL_TREE);
16357 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16358
16359 ftype = build_function_type_list (double_type_node, NULL_TREE);
16360 def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL);
16361
16362 ftype = build_function_type_list (void_type_node,
16363 intSI_type_node,
16364 NULL_TREE);
16365 def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0);
16366
16367 ftype = build_function_type_list (void_type_node,
16368 intSI_type_node,
16369 NULL_TREE);
16370 def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1);
16371
16372 ftype = build_function_type_list (void_type_node,
16373 intDI_type_node,
16374 NULL_TREE);
16375 def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN);
16376
16377 ftype = build_function_type_list (void_type_node,
16378 intDI_type_node,
16379 NULL_TREE);
16380 def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN);
16381
16382 ftype = build_function_type_list (void_type_node,
16383 intSI_type_node, double_type_node,
16384 NULL_TREE);
16385 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16386
16387 ftype = build_function_type_list (void_type_node, NULL_TREE);
16388 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16389 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16390 MISC_BUILTIN_SPEC_BARRIER);
16391
16392 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16393 NULL_TREE);
16394 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16395 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
16396
16397 /* AIX libm provides clog as __clog. */
16398 if (TARGET_XCOFF
16399 && (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16400 set_user_assembler_name (tdecl, "__clog");
16401
16402 #ifdef SUBTARGET_INIT_BUILTINS
16403 SUBTARGET_INIT_BUILTINS;
16404 #endif
16405 }
16406
16407 /* Returns the rs6000 builtin decl for CODE. */
16408
16409 static tree
16410 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16411 {
16412 HOST_WIDE_INT fnmask;
16413
16414 if (code >= RS6000_BUILTIN_COUNT)
16415 return error_mark_node;
16416
16417 fnmask = rs6000_builtin_info[code].mask;
16418 if ((fnmask & rs6000_builtin_mask) != fnmask)
16419 {
16420 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16421 return error_mark_node;
16422 }
16423
16424 return rs6000_builtin_decls[code];
16425 }
16426
16427 static void
16428 altivec_init_builtins (void)
16429 {
16430 const struct builtin_description *d;
16431 size_t i;
16432 tree ftype;
16433 tree decl;
16434 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16435
16436 tree pvoid_type_node = build_pointer_type (void_type_node);
16437
16438 tree pcvoid_type_node
16439 = build_pointer_type (build_qualified_type (void_type_node,
16440 TYPE_QUAL_CONST));
16441
16442 tree int_ftype_opaque
16443 = build_function_type_list (integer_type_node,
16444 opaque_V4SI_type_node, NULL_TREE);
16445 tree opaque_ftype_opaque
16446 = build_function_type_list (integer_type_node, NULL_TREE);
16447 tree opaque_ftype_opaque_int
16448 = build_function_type_list (opaque_V4SI_type_node,
16449 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16450 tree opaque_ftype_opaque_opaque_int
16451 = build_function_type_list (opaque_V4SI_type_node,
16452 opaque_V4SI_type_node, opaque_V4SI_type_node,
16453 integer_type_node, NULL_TREE);
16454 tree opaque_ftype_opaque_opaque_opaque
16455 = build_function_type_list (opaque_V4SI_type_node,
16456 opaque_V4SI_type_node, opaque_V4SI_type_node,
16457 opaque_V4SI_type_node, NULL_TREE);
16458 tree opaque_ftype_opaque_opaque
16459 = build_function_type_list (opaque_V4SI_type_node,
16460 opaque_V4SI_type_node, opaque_V4SI_type_node,
16461 NULL_TREE);
16462 tree int_ftype_int_opaque_opaque
16463 = build_function_type_list (integer_type_node,
16464 integer_type_node, opaque_V4SI_type_node,
16465 opaque_V4SI_type_node, NULL_TREE);
16466 tree int_ftype_int_v4si_v4si
16467 = build_function_type_list (integer_type_node,
16468 integer_type_node, V4SI_type_node,
16469 V4SI_type_node, NULL_TREE);
16470 tree int_ftype_int_v2di_v2di
16471 = build_function_type_list (integer_type_node,
16472 integer_type_node, V2DI_type_node,
16473 V2DI_type_node, NULL_TREE);
16474 tree void_ftype_v4si
16475 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16476 tree v8hi_ftype_void
16477 = build_function_type_list (V8HI_type_node, NULL_TREE);
16478 tree void_ftype_void
16479 = build_function_type_list (void_type_node, NULL_TREE);
16480 tree void_ftype_int
16481 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16482
16483 tree opaque_ftype_long_pcvoid
16484 = build_function_type_list (opaque_V4SI_type_node,
16485 long_integer_type_node, pcvoid_type_node,
16486 NULL_TREE);
16487 tree v16qi_ftype_long_pcvoid
16488 = build_function_type_list (V16QI_type_node,
16489 long_integer_type_node, pcvoid_type_node,
16490 NULL_TREE);
16491 tree v8hi_ftype_long_pcvoid
16492 = build_function_type_list (V8HI_type_node,
16493 long_integer_type_node, pcvoid_type_node,
16494 NULL_TREE);
16495 tree v4si_ftype_long_pcvoid
16496 = build_function_type_list (V4SI_type_node,
16497 long_integer_type_node, pcvoid_type_node,
16498 NULL_TREE);
16499 tree v4sf_ftype_long_pcvoid
16500 = build_function_type_list (V4SF_type_node,
16501 long_integer_type_node, pcvoid_type_node,
16502 NULL_TREE);
16503 tree v2df_ftype_long_pcvoid
16504 = build_function_type_list (V2DF_type_node,
16505 long_integer_type_node, pcvoid_type_node,
16506 NULL_TREE);
16507 tree v2di_ftype_long_pcvoid
16508 = build_function_type_list (V2DI_type_node,
16509 long_integer_type_node, pcvoid_type_node,
16510 NULL_TREE);
16511 tree v1ti_ftype_long_pcvoid
16512 = build_function_type_list (V1TI_type_node,
16513 long_integer_type_node, pcvoid_type_node,
16514 NULL_TREE);
16515
16516 tree void_ftype_opaque_long_pvoid
16517 = build_function_type_list (void_type_node,
16518 opaque_V4SI_type_node, long_integer_type_node,
16519 pvoid_type_node, NULL_TREE);
16520 tree void_ftype_v4si_long_pvoid
16521 = build_function_type_list (void_type_node,
16522 V4SI_type_node, long_integer_type_node,
16523 pvoid_type_node, NULL_TREE);
16524 tree void_ftype_v16qi_long_pvoid
16525 = build_function_type_list (void_type_node,
16526 V16QI_type_node, long_integer_type_node,
16527 pvoid_type_node, NULL_TREE);
16528
16529 tree void_ftype_v16qi_pvoid_long
16530 = build_function_type_list (void_type_node,
16531 V16QI_type_node, pvoid_type_node,
16532 long_integer_type_node, NULL_TREE);
16533
16534 tree void_ftype_v8hi_long_pvoid
16535 = build_function_type_list (void_type_node,
16536 V8HI_type_node, long_integer_type_node,
16537 pvoid_type_node, NULL_TREE);
16538 tree void_ftype_v4sf_long_pvoid
16539 = build_function_type_list (void_type_node,
16540 V4SF_type_node, long_integer_type_node,
16541 pvoid_type_node, NULL_TREE);
16542 tree void_ftype_v2df_long_pvoid
16543 = build_function_type_list (void_type_node,
16544 V2DF_type_node, long_integer_type_node,
16545 pvoid_type_node, NULL_TREE);
16546 tree void_ftype_v1ti_long_pvoid
16547 = build_function_type_list (void_type_node,
16548 V1TI_type_node, long_integer_type_node,
16549 pvoid_type_node, NULL_TREE);
16550 tree void_ftype_v2di_long_pvoid
16551 = build_function_type_list (void_type_node,
16552 V2DI_type_node, long_integer_type_node,
16553 pvoid_type_node, NULL_TREE);
16554 tree int_ftype_int_v8hi_v8hi
16555 = build_function_type_list (integer_type_node,
16556 integer_type_node, V8HI_type_node,
16557 V8HI_type_node, NULL_TREE);
16558 tree int_ftype_int_v16qi_v16qi
16559 = build_function_type_list (integer_type_node,
16560 integer_type_node, V16QI_type_node,
16561 V16QI_type_node, NULL_TREE);
16562 tree int_ftype_int_v4sf_v4sf
16563 = build_function_type_list (integer_type_node,
16564 integer_type_node, V4SF_type_node,
16565 V4SF_type_node, NULL_TREE);
16566 tree int_ftype_int_v2df_v2df
16567 = build_function_type_list (integer_type_node,
16568 integer_type_node, V2DF_type_node,
16569 V2DF_type_node, NULL_TREE);
16570 tree v2di_ftype_v2di
16571 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16572 tree v4si_ftype_v4si
16573 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16574 tree v8hi_ftype_v8hi
16575 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16576 tree v16qi_ftype_v16qi
16577 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16578 tree v4sf_ftype_v4sf
16579 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16580 tree v2df_ftype_v2df
16581 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16582 tree void_ftype_pcvoid_int_int
16583 = build_function_type_list (void_type_node,
16584 pcvoid_type_node, integer_type_node,
16585 integer_type_node, NULL_TREE);
16586
16587 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16588 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16589 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16590 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16591 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16592 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16593 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16594 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16595 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16596 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16597 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16598 ALTIVEC_BUILTIN_LVXL_V2DF);
16599 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16600 ALTIVEC_BUILTIN_LVXL_V2DI);
16601 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16602 ALTIVEC_BUILTIN_LVXL_V4SF);
16603 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16604 ALTIVEC_BUILTIN_LVXL_V4SI);
16605 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16606 ALTIVEC_BUILTIN_LVXL_V8HI);
16607 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16608 ALTIVEC_BUILTIN_LVXL_V16QI);
16609 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16610 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16611 ALTIVEC_BUILTIN_LVX_V1TI);
16612 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16613 ALTIVEC_BUILTIN_LVX_V2DF);
16614 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16615 ALTIVEC_BUILTIN_LVX_V2DI);
16616 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16617 ALTIVEC_BUILTIN_LVX_V4SF);
16618 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16619 ALTIVEC_BUILTIN_LVX_V4SI);
16620 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16621 ALTIVEC_BUILTIN_LVX_V8HI);
16622 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16623 ALTIVEC_BUILTIN_LVX_V16QI);
16624 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16625 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16626 ALTIVEC_BUILTIN_STVX_V2DF);
16627 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16628 ALTIVEC_BUILTIN_STVX_V2DI);
16629 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16630 ALTIVEC_BUILTIN_STVX_V4SF);
16631 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16632 ALTIVEC_BUILTIN_STVX_V4SI);
16633 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16634 ALTIVEC_BUILTIN_STVX_V8HI);
16635 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16636 ALTIVEC_BUILTIN_STVX_V16QI);
16637 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16638 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16639 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16640 ALTIVEC_BUILTIN_STVXL_V2DF);
16641 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16642 ALTIVEC_BUILTIN_STVXL_V2DI);
16643 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16644 ALTIVEC_BUILTIN_STVXL_V4SF);
16645 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16646 ALTIVEC_BUILTIN_STVXL_V4SI);
16647 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16648 ALTIVEC_BUILTIN_STVXL_V8HI);
16649 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16650 ALTIVEC_BUILTIN_STVXL_V16QI);
16651 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16652 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16653 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16654 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16655 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16656 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16657 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16658 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16659 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16660 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16661 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16662 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16663 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16664 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16665 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16666 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16667
16668 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16669 VSX_BUILTIN_LXVD2X_V2DF);
16670 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16671 VSX_BUILTIN_LXVD2X_V2DI);
16672 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16673 VSX_BUILTIN_LXVW4X_V4SF);
16674 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16675 VSX_BUILTIN_LXVW4X_V4SI);
16676 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16677 VSX_BUILTIN_LXVW4X_V8HI);
16678 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16679 VSX_BUILTIN_LXVW4X_V16QI);
16680 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16681 VSX_BUILTIN_STXVD2X_V2DF);
16682 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16683 VSX_BUILTIN_STXVD2X_V2DI);
16684 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16685 VSX_BUILTIN_STXVW4X_V4SF);
16686 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16687 VSX_BUILTIN_STXVW4X_V4SI);
16688 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16689 VSX_BUILTIN_STXVW4X_V8HI);
16690 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16691 VSX_BUILTIN_STXVW4X_V16QI);
16692
16693 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16694 VSX_BUILTIN_LD_ELEMREV_V2DF);
16695 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16696 VSX_BUILTIN_LD_ELEMREV_V2DI);
16697 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16698 VSX_BUILTIN_LD_ELEMREV_V4SF);
16699 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16700 VSX_BUILTIN_LD_ELEMREV_V4SI);
16701 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16702 VSX_BUILTIN_LD_ELEMREV_V8HI);
16703 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16704 VSX_BUILTIN_LD_ELEMREV_V16QI);
16705 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16706 VSX_BUILTIN_ST_ELEMREV_V2DF);
16707 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16708 VSX_BUILTIN_ST_ELEMREV_V1TI);
16709 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16710 VSX_BUILTIN_ST_ELEMREV_V2DI);
16711 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16712 VSX_BUILTIN_ST_ELEMREV_V4SF);
16713 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16714 VSX_BUILTIN_ST_ELEMREV_V4SI);
16715 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16716 VSX_BUILTIN_ST_ELEMREV_V8HI);
16717 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16718 VSX_BUILTIN_ST_ELEMREV_V16QI);
16719
16720 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16721 VSX_BUILTIN_VEC_LD);
16722 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16723 VSX_BUILTIN_VEC_ST);
16724 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16725 VSX_BUILTIN_VEC_XL);
16726 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16727 VSX_BUILTIN_VEC_XL_BE);
16728 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16729 VSX_BUILTIN_VEC_XST);
16730 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16731 VSX_BUILTIN_VEC_XST_BE);
16732
16733 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16734 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16735 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16736
16737 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16738 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16739 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16740 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16741 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16742 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16743 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16744 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16745 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16746 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16747 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16748 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16749
16750 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16751 ALTIVEC_BUILTIN_VEC_ADDE);
16752 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16753 ALTIVEC_BUILTIN_VEC_ADDEC);
16754 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16755 ALTIVEC_BUILTIN_VEC_CMPNE);
16756 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16757 ALTIVEC_BUILTIN_VEC_MUL);
16758 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16759 ALTIVEC_BUILTIN_VEC_SUBE);
16760 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16761 ALTIVEC_BUILTIN_VEC_SUBEC);
16762
16763 /* Cell builtins. */
16764 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16765 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16766 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16767 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16768
16769 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16770 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16771 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16772 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16773
16774 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16775 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16776 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16777 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16778
16779 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16780 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16781 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16782 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16783
16784 if (TARGET_P9_VECTOR)
16785 {
16786 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16787 P9V_BUILTIN_STXVL);
16788 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16789 P9V_BUILTIN_XST_LEN_R);
16790 }
16791
16792 /* Add the DST variants. */
16793 d = bdesc_dst;
16794 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16795 {
16796 HOST_WIDE_INT mask = d->mask;
16797
16798 /* It is expected that these dst built-in functions may have
16799 d->icode equal to CODE_FOR_nothing. */
16800 if ((mask & builtin_mask) != mask)
16801 {
16802 if (TARGET_DEBUG_BUILTIN)
16803 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16804 d->name);
16805 continue;
16806 }
16807 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16808 }
16809
16810 /* Initialize the predicates. */
16811 d = bdesc_altivec_preds;
16812 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16813 {
16814 machine_mode mode1;
16815 tree type;
16816 HOST_WIDE_INT mask = d->mask;
16817
16818 if ((mask & builtin_mask) != mask)
16819 {
16820 if (TARGET_DEBUG_BUILTIN)
16821 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16822 d->name);
16823 continue;
16824 }
16825
16826 if (rs6000_overloaded_builtin_p (d->code))
16827 mode1 = VOIDmode;
16828 else
16829 {
16830 /* Cannot define builtin if the instruction is disabled. */
16831 gcc_assert (d->icode != CODE_FOR_nothing);
16832 mode1 = insn_data[d->icode].operand[1].mode;
16833 }
16834
16835 switch (mode1)
16836 {
16837 case E_VOIDmode:
16838 type = int_ftype_int_opaque_opaque;
16839 break;
16840 case E_V2DImode:
16841 type = int_ftype_int_v2di_v2di;
16842 break;
16843 case E_V4SImode:
16844 type = int_ftype_int_v4si_v4si;
16845 break;
16846 case E_V8HImode:
16847 type = int_ftype_int_v8hi_v8hi;
16848 break;
16849 case E_V16QImode:
16850 type = int_ftype_int_v16qi_v16qi;
16851 break;
16852 case E_V4SFmode:
16853 type = int_ftype_int_v4sf_v4sf;
16854 break;
16855 case E_V2DFmode:
16856 type = int_ftype_int_v2df_v2df;
16857 break;
16858 default:
16859 gcc_unreachable ();
16860 }
16861
16862 def_builtin (d->name, type, d->code);
16863 }
16864
16865 /* Initialize the abs* operators. */
16866 d = bdesc_abs;
16867 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
16868 {
16869 machine_mode mode0;
16870 tree type;
16871 HOST_WIDE_INT mask = d->mask;
16872
16873 if ((mask & builtin_mask) != mask)
16874 {
16875 if (TARGET_DEBUG_BUILTIN)
16876 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
16877 d->name);
16878 continue;
16879 }
16880
16881 /* Cannot define builtin if the instruction is disabled. */
16882 gcc_assert (d->icode != CODE_FOR_nothing);
16883 mode0 = insn_data[d->icode].operand[0].mode;
16884
16885 switch (mode0)
16886 {
16887 case E_V2DImode:
16888 type = v2di_ftype_v2di;
16889 break;
16890 case E_V4SImode:
16891 type = v4si_ftype_v4si;
16892 break;
16893 case E_V8HImode:
16894 type = v8hi_ftype_v8hi;
16895 break;
16896 case E_V16QImode:
16897 type = v16qi_ftype_v16qi;
16898 break;
16899 case E_V4SFmode:
16900 type = v4sf_ftype_v4sf;
16901 break;
16902 case E_V2DFmode:
16903 type = v2df_ftype_v2df;
16904 break;
16905 default:
16906 gcc_unreachable ();
16907 }
16908
16909 def_builtin (d->name, type, d->code);
16910 }
16911
16912 /* Initialize target builtin that implements
16913 targetm.vectorize.builtin_mask_for_load. */
16914
16915 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
16916 v16qi_ftype_long_pcvoid,
16917 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
16918 BUILT_IN_MD, NULL, NULL_TREE);
16919 TREE_READONLY (decl) = 1;
16920 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
16921 altivec_builtin_mask_for_load = decl;
16922
16923 /* Access to the vec_init patterns. */
16924 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
16925 integer_type_node, integer_type_node,
16926 integer_type_node, NULL_TREE);
16927 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
16928
16929 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
16930 short_integer_type_node,
16931 short_integer_type_node,
16932 short_integer_type_node,
16933 short_integer_type_node,
16934 short_integer_type_node,
16935 short_integer_type_node,
16936 short_integer_type_node, NULL_TREE);
16937 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
16938
16939 ftype = build_function_type_list (V16QI_type_node, char_type_node,
16940 char_type_node, char_type_node,
16941 char_type_node, char_type_node,
16942 char_type_node, char_type_node,
16943 char_type_node, char_type_node,
16944 char_type_node, char_type_node,
16945 char_type_node, char_type_node,
16946 char_type_node, char_type_node,
16947 char_type_node, NULL_TREE);
16948 def_builtin ("__builtin_vec_init_v16qi", ftype,
16949 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
16950
16951 ftype = build_function_type_list (V4SF_type_node, float_type_node,
16952 float_type_node, float_type_node,
16953 float_type_node, NULL_TREE);
16954 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
16955
16956 /* VSX builtins. */
16957 ftype = build_function_type_list (V2DF_type_node, double_type_node,
16958 double_type_node, NULL_TREE);
16959 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
16960
16961 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
16962 intDI_type_node, NULL_TREE);
16963 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
16964
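/* Usage sketch (illustrative, not from the original source): given the
   signatures registered here, a direct call such as

     vector int v = __builtin_vec_init_v4si (1, 2, 3, 4);

   builds a V4SI value through the vec_init expander; the v8hi, v16qi,
   v4sf, v2df, and v2di variants above follow the same shape with their
   own element types.  */
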
16965 /* Access to the vec_set patterns. */
16966 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
16967 intSI_type_node,
16968 integer_type_node, NULL_TREE);
16969 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
16970
16971 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16972 intHI_type_node,
16973 integer_type_node, NULL_TREE);
16974 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
16975
16976 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
16977 intQI_type_node,
16978 integer_type_node, NULL_TREE);
16979 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
16980
16981 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
16982 float_type_node,
16983 integer_type_node, NULL_TREE);
16984 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
16985
16986 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
16987 double_type_node,
16988 integer_type_node, NULL_TREE);
16989 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
16990
16991 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
16992 intDI_type_node,
16993 integer_type_node, NULL_TREE);
16994 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
16995
16996 /* Access to the vec_extract patterns. */
16997 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16998 integer_type_node, NULL_TREE);
16999 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17000
17001 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17002 integer_type_node, NULL_TREE);
17003 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17004
17005 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17006 integer_type_node, NULL_TREE);
17007 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17008
17009 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17010 integer_type_node, NULL_TREE);
17011 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17012
17013 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17014 integer_type_node, NULL_TREE);
17015 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17016
17017 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17018 integer_type_node, NULL_TREE);
17019 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
17020
17021
17022 if (V1TI_type_node)
17023 {
17024 tree v1ti_ftype_long_pcvoid
17025 = build_function_type_list (V1TI_type_node,
17026 long_integer_type_node, pcvoid_type_node,
17027 NULL_TREE);
17028 tree void_ftype_v1ti_long_pvoid
17029 = build_function_type_list (void_type_node,
17030 V1TI_type_node, long_integer_type_node,
17031 pvoid_type_node, NULL_TREE);
17032 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17033 VSX_BUILTIN_LD_ELEMREV_V1TI);
17034 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17035 VSX_BUILTIN_LXVD2X_V1TI);
17036 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17037 VSX_BUILTIN_STXVD2X_V1TI);
17038 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17039 NULL_TREE, NULL_TREE);
17040 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17041 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17042 intTI_type_node,
17043 integer_type_node, NULL_TREE);
17044 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17045 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17046 integer_type_node, NULL_TREE);
17047 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17048 }
17049
17050 }
17051
17052 static void
17053 htm_init_builtins (void)
17054 {
17055 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17056 const struct builtin_description *d;
17057 size_t i;
17058
17059 d = bdesc_htm;
17060 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17061 {
17062 tree op[MAX_HTM_OPERANDS], type;
17063 HOST_WIDE_INT mask = d->mask;
17064 unsigned attr = rs6000_builtin_info[d->code].attr;
17065 bool void_func = (attr & RS6000_BTC_VOID);
17066 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17067 int nopnds = 0;
17068 tree gpr_type_node;
17069 tree rettype;
17070 tree argtype;
17071
17072 /* It is expected that these htm built-in functions may have
17073 d->icode equal to CODE_FOR_nothing. */
17074
17075 if (TARGET_32BIT && TARGET_POWERPC64)
17076 gpr_type_node = long_long_unsigned_type_node;
17077 else
17078 gpr_type_node = long_unsigned_type_node;
17079
17080 if (attr & RS6000_BTC_SPR)
17081 {
17082 rettype = gpr_type_node;
17083 argtype = gpr_type_node;
17084 }
17085 else if (d->code == HTM_BUILTIN_TABORTDC
17086 || d->code == HTM_BUILTIN_TABORTDCI)
17087 {
17088 rettype = unsigned_type_node;
17089 argtype = gpr_type_node;
17090 }
17091 else
17092 {
17093 rettype = unsigned_type_node;
17094 argtype = unsigned_type_node;
17095 }
17096
17097 if ((mask & builtin_mask) != mask)
17098 {
17099 if (TARGET_DEBUG_BUILTIN)
17100 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17101 continue;
17102 }
17103
17104 if (d->name == 0)
17105 {
17106 if (TARGET_DEBUG_BUILTIN)
17107 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17108 (long unsigned) i);
17109 continue;
17110 }
17111
17112 op[nopnds++] = (void_func) ? void_type_node : rettype;
17113
17114 if (attr_args == RS6000_BTC_UNARY)
17115 op[nopnds++] = argtype;
17116 else if (attr_args == RS6000_BTC_BINARY)
17117 {
17118 op[nopnds++] = argtype;
17119 op[nopnds++] = argtype;
17120 }
17121 else if (attr_args == RS6000_BTC_TERNARY)
17122 {
17123 op[nopnds++] = argtype;
17124 op[nopnds++] = argtype;
17125 op[nopnds++] = argtype;
17126 }
17127
17128 switch (nopnds)
17129 {
17130 case 1:
17131 type = build_function_type_list (op[0], NULL_TREE);
17132 break;
17133 case 2:
17134 type = build_function_type_list (op[0], op[1], NULL_TREE);
17135 break;
17136 case 3:
17137 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17138 break;
17139 case 4:
17140 type = build_function_type_list (op[0], op[1], op[2], op[3],
17141 NULL_TREE);
17142 break;
17143 default:
17144 gcc_unreachable ();
17145 }
17146
17147 def_builtin (d->name, type, d->code);
17148 }
17149 }
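
/* Illustrative sketch (not part of the original source): for a binary
   HTM builtin carrying RS6000_BTC_SPR on a 64-bit target, the loop above
   collects op[] = { gpr, gpr, gpr } with gpr == long_unsigned_type_node,
   so the nopnds == 3 case builds the equivalent of

     unsigned long f (unsigned long, unsigned long);

   A builtin with RS6000_BTC_VOID instead places void_type_node in op[0]
   and gets a void result with the same argument types.  */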
17150
17151 /* Hash function for builtin functions with up to 3 arguments and a return
17152 type. */
17153 hashval_t
17154 builtin_hasher::hash (builtin_hash_struct *bh)
17155 {
17156 unsigned ret = 0;
17157 int i;
17158
17159 for (i = 0; i < 4; i++)
17160 {
17161 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17162 ret = (ret * 2) + bh->uns_p[i];
17163 }
17164
17165 return ret;
17166 }
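
/* Note (illustrative): the loop above is a mixed-radix encoding; each
   iteration appends one (mode, unsignedness) pair, so the hash is

     (((m0 * 2 + u0) * NMODES + m1) * 2 + u1) ...

   with NMODES == MAX_MACHINE_MODE.  Up to wraparound of the unsigned
   arithmetic, two entries hash the same only when all four modes and
   all four uns_p flags agree, which is exactly what the equality
   function below tests field by field.  */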
17167
17168 /* Compare builtin hash entries H1 and H2 for equivalence. */
17169 bool
17170 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17171 {
17172 return ((p1->mode[0] == p2->mode[0])
17173 && (p1->mode[1] == p2->mode[1])
17174 && (p1->mode[2] == p2->mode[2])
17175 && (p1->mode[3] == p2->mode[3])
17176 && (p1->uns_p[0] == p2->uns_p[0])
17177 && (p1->uns_p[1] == p2->uns_p[1])
17178 && (p1->uns_p[2] == p2->uns_p[2])
17179 && (p1->uns_p[3] == p2->uns_p[3]));
17180 }
17181
17182 /* Map types for builtin functions with an explicit return type and up to 3
17183 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17184 of the unused argument slots. */
17185 static tree
17186 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17187 machine_mode mode_arg1, machine_mode mode_arg2,
17188 enum rs6000_builtins builtin, const char *name)
17189 {
17190 struct builtin_hash_struct h;
17191 struct builtin_hash_struct *h2;
17192 int num_args = 3;
17193 int i;
17194 tree ret_type = NULL_TREE;
17195 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17196
17197 /* Create builtin_hash_table. */
17198 if (builtin_hash_table == NULL)
17199 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17200
17201 h.type = NULL_TREE;
17202 h.mode[0] = mode_ret;
17203 h.mode[1] = mode_arg0;
17204 h.mode[2] = mode_arg1;
17205 h.mode[3] = mode_arg2;
17206 h.uns_p[0] = 0;
17207 h.uns_p[1] = 0;
17208 h.uns_p[2] = 0;
17209 h.uns_p[3] = 0;
17210
17211 /* If the builtin produces unsigned results or takes unsigned arguments,
17212 and it is returned as a decl for the vectorizer (such as the widening
17213 multiplies and permutes), make sure the arguments and return value
17214 are type correct. */
17215 switch (builtin)
17216 {
17217 /* unsigned 1 argument functions. */
17218 case CRYPTO_BUILTIN_VSBOX:
17219 case CRYPTO_BUILTIN_VSBOX_BE:
17220 case P8V_BUILTIN_VGBBD:
17221 case MISC_BUILTIN_CDTBCD:
17222 case MISC_BUILTIN_CBCDTD:
17223 h.uns_p[0] = 1;
17224 h.uns_p[1] = 1;
17225 break;
17226
17227 /* unsigned 2 argument functions. */
17228 case ALTIVEC_BUILTIN_VMULEUB:
17229 case ALTIVEC_BUILTIN_VMULEUH:
17230 case P8V_BUILTIN_VMULEUW:
17231 case ALTIVEC_BUILTIN_VMULOUB:
17232 case ALTIVEC_BUILTIN_VMULOUH:
17233 case P8V_BUILTIN_VMULOUW:
17234 case CRYPTO_BUILTIN_VCIPHER:
17235 case CRYPTO_BUILTIN_VCIPHER_BE:
17236 case CRYPTO_BUILTIN_VCIPHERLAST:
17237 case CRYPTO_BUILTIN_VCIPHERLAST_BE:
17238 case CRYPTO_BUILTIN_VNCIPHER:
17239 case CRYPTO_BUILTIN_VNCIPHER_BE:
17240 case CRYPTO_BUILTIN_VNCIPHERLAST:
17241 case CRYPTO_BUILTIN_VNCIPHERLAST_BE:
17242 case CRYPTO_BUILTIN_VPMSUMB:
17243 case CRYPTO_BUILTIN_VPMSUMH:
17244 case CRYPTO_BUILTIN_VPMSUMW:
17245 case CRYPTO_BUILTIN_VPMSUMD:
17246 case CRYPTO_BUILTIN_VPMSUM:
17247 case MISC_BUILTIN_ADDG6S:
17248 case MISC_BUILTIN_DIVWEU:
17249 case MISC_BUILTIN_DIVDEU:
17250 case VSX_BUILTIN_UDIV_V2DI:
17251 case ALTIVEC_BUILTIN_VMAXUB:
17252 case ALTIVEC_BUILTIN_VMINUB:
17253 case ALTIVEC_BUILTIN_VMAXUH:
17254 case ALTIVEC_BUILTIN_VMINUH:
17255 case ALTIVEC_BUILTIN_VMAXUW:
17256 case ALTIVEC_BUILTIN_VMINUW:
17257 case P8V_BUILTIN_VMAXUD:
17258 case P8V_BUILTIN_VMINUD:
17259 h.uns_p[0] = 1;
17260 h.uns_p[1] = 1;
17261 h.uns_p[2] = 1;
17262 break;
17263
17264 /* unsigned 3 argument functions. */
17265 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17266 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17267 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17268 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17269 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17270 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17271 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17272 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17273 case VSX_BUILTIN_VPERM_16QI_UNS:
17274 case VSX_BUILTIN_VPERM_8HI_UNS:
17275 case VSX_BUILTIN_VPERM_4SI_UNS:
17276 case VSX_BUILTIN_VPERM_2DI_UNS:
17277 case VSX_BUILTIN_XXSEL_16QI_UNS:
17278 case VSX_BUILTIN_XXSEL_8HI_UNS:
17279 case VSX_BUILTIN_XXSEL_4SI_UNS:
17280 case VSX_BUILTIN_XXSEL_2DI_UNS:
17281 case CRYPTO_BUILTIN_VPERMXOR:
17282 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17283 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17284 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17285 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17286 case CRYPTO_BUILTIN_VSHASIGMAW:
17287 case CRYPTO_BUILTIN_VSHASIGMAD:
17288 case CRYPTO_BUILTIN_VSHASIGMA:
17289 h.uns_p[0] = 1;
17290 h.uns_p[1] = 1;
17291 h.uns_p[2] = 1;
17292 h.uns_p[3] = 1;
17293 break;
17294
17295 /* signed permute functions with unsigned char mask. */
17296 case ALTIVEC_BUILTIN_VPERM_16QI:
17297 case ALTIVEC_BUILTIN_VPERM_8HI:
17298 case ALTIVEC_BUILTIN_VPERM_4SI:
17299 case ALTIVEC_BUILTIN_VPERM_4SF:
17300 case ALTIVEC_BUILTIN_VPERM_2DI:
17301 case ALTIVEC_BUILTIN_VPERM_2DF:
17302 case VSX_BUILTIN_VPERM_16QI:
17303 case VSX_BUILTIN_VPERM_8HI:
17304 case VSX_BUILTIN_VPERM_4SI:
17305 case VSX_BUILTIN_VPERM_4SF:
17306 case VSX_BUILTIN_VPERM_2DI:
17307 case VSX_BUILTIN_VPERM_2DF:
17308 h.uns_p[3] = 1;
17309 break;
17310
17311 /* unsigned args, signed return. */
17312 case VSX_BUILTIN_XVCVUXDSP:
17313 case VSX_BUILTIN_XVCVUXDDP_UNS:
17314 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17315 h.uns_p[1] = 1;
17316 break;
17317
17318 /* signed args, unsigned return. */
17319 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17320 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17321 case MISC_BUILTIN_UNPACK_TD:
17322 case MISC_BUILTIN_UNPACK_V1TI:
17323 h.uns_p[0] = 1;
17324 break;
17325
17326 /* unsigned arguments, bool return (compares). */
17327 case ALTIVEC_BUILTIN_VCMPEQUB:
17328 case ALTIVEC_BUILTIN_VCMPEQUH:
17329 case ALTIVEC_BUILTIN_VCMPEQUW:
17330 case P8V_BUILTIN_VCMPEQUD:
17331 case VSX_BUILTIN_CMPGE_U16QI:
17332 case VSX_BUILTIN_CMPGE_U8HI:
17333 case VSX_BUILTIN_CMPGE_U4SI:
17334 case VSX_BUILTIN_CMPGE_U2DI:
17335 case ALTIVEC_BUILTIN_VCMPGTUB:
17336 case ALTIVEC_BUILTIN_VCMPGTUH:
17337 case ALTIVEC_BUILTIN_VCMPGTUW:
17338 case P8V_BUILTIN_VCMPGTUD:
17339 h.uns_p[1] = 1;
17340 h.uns_p[2] = 1;
17341 break;
17342
17343 /* unsigned arguments for 128-bit pack instructions. */
17344 case MISC_BUILTIN_PACK_TD:
17345 case MISC_BUILTIN_PACK_V1TI:
17346 h.uns_p[1] = 1;
17347 h.uns_p[2] = 1;
17348 break;
17349
17350 /* unsigned second arguments (vector shift right). */
17351 case ALTIVEC_BUILTIN_VSRB:
17352 case ALTIVEC_BUILTIN_VSRH:
17353 case ALTIVEC_BUILTIN_VSRW:
17354 case P8V_BUILTIN_VSRD:
17355 h.uns_p[2] = 1;
17356 break;
17357
17358 default:
17359 break;
17360 }
17361
17362 /* Figure out how many args are present. */
17363 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17364 num_args--;
17365
17366 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17367 if (!ret_type && h.uns_p[0])
17368 ret_type = builtin_mode_to_type[h.mode[0]][0];
17369
17370 if (!ret_type)
17371 fatal_error (input_location,
17372 "internal error: builtin function %qs had an unexpected "
17373 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17374
17375 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17376 arg_type[i] = NULL_TREE;
17377
17378 for (i = 0; i < num_args; i++)
17379 {
17380 int m = (int) h.mode[i+1];
17381 int uns_p = h.uns_p[i+1];
17382
17383 arg_type[i] = builtin_mode_to_type[m][uns_p];
17384 if (!arg_type[i] && uns_p)
17385 arg_type[i] = builtin_mode_to_type[m][0];
17386
17387 if (!arg_type[i])
17388 fatal_error (input_location,
17389 "internal error: builtin function %qs, argument %d "
17390 "had unexpected argument type %qs", name, i,
17391 GET_MODE_NAME (m));
17392 }
17393
17394 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17395 if (*found == NULL)
17396 {
17397 h2 = ggc_alloc<builtin_hash_struct> ();
17398 *h2 = h;
17399 *found = h2;
17400
17401 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17402 arg_type[2], NULL_TREE);
17403 }
17404
17405 return (*found)->type;
17406 }
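
/* Worked example (illustrative; the exact modes come from the insn
   pattern): CRYPTO_BUILTIN_VPMSUMB is listed among the unsigned
   2-argument functions above, so uns_p[0..2] are all 1; with all three
   operand modes V16QImode, the cached type is the equivalent of

     vector unsigned char f (vector unsigned char, vector unsigned char);

   taken from builtin_mode_to_type[V16QImode][1] for the return type and
   both arguments.  */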
17407
17408 static void
17409 rs6000_common_init_builtins (void)
17410 {
17411 const struct builtin_description *d;
17412 size_t i;
17413
17414 tree opaque_ftype_opaque = NULL_TREE;
17415 tree opaque_ftype_opaque_opaque = NULL_TREE;
17416 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17417 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17418
17419 /* Create Altivec and VSX builtins on machines with at least the
17420 general purpose extensions (970 and newer) to allow the use of
17421 the target attribute. */
17422
17423 if (TARGET_EXTRA_BUILTINS)
17424 builtin_mask |= RS6000_BTM_COMMON;
17425
17426 /* Add the ternary operators. */
17427 d = bdesc_3arg;
17428 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17429 {
17430 tree type;
17431 HOST_WIDE_INT mask = d->mask;
17432
17433 if ((mask & builtin_mask) != mask)
17434 {
17435 if (TARGET_DEBUG_BUILTIN)
17436 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17437 continue;
17438 }
17439
17440 if (rs6000_overloaded_builtin_p (d->code))
17441 {
17442 if (! (type = opaque_ftype_opaque_opaque_opaque))
17443 type = opaque_ftype_opaque_opaque_opaque
17444 = build_function_type_list (opaque_V4SI_type_node,
17445 opaque_V4SI_type_node,
17446 opaque_V4SI_type_node,
17447 opaque_V4SI_type_node,
17448 NULL_TREE);
17449 }
17450 else
17451 {
17452 enum insn_code icode = d->icode;
17453 if (d->name == 0)
17454 {
17455 if (TARGET_DEBUG_BUILTIN)
17456 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17457 (long unsigned)i);
17458
17459 continue;
17460 }
17461
17462 if (icode == CODE_FOR_nothing)
17463 {
17464 if (TARGET_DEBUG_BUILTIN)
17465 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17466 d->name);
17467
17468 continue;
17469 }
17470
17471 type = builtin_function_type (insn_data[icode].operand[0].mode,
17472 insn_data[icode].operand[1].mode,
17473 insn_data[icode].operand[2].mode,
17474 insn_data[icode].operand[3].mode,
17475 d->code, d->name);
17476 }
17477
17478 def_builtin (d->name, type, d->code);
17479 }
17480
17481 /* Add the binary operators. */
17482 d = bdesc_2arg;
17483 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17484 {
17485 machine_mode mode0, mode1, mode2;
17486 tree type;
17487 HOST_WIDE_INT mask = d->mask;
17488
17489 if ((mask & builtin_mask) != mask)
17490 {
17491 if (TARGET_DEBUG_BUILTIN)
17492 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17493 continue;
17494 }
17495
17496 if (rs6000_overloaded_builtin_p (d->code))
17497 {
17498 if (! (type = opaque_ftype_opaque_opaque))
17499 type = opaque_ftype_opaque_opaque
17500 = build_function_type_list (opaque_V4SI_type_node,
17501 opaque_V4SI_type_node,
17502 opaque_V4SI_type_node,
17503 NULL_TREE);
17504 }
17505 else
17506 {
17507 enum insn_code icode = d->icode;
17508 if (d->name == 0)
17509 {
17510 if (TARGET_DEBUG_BUILTIN)
17511 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17512 (long unsigned)i);
17513
17514 continue;
17515 }
17516
17517 if (icode == CODE_FOR_nothing)
17518 {
17519 if (TARGET_DEBUG_BUILTIN)
17520 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17521 d->name);
17522
17523 continue;
17524 }
17525
17526 mode0 = insn_data[icode].operand[0].mode;
17527 mode1 = insn_data[icode].operand[1].mode;
17528 mode2 = insn_data[icode].operand[2].mode;
17529
17530 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17531 d->code, d->name);
17532 }
17533
17534 def_builtin (d->name, type, d->code);
17535 }
17536
17537 /* Add the simple unary operators. */
17538 d = bdesc_1arg;
17539 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17540 {
17541 machine_mode mode0, mode1;
17542 tree type;
17543 HOST_WIDE_INT mask = d->mask;
17544
17545 if ((mask & builtin_mask) != mask)
17546 {
17547 if (TARGET_DEBUG_BUILTIN)
17548 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17549 continue;
17550 }
17551
17552 if (rs6000_overloaded_builtin_p (d->code))
17553 {
17554 if (! (type = opaque_ftype_opaque))
17555 type = opaque_ftype_opaque
17556 = build_function_type_list (opaque_V4SI_type_node,
17557 opaque_V4SI_type_node,
17558 NULL_TREE);
17559 }
17560 else
17561 {
17562 enum insn_code icode = d->icode;
17563 if (d->name == 0)
17564 {
17565 if (TARGET_DEBUG_BUILTIN)
17566 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17567 (long unsigned)i);
17568
17569 continue;
17570 }
17571
17572 if (icode == CODE_FOR_nothing)
17573 {
17574 if (TARGET_DEBUG_BUILTIN)
17575 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17576 d->name);
17577
17578 continue;
17579 }
17580
17581 mode0 = insn_data[icode].operand[0].mode;
17582 mode1 = insn_data[icode].operand[1].mode;
17583
17584 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17585 d->code, d->name);
17586 }
17587
17588 def_builtin (d->name, type, d->code);
17589 }
17590
17591 /* Add the simple no-argument operators. */
17592 d = bdesc_0arg;
17593 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17594 {
17595 machine_mode mode0;
17596 tree type;
17597 HOST_WIDE_INT mask = d->mask;
17598
17599 if ((mask & builtin_mask) != mask)
17600 {
17601 if (TARGET_DEBUG_BUILTIN)
17602 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17603 continue;
17604 }
17605 if (rs6000_overloaded_builtin_p (d->code))
17606 {
17607 if (!opaque_ftype_opaque)
17608 opaque_ftype_opaque
17609 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17610 type = opaque_ftype_opaque;
17611 }
17612 else
17613 {
17614 enum insn_code icode = d->icode;
17615 if (d->name == 0)
17616 {
17617 if (TARGET_DEBUG_BUILTIN)
17618 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17619 (long unsigned) i);
17620 continue;
17621 }
17622 if (icode == CODE_FOR_nothing)
17623 {
17624 if (TARGET_DEBUG_BUILTIN)
17625 fprintf (stderr,
17626 "rs6000_builtin, skip no-argument %s (no code)\n",
17627 d->name);
17628 continue;
17629 }
17630 mode0 = insn_data[icode].operand[0].mode;
17631 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17632 d->code, d->name);
17633 }
17634 def_builtin (d->name, type, d->code);
17635 }
17636 }
17637
17638 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17639 static void
17640 init_float128_ibm (machine_mode mode)
17641 {
17642 if (!TARGET_XL_COMPAT)
17643 {
17644 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17645 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17646 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17647 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17648
17649 if (!TARGET_HARD_FLOAT)
17650 {
17651 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17652 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17653 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17654 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17655 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17656 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17657 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17658 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17659
17660 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17661 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17662 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17663 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17664 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17665 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17666 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17667 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17668 }
17669 }
17670 else
17671 {
17672 set_optab_libfunc (add_optab, mode, "_xlqadd");
17673 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17674 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17675 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17676 }
17677
17678 /* Add various conversions for IFmode to use the traditional TFmode
17679 names. */
17680 if (mode == IFmode)
17681 {
17682 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
17683 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
17684 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
17685 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
17686 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
17687 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
17688
17689 if (TARGET_POWERPC64)
17690 {
17691 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17692 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17693 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17694 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
17695 }
17696 }
17697 }
17698
17699 /* Create a decl for either complex long double multiply or complex long double
17700 divide when long double is IEEE 128-bit floating point. We can't use
17701 __multc3 and __divtc3 because those names were already taken by the
17702 original IBM extended double format. The complex multiply/divide functions
17703 are encoded as builtin functions with a complex result and 4 scalar inputs. */
17704
17705 static void
17706 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17707 {
17708 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17709 name, NULL_TREE);
17710
17711 set_builtin_decl (fncode, fndecl, true);
17712
17713 if (TARGET_DEBUG_BUILTIN)
17714 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17715
17716 return;
17717 }
17718
17719 /* Set up IEEE 128-bit floating point routines. Use different names if the
17720 arguments can be passed in a vector register. The historical PowerPC
17721 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17722 continue to use that if we aren't using vector registers to pass IEEE
17723 128-bit floating point. */
17724
17725 static void
17726 init_float128_ieee (machine_mode mode)
17727 {
17728 if (FLOAT128_VECTOR_P (mode))
17729 {
17730 static bool complex_muldiv_init_p = false;
17731
17732 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
17733 we have clone or target attributes, this will be called a second
17734 time. We want to create the built-in function only once. */
17735 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17736 {
17737 complex_muldiv_init_p = true;
17738 built_in_function fncode_mul =
17739 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17740 - MIN_MODE_COMPLEX_FLOAT);
17741 built_in_function fncode_div =
17742 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17743 - MIN_MODE_COMPLEX_FLOAT);
17744
17745 tree fntype = build_function_type_list (complex_long_double_type_node,
17746 long_double_type_node,
17747 long_double_type_node,
17748 long_double_type_node,
17749 long_double_type_node,
17750 NULL_TREE);
17751
17752 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17753 create_complex_muldiv ("__divkc3", fncode_div, fntype);
17754 }
17755
17756 set_optab_libfunc (add_optab, mode, "__addkf3");
17757 set_optab_libfunc (sub_optab, mode, "__subkf3");
17758 set_optab_libfunc (neg_optab, mode, "__negkf2");
17759 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17760 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17761 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17762 set_optab_libfunc (abs_optab, mode, "__abskf2");
17763 set_optab_libfunc (powi_optab, mode, "__powikf2");
17764
17765 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17766 set_optab_libfunc (ne_optab, mode, "__nekf2");
17767 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17768 set_optab_libfunc (ge_optab, mode, "__gekf2");
17769 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17770 set_optab_libfunc (le_optab, mode, "__lekf2");
17771 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17772
17773 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17774 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17775 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17776 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17777
17778 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17779 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17780 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17781
17782 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17783 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17784 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17785
17786 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
17787 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
17788 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
17789 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
17790 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
17791 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
17792
17793 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17794 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17795 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17796 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17797
17798 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17799 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17800 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17801 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17802
17803 if (TARGET_POWERPC64)
17804 {
17805 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17806 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17807 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17808 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17809 }
17810 }
17811
17812 else
17813 {
17814 set_optab_libfunc (add_optab, mode, "_q_add");
17815 set_optab_libfunc (sub_optab, mode, "_q_sub");
17816 set_optab_libfunc (neg_optab, mode, "_q_neg");
17817 set_optab_libfunc (smul_optab, mode, "_q_mul");
17818 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17819 if (TARGET_PPC_GPOPT)
17820 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17821
17822 set_optab_libfunc (eq_optab, mode, "_q_feq");
17823 set_optab_libfunc (ne_optab, mode, "_q_fne");
17824 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17825 set_optab_libfunc (ge_optab, mode, "_q_fge");
17826 set_optab_libfunc (lt_optab, mode, "_q_flt");
17827 set_optab_libfunc (le_optab, mode, "_q_fle");
17828
17829 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17830 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17831 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17832 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17833 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17834 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17835 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17836 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
17837 }
17838 }
17839
17840 static void
17841 rs6000_init_libfuncs (void)
17842 {
17843 /* __float128 support. */
17844 if (TARGET_FLOAT128_TYPE)
17845 {
17846 init_float128_ibm (IFmode);
17847 init_float128_ieee (KFmode);
17848 }
17849
17850 /* AIX/Darwin/64-bit Linux quad floating point routines. */
17851 if (TARGET_LONG_DOUBLE_128)
17852 {
17853 if (!TARGET_IEEEQUAD)
17854 init_float128_ibm (TFmode);
17855
17856 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
17857 else
17858 init_float128_ieee (TFmode);
17859 }
17860 }
17861
17862 /* Emit a potentially record-form instruction, setting DST from SRC.
17863 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17864 signed comparison of DST with zero. If DOT is 1, the generated RTL
17865 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17866 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17867 a separate COMPARE. */
17868
17869 void
17870 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17871 {
17872 if (dot == 0)
17873 {
17874 emit_move_insn (dst, src);
17875 return;
17876 }
17877
17878 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17879 {
17880 emit_move_insn (dst, src);
17881 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17882 return;
17883 }
17884
17885 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17886 if (dot == 1)
17887 {
17888 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17889 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17890 }
17891 else
17892 {
17893 rtx set = gen_rtx_SET (dst, src);
17894 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17895 }
17896 }
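
/* For illustration, the shapes generated above when CCREG is CR0:

     dot == 1:  (parallel [(set ccreg (compare:CC src (const_int 0)))
                           (clobber dst)])
     dot == 2:  (parallel [(set ccreg (compare:CC src (const_int 0)))
                           (set dst src)])

   For any other CC register the move and the compare are emitted as two
   separate insns instead.  */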
17897
17898 \f
17899 /* A validation routine: say whether CODE, a condition code, and MODE
17900 match. The other alternatives either don't make sense or should
17901 never be generated. */
17902
17903 void
17904 validate_condition_mode (enum rtx_code code, machine_mode mode)
17905 {
17906 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
17907 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
17908 && GET_MODE_CLASS (mode) == MODE_CC);
17909
17910 /* These don't make sense. */
17911 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
17912 || mode != CCUNSmode);
17913
17914 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
17915 || mode == CCUNSmode);
17916
17917 gcc_assert (mode == CCFPmode
17918 || (code != ORDERED && code != UNORDERED
17919 && code != UNEQ && code != LTGT
17920 && code != UNGT && code != UNLT
17921 && code != UNGE && code != UNLE));
17922
17923 /* These should never be generated except for
17924 flag_finite_math_only. */
17925 gcc_assert (mode != CCFPmode
17926 || flag_finite_math_only
17927 || (code != LE && code != GE
17928 && code != UNEQ && code != LTGT
17929 && code != UNGT && code != UNLT));
17930
17931 /* These are invalid; the information is not there. */
17932 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17933 }
17934
17935 \f
17936 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17937 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17938 not zero, store there the bit offset (counted from the right) where
17939 the single stretch of 1 bits begins; and similarly for B, the bit
17940 offset where it ends. */
17941
17942 bool
17943 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17944 {
17945 unsigned HOST_WIDE_INT val = INTVAL (mask);
17946 unsigned HOST_WIDE_INT bit;
17947 int nb, ne;
17948 int n = GET_MODE_PRECISION (mode);
17949
17950 if (mode != DImode && mode != SImode)
17951 return false;
17952
17953 if (INTVAL (mask) >= 0)
17954 {
17955 bit = val & -val;
17956 ne = exact_log2 (bit);
17957 nb = exact_log2 (val + bit);
17958 }
17959 else if (val + 1 == 0)
17960 {
17961 nb = n;
17962 ne = 0;
17963 }
17964 else if (val & 1)
17965 {
17966 val = ~val;
17967 bit = val & -val;
17968 nb = exact_log2 (bit);
17969 ne = exact_log2 (val + bit);
17970 }
17971 else
17972 {
17973 bit = val & -val;
17974 ne = exact_log2 (bit);
17975 if (val + bit == 0)
17976 nb = n;
17977 else
17978 nb = 0;
17979 }
17980
17981 nb--;
17982
17983 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17984 return false;
17985
17986 if (b)
17987 *b = nb;
17988 if (e)
17989 *e = ne;
17990
17991 return true;
17992 }
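
/* Two worked examples (illustrative):

     MASK = 0x0ff0, SImode: INTVAL is positive, bit = val & -val = 0x10,
     so ne = 4 and nb = exact_log2 (0x1000) - 1 = 11; the single stretch
     of ones covers bits 4..11 and *B = 11, *E = 4.

     MASK = 0xf000000f as an SImode CONST_INT: INTVAL is negative and
     odd, so val is complemented to 0x0ffffff0, giving nb = 3 and
     ne = 28: a wrap-around mask with ones in bits 28..31 and 0..3,
     which rlwinm can handle although a DImode AND cannot (see below).  */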
17993
17994 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
17995 or rldicr instruction, to implement an AND with it in mode MODE. */
17996
17997 bool
17998 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
17999 {
18000 int nb, ne;
18001
18002 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18003 return false;
18004
18005 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18006 does not wrap. */
18007 if (mode == DImode)
18008 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18009
18010 /* For SImode, rlwinm can do everything. */
18011 if (mode == SImode)
18012 return (nb < 32 && ne < 32);
18013
18014 return false;
18015 }
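
/* For example (illustrative): in DImode, 0x00000000ffffffff (ne == 0)
   is a single rldicl, 0xffffffff00000000 (nb == 63) a single rldicr,
   and 0x0000000000ffff00 (nb == 23, ne == 8) fits the rlwinm case
   because nb < 32 and ne <= nb; a wrap-around DImode mask such as
   0xff000000000000ff (nb == 7, ne == 56) fails all three tests.  */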
18016
18017 /* Return the instruction template for an AND with mask in mode MODE, with
18018 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18019
18020 const char *
18021 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18022 {
18023 int nb, ne;
18024
18025 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18026 gcc_unreachable ();
18027
18028 if (mode == DImode && ne == 0)
18029 {
18030 operands[3] = GEN_INT (63 - nb);
18031 if (dot)
18032 return "rldicl. %0,%1,0,%3";
18033 return "rldicl %0,%1,0,%3";
18034 }
18035
18036 if (mode == DImode && nb == 63)
18037 {
18038 operands[3] = GEN_INT (63 - ne);
18039 if (dot)
18040 return "rldicr. %0,%1,0,%3";
18041 return "rldicr %0,%1,0,%3";
18042 }
18043
18044 if (nb < 32 && ne < 32)
18045 {
18046 operands[3] = GEN_INT (31 - nb);
18047 operands[4] = GEN_INT (31 - ne);
18048 if (dot)
18049 return "rlwinm. %0,%1,0,%3,%4";
18050 return "rlwinm %0,%1,0,%3,%4";
18051 }
18052
18053 gcc_unreachable ();
18054 }
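
/* Usage sketch (illustrative): an AND with 0x7fffffffffffffff in DImode
   has nb == 62 and ne == 0, so the first case fires with
   operands[3] = 1 and the template is "rldicl %0,%1,0,1", clearing just
   the sign bit; with DOT true the record form "rldicl. %0,%1,0,1" is
   returned instead.  */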
18055
18056 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18057 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18058 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18059
18060 bool
18061 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18062 {
18063 int nb, ne;
18064
18065 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18066 return false;
18067
18068 int n = GET_MODE_PRECISION (mode);
18069 int sh = -1;
18070
18071 if (CONST_INT_P (XEXP (shift, 1)))
18072 {
18073 sh = INTVAL (XEXP (shift, 1));
18074 if (sh < 0 || sh >= n)
18075 return false;
18076 }
18077
18078 rtx_code code = GET_CODE (shift);
18079
18080 /* Convert any shift by 0 to a rotate, to simplify below code. */
18081 if (sh == 0)
18082 code = ROTATE;
18083
18084 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18085 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18086 code = ASHIFT;
18087 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18088 {
18089 code = LSHIFTRT;
18090 sh = n - sh;
18091 }
18092
18093 /* DImode rotates need rld*. */
18094 if (mode == DImode && code == ROTATE)
18095 return (nb == 63 || ne == 0 || ne == sh);
18096
18097 /* SImode rotates need rlw*. */
18098 if (mode == SImode && code == ROTATE)
18099 return (nb < 32 && ne < 32 && sh < 32);
18100
18101 /* Wrap-around masks are only okay for rotates. */
18102 if (ne > nb)
18103 return false;
18104
18105 /* Variable shifts are only okay for rotates. */
18106 if (sh < 0)
18107 return false;
18108
18109 /* Don't allow ASHIFT if the mask is wrong for that. */
18110 if (code == ASHIFT && ne < sh)
18111 return false;
18112
18113 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18114 if the mask is wrong for that. */
18115 if (nb < 32 && ne < 32 && sh < 32
18116 && !(code == LSHIFTRT && nb >= 32 - sh))
18117 return true;
18118
18119 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18120 if the mask is wrong for that. */
18121 if (code == LSHIFTRT)
18122 sh = 64 - sh;
18123 if (nb == 63 || ne == 0 || ne == sh)
18124 return !(code == LSHIFTRT && nb >= sh);
18125
18126 return false;
18127 }
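
/* Worked example (illustrative): DImode, SHIFT an ASHIFT by 8, and a
   MASK with one stretch of ones in bits 8..40 (nb == 40, ne == 8).
   The mask does not wrap, the shift count is constant, ASHIFT with
   ne >= sh is allowed, and the final rld* test accepts ne == sh, so
   this shift-and-mask can be done in one rldic-class instruction.  */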
18128
18129 /* Return the instruction template for a shift with mask in mode MODE, with
18130 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18131
18132 const char *
18133 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18134 {
18135 int nb, ne;
18136
18137 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18138 gcc_unreachable ();
18139
18140 if (mode == DImode && ne == 0)
18141 {
18142 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18143 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18144 operands[3] = GEN_INT (63 - nb);
18145 if (dot)
18146 return "rld%I2cl. %0,%1,%2,%3";
18147 return "rld%I2cl %0,%1,%2,%3";
18148 }
18149
18150 if (mode == DImode && nb == 63)
18151 {
18152 operands[3] = GEN_INT (63 - ne);
18153 if (dot)
18154 return "rld%I2cr. %0,%1,%2,%3";
18155 return "rld%I2cr %0,%1,%2,%3";
18156 }
18157
18158 if (mode == DImode
18159 && GET_CODE (operands[4]) != LSHIFTRT
18160 && CONST_INT_P (operands[2])
18161 && ne == INTVAL (operands[2]))
18162 {
18163 operands[3] = GEN_INT (63 - nb);
18164 if (dot)
18165 return "rld%I2c. %0,%1,%2,%3";
18166 return "rld%I2c %0,%1,%2,%3";
18167 }
18168
18169 if (nb < 32 && ne < 32)
18170 {
18171 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18172 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18173 operands[3] = GEN_INT (31 - nb);
18174 operands[4] = GEN_INT (31 - ne);
18175 /* This insn can also be a 64-bit rotate with mask that really makes
18176 it just a shift right (with mask); the %h below are to adjust for
18177 that situation (shift count is >= 32 in that case). */
18178 if (dot)
18179 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18180 return "rlw%I2nm %0,%1,%h2,%3,%4";
18181 }
18182
18183 gcc_unreachable ();
18184 }
18185
18186 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18187 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18188 ASHIFT, or LSHIFTRT) in mode MODE. */
18189
18190 bool
18191 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18192 {
18193 int nb, ne;
18194
18195 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18196 return false;
18197
18198 int n = GET_MODE_PRECISION (mode);
18199
18200 int sh = INTVAL (XEXP (shift, 1));
18201 if (sh < 0 || sh >= n)
18202 return false;
18203
18204 rtx_code code = GET_CODE (shift);
18205
18206 /* Convert any shift by 0 to a rotate, to simplify below code. */
18207 if (sh == 0)
18208 code = ROTATE;
18209
18210 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18211 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18212 code = ASHIFT;
18213 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18214 {
18215 code = LSHIFTRT;
18216 sh = n - sh;
18217 }
18218
18219 /* DImode rotates need rldimi. */
18220 if (mode == DImode && code == ROTATE)
18221 return (ne == sh);
18222
18223 /* SImode rotates need rlwimi. */
18224 if (mode == SImode && code == ROTATE)
18225 return (nb < 32 && ne < 32 && sh < 32);
18226
18227 /* Wrap-around masks are only okay for rotates. */
18228 if (ne > nb)
18229 return false;
18230
18231 /* Don't allow ASHIFT if the mask is wrong for that. */
18232 if (code == ASHIFT && ne < sh)
18233 return false;
18234
18235 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18236 if the mask is wrong for that. */
18237 if (nb < 32 && ne < 32 && sh < 32
18238 && !(code == LSHIFTRT && nb >= 32 - sh))
18239 return true;
18240
18241 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18242 if the mask is wrong for that. */
18243 if (code == LSHIFTRT)
18244 sh = 64 - sh;
18245 if (ne == sh)
18246 return !(code == LSHIFTRT && nb >= sh);
18247
18248 return false;
18249 }
18250
18251 /* Return the instruction template for an insert with mask in mode MODE, with
18252 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18253
18254 const char *
18255 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18256 {
18257 int nb, ne;
18258
18259 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18260 gcc_unreachable ();
18261
18262 /* Prefer rldimi because rlwimi is cracked. */
18263 if (TARGET_POWERPC64
18264 && (!dot || mode == DImode)
18265 && GET_CODE (operands[4]) != LSHIFTRT
18266 && ne == INTVAL (operands[2]))
18267 {
18268 operands[3] = GEN_INT (63 - nb);
18269 if (dot)
18270 return "rldimi. %0,%1,%2,%3";
18271 return "rldimi %0,%1,%2,%3";
18272 }
18273
18274 if (nb < 32 && ne < 32)
18275 {
18276 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18277 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18278 operands[3] = GEN_INT (31 - nb);
18279 operands[4] = GEN_INT (31 - ne);
18280 if (dot)
18281 return "rlwimi. %0,%1,%2,%3,%4";
18282 return "rlwimi %0,%1,%2,%3,%4";
18283 }
18284
18285 gcc_unreachable ();
18286 }
18287
18288 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18289 using two machine instructions. */
18290
18291 bool
18292 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18293 {
18294 /* There are two kinds of AND we can handle with two insns:
18295 1) those we can do with two rl* insn;
18296 2) ori[s];xori[s].
18297
18298 We do not handle that last case yet. */
18299
18300 /* If there is just one stretch of ones, we can do it. */
18301 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18302 return true;
18303
18304 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18305 one insn, we can do the whole thing with two. */
18306 unsigned HOST_WIDE_INT val = INTVAL (c);
18307 unsigned HOST_WIDE_INT bit1 = val & -val;
18308 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18309 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18310 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18311 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
18312 }
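
/* Worked example (illustrative): C = 0x00ff00ff.  Then bit1 = 0x1,
   bit2 = 0x100 (lowest bit of the lowest hole), val1 = 0x00ff0000,
   bit3 = 0x10000 (first set bit above that hole), and
   val + bit3 - bit2 = 0x00ffffff, i.e. C with its lowest hole filled.
   That is a single stretch of ones, so one rl* insn can produce it and
   the whole AND takes two insns.  */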
18313
18314 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18315 If EXPAND is true, split rotate-and-mask instructions we generate to
18316 their constituent parts as well (this is used during expand); if DOT
18317 is 1, make the last insn a record-form instruction clobbering the
18318 destination GPR and setting the CC reg (from operands[3]); if 2, set
18319 that GPR as well as the CC reg. */
18320
18321 void
18322 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18323 {
18324 gcc_assert (!(expand && dot));
18325
18326 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18327
18328 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18329 shift right. This generates better code than doing the masks without
18330 shifts, or shifting first right and then left. */
18331 int nb, ne;
18332 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18333 {
18334 gcc_assert (mode == DImode);
18335
18336 int shift = 63 - nb;
18337 if (expand)
18338 {
18339 rtx tmp1 = gen_reg_rtx (DImode);
18340 rtx tmp2 = gen_reg_rtx (DImode);
18341 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18342 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18343 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18344 }
18345 else
18346 {
18347 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18348 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18349 emit_move_insn (operands[0], tmp);
18350 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18351 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18352 }
18353 return;
18354 }
18355
18356 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18357 that does the rest. */
18358 unsigned HOST_WIDE_INT bit1 = val & -val;
18359 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18360 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18361 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18362
18363 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18364 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18365
18366 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18367
18368 /* Two "no-rotate"-and-mask instructions, for SImode. */
18369 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18370 {
18371 gcc_assert (mode == SImode);
18372
18373 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18374 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18375 emit_move_insn (reg, tmp);
18376 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18377 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18378 return;
18379 }
18380
18381 gcc_assert (mode == DImode);
18382
18383 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18384 insns; we have to do the first in SImode, because it wraps. */
18385 if (mask2 <= 0xffffffff
18386 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18387 {
18388 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18389 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18390 GEN_INT (mask1));
18391 rtx reg_low = gen_lowpart (SImode, reg);
18392 emit_move_insn (reg_low, tmp);
18393 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18394 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18395 return;
18396 }
18397
18398 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18399 at the top end), rotate back and clear the other hole. */
18400 int right = exact_log2 (bit3);
18401 int left = 64 - right;
18402
18403 /* Rotate the mask too. */
18404 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18405
18406 if (expand)
18407 {
18408 rtx tmp1 = gen_reg_rtx (DImode);
18409 rtx tmp2 = gen_reg_rtx (DImode);
18410 rtx tmp3 = gen_reg_rtx (DImode);
18411 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18412 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18413 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18414 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18415 }
18416 else
18417 {
18418 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18419 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18420 emit_move_insn (operands[0], tmp);
18421 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18422 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18423 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18424 }
18425 }
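
/* Worked example for the single-stretch branch above (illustrative):
   operands[2] = 0x0000ffff00000000 in DImode gives nb == 47, ne == 32
   and shift == 16, so the expanded sequence is roughly

     sldi   tmp1,src,16      # ones now sit in bits 48..63
     rldicr tmp2,tmp1,0,15   # AND with 0xffff000000000000
     srdi   dst,tmp2,16

   using only masks that do not wrap, as the comment above explains.  */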
18426 \f
18427 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
18428 for lfq and stfq insns, iff the registers are hard registers. */
18429
18430 int
18431 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18432 {
18433 /* We might have been passed a SUBREG. */
18434 if (!REG_P (reg1) || !REG_P (reg2))
18435 return 0;
18436
18437 /* We might have been passed non-floating-point registers. */
18438 if (!FP_REGNO_P (REGNO (reg1))
18439 || !FP_REGNO_P (REGNO (reg2)))
18440 return 0;
18441
18442 return (REGNO (reg1) == REGNO (reg2) - 1);
18443 }
18444
18445 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
18446 addr1 and addr2 must be in consecutive memory locations
18447 (addr2 == addr1 + 8). */
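/* For example (an illustrative pair): (mem:DF (reg R)) and
   (mem:DF (plus (reg R) (const_int 8))) satisfy these conditions. */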
18448
18449 int
18450 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18451 {
18452 rtx addr1, addr2;
18453 unsigned int reg1, reg2;
18454 int offset1, offset2;
18455
18456 /* The mems cannot be volatile. */
18457 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18458 return 0;
18459
18460 addr1 = XEXP (mem1, 0);
18461 addr2 = XEXP (mem2, 0);
18462
18463 /* Extract an offset (if used) from the first addr. */
18464 if (GET_CODE (addr1) == PLUS)
18465 {
18466 /* If not a REG, return zero. */
18467 if (!REG_P (XEXP (addr1, 0)))
18468 return 0;
18469 else
18470 {
18471 reg1 = REGNO (XEXP (addr1, 0));
18472 /* The offset must be constant. */
18473 if (!CONST_INT_P (XEXP (addr1, 1)))
18474 return 0;
18475 offset1 = INTVAL (XEXP (addr1, 1));
18476 }
18477 }
18478 else if (!REG_P (addr1))
18479 return 0;
18480 else
18481 {
18482 reg1 = REGNO (addr1);
18483 /* This was a simple (mem (reg)) expression. Offset is 0. */
18484 offset1 = 0;
18485 }
18486
18487 /* And now for the second addr. */
18488 if (GET_CODE (addr2) == PLUS)
18489 {
18490 /* If not a REG, return zero. */
18491 if (!REG_P (XEXP (addr2, 0)))
18492 return 0;
18493 else
18494 {
18495 reg2 = REGNO (XEXP (addr2, 0));
18496 /* The offset must be constant. */
18497 if (!CONST_INT_P (XEXP (addr2, 1)))
18498 return 0;
18499 offset2 = INTVAL (XEXP (addr2, 1));
18500 }
18501 }
18502 else if (!REG_P (addr2))
18503 return 0;
18504 else
18505 {
18506 reg2 = REGNO (addr2);
18507 /* This was a simple (mem (reg)) expression. Offset is 0. */
18508 offset2 = 0;
18509 }
18510
18511 /* Both of these must have the same base register. */
18512 if (reg1 != reg2)
18513 return 0;
18514
18515 /* The offset for the second addr must be 8 more than the first addr. */
18516 if (offset2 != offset1 + 8)
18517 return 0;
18518
18519 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18520 instructions. */
18521 return 1;
18522 }
18523 \f
18524 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18525 need to use DDmode, in all other cases we can use the same mode. */
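/* (SDmode values are kept in floating point registers in DDmode format, so
   the memory slot used for the copy must be 8 bytes wide; hence the DDmode
   result during LRA. This note is our reading of the surrounding code.) */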
18526 static machine_mode
18527 rs6000_secondary_memory_needed_mode (machine_mode mode)
18528 {
18529 if (lra_in_progress && mode == SDmode)
18530 return DDmode;
18531 return mode;
18532 }
18533
18534 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18535 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18536 only work on the traditional altivec registers, note if an altivec register
18537 was chosen. */
18538
18539 static enum rs6000_reg_type
18540 register_to_reg_type (rtx reg, bool *is_altivec)
18541 {
18542 HOST_WIDE_INT regno;
18543 enum reg_class rclass;
18544
18545 if (SUBREG_P (reg))
18546 reg = SUBREG_REG (reg);
18547
18548 if (!REG_P (reg))
18549 return NO_REG_TYPE;
18550
18551 regno = REGNO (reg);
18552 if (!HARD_REGISTER_NUM_P (regno))
18553 {
18554 if (!lra_in_progress && !reload_completed)
18555 return PSEUDO_REG_TYPE;
18556
18557 regno = true_regnum (reg);
18558 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
18559 return PSEUDO_REG_TYPE;
18560 }
18561
18562 gcc_assert (regno >= 0);
18563
18564 if (is_altivec && ALTIVEC_REGNO_P (regno))
18565 *is_altivec = true;
18566
18567 rclass = rs6000_regno_regclass[regno];
18568 return reg_class_to_reg_type[(int)rclass];
18569 }
18570
18571 /* Helper function to return the cost of adding a TOC entry address. */
18572
18573 static inline int
18574 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18575 {
18576 int ret;
18577
18578 if (TARGET_CMODEL != CMODEL_SMALL)
18579 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18580
18581 else
18582 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18583
18584 return ret;
18585 }
18586
18587 /* Helper function for rs6000_secondary_reload to determine whether the memory
18588 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18589 needs reloading. Return negative if the memory is not handled by the memory
18590 helper functions and to try a different reload method, 0 if no additional
18591 instructions are needed, and positive to give the extra cost for the
18592 memory. */
18593
18594 static int
18595 rs6000_secondary_reload_memory (rtx addr,
18596 enum reg_class rclass,
18597 machine_mode mode)
18598 {
18599 int extra_cost = 0;
18600 rtx reg, and_arg, plus_arg0, plus_arg1;
18601 addr_mask_type addr_mask;
18602 const char *type = NULL;
18603 const char *fail_msg = NULL;
18604
18605 if (GPR_REG_CLASS_P (rclass))
18606 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18607
18608 else if (rclass == FLOAT_REGS)
18609 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18610
18611 else if (rclass == ALTIVEC_REGS)
18612 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18613
18614 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18615 else if (rclass == VSX_REGS)
18616 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18617 & ~RELOAD_REG_AND_M16);
18618
18619 /* If the register allocator hasn't made up its mind yet on the register
18620 class to use, settle on defaults to use. */
18621 else if (rclass == NO_REGS)
18622 {
18623 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18624 & ~RELOAD_REG_AND_M16);
18625
18626 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18627 addr_mask &= ~(RELOAD_REG_INDEXED
18628 | RELOAD_REG_PRE_INCDEC
18629 | RELOAD_REG_PRE_MODIFY);
18630 }
18631
18632 else
18633 addr_mask = 0;
18634
18635 /* If the mode isn't valid in this register class, just return now. */
18636 if ((addr_mask & RELOAD_REG_VALID) == 0)
18637 {
18638 if (TARGET_DEBUG_ADDR)
18639 {
18640 fprintf (stderr,
18641 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18642 "not valid in class\n",
18643 GET_MODE_NAME (mode), reg_class_names[rclass]);
18644 debug_rtx (addr);
18645 }
18646
18647 return -1;
18648 }
18649
18650 switch (GET_CODE (addr))
18651 {
18652 /* Does the register class support auto update forms for this mode? We
18653 don't need a scratch register, since the powerpc only supports
18654 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18655 case PRE_INC:
18656 case PRE_DEC:
18657 reg = XEXP (addr, 0);
18658 if (!base_reg_operand (reg, GET_MODE (reg)))
18659 {
18660 fail_msg = "no base register #1";
18661 extra_cost = -1;
18662 }
18663
18664 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18665 {
18666 extra_cost = 1;
18667 type = "update";
18668 }
18669 break;
18670
18671 case PRE_MODIFY:
18672 reg = XEXP (addr, 0);
18673 plus_arg1 = XEXP (addr, 1);
18674 if (!base_reg_operand (reg, GET_MODE (reg))
18675 || GET_CODE (plus_arg1) != PLUS
18676 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18677 {
18678 fail_msg = "bad PRE_MODIFY";
18679 extra_cost = -1;
18680 }
18681
18682 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18683 {
18684 extra_cost = 1;
18685 type = "update";
18686 }
18687 break;
18688
18689 /* Do we need to simulate AND -16 to clear the bottom address bits used
18690 in VMX load/stores? Only allow the AND for vector sizes. */
18691 case AND:
18692 and_arg = XEXP (addr, 0);
18693 if (GET_MODE_SIZE (mode) != 16
18694 || !CONST_INT_P (XEXP (addr, 1))
18695 || INTVAL (XEXP (addr, 1)) != -16)
18696 {
18697 fail_msg = "bad Altivec AND #1";
18698 extra_cost = -1;
18699 }
18700
18701 if (rclass != ALTIVEC_REGS)
18702 {
18703 if (legitimate_indirect_address_p (and_arg, false))
18704 extra_cost = 1;
18705
18706 else if (legitimate_indexed_address_p (and_arg, false))
18707 extra_cost = 2;
18708
18709 else
18710 {
18711 fail_msg = "bad Altivec AND #2";
18712 extra_cost = -1;
18713 }
18714
18715 type = "and";
18716 }
18717 break;
18718
18719 /* If this is an indirect address, make sure it is a base register. */
18720 case REG:
18721 case SUBREG:
18722 if (!legitimate_indirect_address_p (addr, false))
18723 {
18724 extra_cost = 1;
18725 type = "move";
18726 }
18727 break;
18728
18729 /* If this is an indexed address, make sure the register class can handle
18730 indexed addresses for this mode. */
18731 case PLUS:
18732 plus_arg0 = XEXP (addr, 0);
18733 plus_arg1 = XEXP (addr, 1);
18734
18735 /* (plus (plus (reg) (constant)) (constant)) is generated during
18736 push_reload processing, so handle it now. */
18737 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18738 {
18739 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18740 {
18741 extra_cost = 1;
18742 type = "offset";
18743 }
18744 }
18745
18746 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18747 push_reload processing, so handle it now. */
18748 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18749 {
18750 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18751 {
18752 extra_cost = 1;
18753 type = "indexed #2";
18754 }
18755 }
18756
18757 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18758 {
18759 fail_msg = "no base register #2";
18760 extra_cost = -1;
18761 }
18762
18763 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18764 {
18765 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18766 || !legitimate_indexed_address_p (addr, false))
18767 {
18768 extra_cost = 1;
18769 type = "indexed";
18770 }
18771 }
18772
18773 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18774 && CONST_INT_P (plus_arg1))
18775 {
18776 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18777 {
18778 extra_cost = 1;
18779 type = "vector d-form offset";
18780 }
18781 }
18782
18783 /* Make sure the register class can handle offset addresses. */
18784 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18785 {
18786 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18787 {
18788 extra_cost = 1;
18789 type = "offset #2";
18790 }
18791 }
18792
18793 else
18794 {
18795 fail_msg = "bad PLUS";
18796 extra_cost = -1;
18797 }
18798
18799 break;
18800
18801 case LO_SUM:
18802 /* Quad offsets are restricted and can't handle normal addresses. */
18803 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18804 {
18805 extra_cost = -1;
18806 type = "vector d-form lo_sum";
18807 }
18808
18809 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18810 {
18811 fail_msg = "bad LO_SUM";
18812 extra_cost = -1;
18813 }
18814
18815 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18816 {
18817 extra_cost = 1;
18818 type = "lo_sum";
18819 }
18820 break;
18821
18822 /* Static addresses need to create a TOC entry. */
18823 case CONST:
18824 case SYMBOL_REF:
18825 case LABEL_REF:
18826 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18827 {
18828 extra_cost = -1;
18829 type = "vector d-form lo_sum #2";
18830 }
18831
18832 else
18833 {
18834 type = "address";
18835 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18836 }
18837 break;
18838
18839 /* TOC references look like offsettable memory. */
18840 case UNSPEC:
18841 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
18842 {
18843 fail_msg = "bad UNSPEC";
18844 extra_cost = -1;
18845 }
18846
18847 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18848 {
18849 extra_cost = -1;
18850 type = "vector d-form lo_sum #3";
18851 }
18852
18853 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18854 {
18855 extra_cost = 1;
18856 type = "toc reference";
18857 }
18858 break;
18859
18860 default:
18861 {
18862 fail_msg = "bad address";
18863 extra_cost = -1;
18864 }
18865 }
18866
18867 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18868 {
18869 if (extra_cost < 0)
18870 fprintf (stderr,
18871 "rs6000_secondary_reload_memory error: mode = %s, "
18872 "class = %s, addr_mask = '%s', %s\n",
18873 GET_MODE_NAME (mode),
18874 reg_class_names[rclass],
18875 rs6000_debug_addr_mask (addr_mask, false),
18876 (fail_msg != NULL) ? fail_msg : "<bad address>");
18877
18878 else
18879 fprintf (stderr,
18880 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18881 "addr_mask = '%s', extra cost = %d, %s\n",
18882 GET_MODE_NAME (mode),
18883 reg_class_names[rclass],
18884 rs6000_debug_addr_mask (addr_mask, false),
18885 extra_cost,
18886 (type) ? type : "<none>");
18887
18888 debug_rtx (addr);
18889 }
18890
18891 return extra_cost;
18892 }
18893
18894 /* Helper function for rs6000_secondary_reload to return true if a move to a
18895 different register class is really a simple move. */
18896
18897 static bool
18898 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18899 enum rs6000_reg_type from_type,
18900 machine_mode mode)
18901 {
18902 int size = GET_MODE_SIZE (mode);
18903
18904 /* Add support for the various direct moves available. In this function, we
18905 only look at cases where we don't need any extra registers, and one or
18906 more simple move insns are issued. Originally, small integers were not
18907 allowed in FPR/VSX registers. Single-precision binary floating point is
18908 not a simple move because we need to convert to the single-precision
18909 memory layout. The 4-byte SDmode can be moved. TDmode values are
18910 disallowed; they need special direct move handling we do not support yet. */
18911 if (TARGET_DIRECT_MOVE
18912 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18913 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18914 {
18915 if (TARGET_POWERPC64)
18916 {
18917 /* ISA 2.07: MTVSRD or MFVSRD. */
18918 if (size == 8)
18919 return true;
18920
18921 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
18922 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
18923 return true;
18924 }
18925
18926 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18927 if (TARGET_P8_VECTOR)
18928 {
18929 if (mode == SImode)
18930 return true;
18931
18932 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
18933 return true;
18934 }
18935
18936 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
18937 if (mode == SDmode)
18938 return true;
18939 }
18940
18941 /* Move to/from SPR. */
18942 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18943 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18944 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18945 return true;
18946
18947 return false;
18948 }
18949
18950 /* Direct move helper function for rs6000_secondary_reload, handle all of the
18951 special direct moves that involve allocating an extra register. Return true
18952 and record the helper's insn code and extra cost in SRI if such a helper
18953 exists; return false if not. */
18954
18955 static bool
18956 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18957 enum rs6000_reg_type from_type,
18958 machine_mode mode,
18959 secondary_reload_info *sri,
18960 bool altivec_p)
18961 {
18962 bool ret = false;
18963 enum insn_code icode = CODE_FOR_nothing;
18964 int cost = 0;
18965 int size = GET_MODE_SIZE (mode);
18966
18967 if (TARGET_POWERPC64 && size == 16)
18968 {
18969 /* Handle moving 128-bit values from GPRs to VSX registers on
18970 ISA 2.07 (power8, power9) when running in 64-bit mode using
18971 XXPERMDI to glue the two 64-bit values back together. */
18972 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18973 {
18974 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18975 icode = reg_addr[mode].reload_vsx_gpr;
18976 }
18977
18978 /* Handle moving 128-bit values from VSX registers to GPRs on
18979 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18980 bottom 64-bit value. */
18981 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18982 {
18983 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18984 icode = reg_addr[mode].reload_gpr_vsx;
18985 }
18986 }
18987
18988 else if (TARGET_POWERPC64 && mode == SFmode)
18989 {
18990 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18991 {
18992 cost = 3; /* xscvdpspn, mfvsrd, and. */
18993 icode = reg_addr[mode].reload_gpr_vsx;
18994 }
18995
18996 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18997 {
18998 cost = 2; /* mtvsrwz, xscvspdpn. */
18999 icode = reg_addr[mode].reload_vsx_gpr;
19000 }
19001 }
19002
19003 else if (!TARGET_POWERPC64 && size == 8)
19004 {
19005 /* Handle moving 64-bit values from GPRs to floating point registers on
19006 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19007 32-bit values back together. Altivec register classes must be handled
19008 specially since a different instruction is used, and the secondary
19009 reload support requires a single instruction class in the scratch
19010 register constraint. However, right now TFmode is not allowed in
19011 Altivec registers, so the pattern will never match. */
19012 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19013 {
19014 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19015 icode = reg_addr[mode].reload_fpr_gpr;
19016 }
19017 }
19018
19019 if (icode != CODE_FOR_nothing)
19020 {
19021 ret = true;
19022 if (sri)
19023 {
19024 sri->icode = icode;
19025 sri->extra_cost = cost;
19026 }
19027 }
19028
19029 return ret;
19030 }
19031
19032 /* Return whether a move between two register classes can be done either
19033 directly (simple move) or via a pattern that uses a single extra temporary
19034 (using ISA 2.07's direct move in this case). */
19035
19036 static bool
19037 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19038 enum rs6000_reg_type from_type,
19039 machine_mode mode,
19040 secondary_reload_info *sri,
19041 bool altivec_p)
19042 {
19043 /* Fall back to load/store reloads if either type is not a register. */
19044 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19045 return false;
19046
19047 /* If we haven't allocated registers yet, assume the move can be done for the
19048 standard register types. */
19049 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19050 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19051 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19052 return true;
19053
19054 /* A move within the same set of registers is a simple move for
19055 non-specialized registers. */
19056 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19057 return true;
19058
19059 /* Check whether a simple move can be done directly. */
19060 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19061 {
19062 if (sri)
19063 {
19064 sri->icode = CODE_FOR_nothing;
19065 sri->extra_cost = 0;
19066 }
19067 return true;
19068 }
19069
19070 /* Now check if we can do it in a few steps. */
19071 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19072 altivec_p);
19073 }
19074
19075 /* Inform reload about cases where moving X with a mode MODE to a register in
19076 RCLASS requires an extra scratch or immediate register. Return the class
19077 needed for the immediate register.
19078
19079 For VSX and Altivec, we may need a register to convert sp+offset into
19080 reg+sp (an indexed address).
19081
19082 For misaligned 64-bit gpr loads and stores we need a register to
19083 convert an offset address to indirect. */
19084
19085 static reg_class_t
19086 rs6000_secondary_reload (bool in_p,
19087 rtx x,
19088 reg_class_t rclass_i,
19089 machine_mode mode,
19090 secondary_reload_info *sri)
19091 {
19092 enum reg_class rclass = (enum reg_class) rclass_i;
19093 reg_class_t ret = ALL_REGS;
19094 enum insn_code icode;
19095 bool default_p = false;
19096 bool done_p = false;
19097
19098 /* Allow subreg of memory before/during reload. */
19099 bool memory_p = (MEM_P (x)
19100 || (!reload_completed && SUBREG_P (x)
19101 && MEM_P (SUBREG_REG (x))));
19102
19103 sri->icode = CODE_FOR_nothing;
19104 sri->t_icode = CODE_FOR_nothing;
19105 sri->extra_cost = 0;
19106 icode = ((in_p)
19107 ? reg_addr[mode].reload_load
19108 : reg_addr[mode].reload_store);
19109
19110 if (REG_P (x) || register_operand (x, mode))
19111 {
19112 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19113 bool altivec_p = (rclass == ALTIVEC_REGS);
19114 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19115
19116 if (!in_p)
19117 std::swap (to_type, from_type);
19118
19119 /* Can we do a direct move of some sort? */
19120 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19121 altivec_p))
19122 {
19123 icode = (enum insn_code)sri->icode;
19124 default_p = false;
19125 done_p = true;
19126 ret = NO_REGS;
19127 }
19128 }
19129
19130 /* Make sure 0.0 is not reloaded or forced into memory. */
19131 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19132 {
19133 ret = NO_REGS;
19134 default_p = false;
19135 done_p = true;
19136 }
19137
19138 /* If this is a scalar floating point value and we want to load it into the
19139 traditional Altivec registers, move it via a traditional floating point
19140 register instead, unless we have D-form addressing. Also make sure that
19141 non-zero constants use an FPR. */
19142 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19143 && !mode_supports_vmx_dform (mode)
19144 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19145 && (memory_p || CONST_DOUBLE_P (x)))
19146 {
19147 ret = FLOAT_REGS;
19148 default_p = false;
19149 done_p = true;
19150 }
19151
19152 /* Handle reload of load/stores if we have reload helper functions. */
19153 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19154 {
19155 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19156 mode);
19157
19158 if (extra_cost >= 0)
19159 {
19160 done_p = true;
19161 ret = NO_REGS;
19162 if (extra_cost > 0)
19163 {
19164 sri->extra_cost = extra_cost;
19165 sri->icode = icode;
19166 }
19167 }
19168 }
19169
19170 /* Handle unaligned loads and stores of integer registers. */
19171 if (!done_p && TARGET_POWERPC64
19172 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19173 && memory_p
19174 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19175 {
19176 rtx addr = XEXP (x, 0);
19177 rtx off = address_offset (addr);
19178
19179 if (off != NULL_RTX)
19180 {
19181 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19182 unsigned HOST_WIDE_INT offset = INTVAL (off);
19183
19184 /* We need a secondary reload when our legitimate_address_p
19185 says the address is good (as otherwise the entire address
19186 will be reloaded), and the offset is not a multiple of
19187 four or we have an address wrap. Address wrap will only
19188 occur for LO_SUMs since legitimate_offset_address_p
19189 rejects addresses for 16-byte mems that will wrap. */
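/* For example (illustrative numbers): a 16-byte access (extra = 8) at
   offset 0x7ff2 passes the range check (0x7ff2 + 0x8000 = 0xfff2, which
   is less than 0x10000 - 8) but is not a multiple of four, so it takes
   the secondary reload below. */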
19190 if (GET_CODE (addr) == LO_SUM
19191 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19192 && ((offset & 3) != 0
19193 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19194 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19195 && (offset & 3) != 0))
19196 {
19197 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19198 if (in_p)
19199 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19200 : CODE_FOR_reload_di_load);
19201 else
19202 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19203 : CODE_FOR_reload_di_store);
19204 sri->extra_cost = 2;
19205 ret = NO_REGS;
19206 done_p = true;
19207 }
19208 else
19209 default_p = true;
19210 }
19211 else
19212 default_p = true;
19213 }
19214
19215 if (!done_p && !TARGET_POWERPC64
19216 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19217 && memory_p
19218 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19219 {
19220 rtx addr = XEXP (x, 0);
19221 rtx off = address_offset (addr);
19222
19223 if (off != NULL_RTX)
19224 {
19225 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19226 unsigned HOST_WIDE_INT offset = INTVAL (off);
19227
19228 /* We need a secondary reload when our legitimate_address_p
19229 says the address is good (as otherwise the entire address
19230 will be reloaded), and we have a wrap.
19231
19232 legitimate_lo_sum_address_p allows LO_SUM addresses to
19233 have any offset so test for wrap in the low 16 bits.
19234
19235 legitimate_offset_address_p checks for the range
19236 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19237 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19238 [0x7ff4,0x7fff] respectively, so test for the
19239 intersection of these ranges, [0x7ffc,0x7fff] and
19240 [0x7ff4,0x7ff7] respectively.
19241
19242 Note that the address we see here may have been
19243 manipulated by legitimize_reload_address. */
19244 if (GET_CODE (addr) == LO_SUM
19245 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19246 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19247 {
19248 if (in_p)
19249 sri->icode = CODE_FOR_reload_si_load;
19250 else
19251 sri->icode = CODE_FOR_reload_si_store;
19252 sri->extra_cost = 2;
19253 ret = NO_REGS;
19254 done_p = true;
19255 }
19256 else
19257 default_p = true;
19258 }
19259 else
19260 default_p = true;
19261 }
19262
19263 if (!done_p)
19264 default_p = true;
19265
19266 if (default_p)
19267 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19268
19269 gcc_assert (ret != ALL_REGS);
19270
19271 if (TARGET_DEBUG_ADDR)
19272 {
19273 fprintf (stderr,
19274 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19275 "mode = %s",
19276 reg_class_names[ret],
19277 in_p ? "true" : "false",
19278 reg_class_names[rclass],
19279 GET_MODE_NAME (mode));
19280
19281 if (reload_completed)
19282 fputs (", after reload", stderr);
19283
19284 if (!done_p)
19285 fputs (", done_p not set", stderr);
19286
19287 if (default_p)
19288 fputs (", default secondary reload", stderr);
19289
19290 if (sri->icode != CODE_FOR_nothing)
19291 fprintf (stderr, ", reload func = %s, extra cost = %d",
19292 insn_data[sri->icode].name, sri->extra_cost);
19293
19294 else if (sri->extra_cost > 0)
19295 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19296
19297 fputs ("\n", stderr);
19298 debug_rtx (x);
19299 }
19300
19301 return ret;
19302 }
19303
19304 /* Better tracing for rs6000_secondary_reload_inner. */
19305
19306 static void
19307 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19308 bool store_p)
19309 {
19310 rtx set, clobber;
19311
19312 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19313
19314 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19315 store_p ? "store" : "load");
19316
19317 if (store_p)
19318 set = gen_rtx_SET (mem, reg);
19319 else
19320 set = gen_rtx_SET (reg, mem);
19321
19322 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19323 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19324 }
19325
19326 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19327 ATTRIBUTE_NORETURN;
19328
19329 static void
19330 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19331 bool store_p)
19332 {
19333 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19334 gcc_unreachable ();
19335 }
19336
19337 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19338 reload helper functions. These were identified in
19339 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19340 reload, it calls the insns:
19341 reload_<RELOAD:mode>_<P:mptrsize>_store
19342 reload_<RELOAD:mode>_<P:mptrsize>_load
19343
19344 which in turn calls this function, to do whatever is necessary to create
19345 valid addresses. */
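/* (For instance, on a 64-bit target a V2DF store reload would go through a
   pattern named reload_v2df_di_store under this naming scheme; the instance
   name is given here only as an illustration.) */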
19346
19347 void
19348 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19349 {
19350 int regno = true_regnum (reg);
19351 machine_mode mode = GET_MODE (reg);
19352 addr_mask_type addr_mask;
19353 rtx addr;
19354 rtx new_addr;
19355 rtx op_reg, op0, op1;
19356 rtx and_op;
19357 rtx cc_clobber;
19358 rtvec rv;
19359
19360 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
19361 || !base_reg_operand (scratch, GET_MODE (scratch)))
19362 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19363
19364 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19365 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19366
19367 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19368 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19369
19370 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19371 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19372
19373 else
19374 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19375
19376 /* Make sure the mode is valid in this register class. */
19377 if ((addr_mask & RELOAD_REG_VALID) == 0)
19378 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19379
19380 if (TARGET_DEBUG_ADDR)
19381 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19382
19383 new_addr = addr = XEXP (mem, 0);
19384 switch (GET_CODE (addr))
19385 {
19386 /* Does the register class support auto update forms for this mode? If
19387 not, do the update now. We don't need a scratch register, since the
19388 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19389 case PRE_INC:
19390 case PRE_DEC:
19391 op_reg = XEXP (addr, 0);
19392 if (!base_reg_operand (op_reg, Pmode))
19393 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19394
19395 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19396 {
19397 int delta = GET_MODE_SIZE (mode);
19398 if (GET_CODE (addr) == PRE_DEC)
19399 delta = -delta;
19400 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
19401 new_addr = op_reg;
19402 }
19403 break;
19404
19405 case PRE_MODIFY:
19406 op0 = XEXP (addr, 0);
19407 op1 = XEXP (addr, 1);
19408 if (!base_reg_operand (op0, Pmode)
19409 || GET_CODE (op1) != PLUS
19410 || !rtx_equal_p (op0, XEXP (op1, 0)))
19411 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19412
19413 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19414 {
19415 emit_insn (gen_rtx_SET (op0, op1));
19416 new_addr = op0;
19417 }
19418 break;
19419
19420 /* Do we need to simulate AND -16 to clear the bottom address bits used
19421 in VMX load/stores? */
19422 case AND:
19423 op0 = XEXP (addr, 0);
19424 op1 = XEXP (addr, 1);
19425 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19426 {
19427 if (REG_P (op0) || SUBREG_P (op0))
19428 op_reg = op0;
19429
19430 else if (GET_CODE (op1) == PLUS)
19431 {
19432 emit_insn (gen_rtx_SET (scratch, op1));
19433 op_reg = scratch;
19434 }
19435
19436 else
19437 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19438
19439 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19440 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19441 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19442 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19443 new_addr = scratch;
19444 }
19445 break;
19446
19447 /* If this is an indirect address, make sure it is a base register. */
19448 case REG:
19449 case SUBREG:
19450 if (!base_reg_operand (addr, GET_MODE (addr)))
19451 {
19452 emit_insn (gen_rtx_SET (scratch, addr));
19453 new_addr = scratch;
19454 }
19455 break;
19456
19457 /* If this is an indexed address, make sure the register class can handle
19458 indexed addresses for this mode. */
19459 case PLUS:
19460 op0 = XEXP (addr, 0);
19461 op1 = XEXP (addr, 1);
19462 if (!base_reg_operand (op0, Pmode))
19463 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19464
19465 else if (int_reg_operand (op1, Pmode))
19466 {
19467 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19468 {
19469 emit_insn (gen_rtx_SET (scratch, addr));
19470 new_addr = scratch;
19471 }
19472 }
19473
19474 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19475 {
19476 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19477 || !quad_address_p (addr, mode, false))
19478 {
19479 emit_insn (gen_rtx_SET (scratch, addr));
19480 new_addr = scratch;
19481 }
19482 }
19483
19484 /* Make sure the register class can handle offset addresses. */
19485 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19486 {
19487 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19488 {
19489 emit_insn (gen_rtx_SET (scratch, addr));
19490 new_addr = scratch;
19491 }
19492 }
19493
19494 else
19495 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19496
19497 break;
19498
19499 case LO_SUM:
19500 op0 = XEXP (addr, 0);
19501 op1 = XEXP (addr, 1);
19502 if (!base_reg_operand (op0, Pmode))
19503 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19504
19505 else if (int_reg_operand (op1, Pmode))
19506 {
19507 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19508 {
19509 emit_insn (gen_rtx_SET (scratch, addr));
19510 new_addr = scratch;
19511 }
19512 }
19513
19514 /* Quad offsets are restricted and can't handle normal addresses. */
19515 else if (mode_supports_dq_form (mode))
19516 {
19517 emit_insn (gen_rtx_SET (scratch, addr));
19518 new_addr = scratch;
19519 }
19520
19521 /* Make sure the register class can handle offset addresses. */
19522 else if (legitimate_lo_sum_address_p (mode, addr, false))
19523 {
19524 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19525 {
19526 emit_insn (gen_rtx_SET (scratch, addr));
19527 new_addr = scratch;
19528 }
19529 }
19530
19531 else
19532 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19533
19534 break;
19535
19536 case SYMBOL_REF:
19537 case CONST:
19538 case LABEL_REF:
19539 rs6000_emit_move (scratch, addr, Pmode);
19540 new_addr = scratch;
19541 break;
19542
19543 default:
19544 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19545 }
19546
19547 /* Adjust the address if it changed. */
19548 if (addr != new_addr)
19549 {
19550 mem = replace_equiv_address_nv (mem, new_addr);
19551 if (TARGET_DEBUG_ADDR)
19552 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19553 }
19554
19555 /* Now create the move. */
19556 if (store_p)
19557 emit_insn (gen_rtx_SET (mem, reg));
19558 else
19559 emit_insn (gen_rtx_SET (reg, mem));
19560
19561 return;
19562 }
19563
19564 /* Convert reloads involving 64-bit gprs and misaligned offset
19565 addressing, or multiple 32-bit gprs and offsets that are too large,
19566 to use indirect addressing. */
19567
19568 void
19569 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19570 {
19571 int regno = true_regnum (reg);
19572 enum reg_class rclass;
19573 rtx addr;
19574 rtx scratch_or_premodify = scratch;
19575
19576 if (TARGET_DEBUG_ADDR)
19577 {
19578 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19579 store_p ? "store" : "load");
19580 fprintf (stderr, "reg:\n");
19581 debug_rtx (reg);
19582 fprintf (stderr, "mem:\n");
19583 debug_rtx (mem);
19584 fprintf (stderr, "scratch:\n");
19585 debug_rtx (scratch);
19586 }
19587
19588 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
19589 gcc_assert (MEM_P (mem));
19590 rclass = REGNO_REG_CLASS (regno);
19591 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19592 addr = XEXP (mem, 0);
19593
19594 if (GET_CODE (addr) == PRE_MODIFY)
19595 {
19596 gcc_assert (REG_P (XEXP (addr, 0))
19597 && GET_CODE (XEXP (addr, 1)) == PLUS
19598 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19599 scratch_or_premodify = XEXP (addr, 0);
19600 addr = XEXP (addr, 1);
19601 }
19602 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19603
19604 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19605
19606 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19607
19608 /* Now create the move. */
19609 if (store_p)
19610 emit_insn (gen_rtx_SET (mem, reg));
19611 else
19612 emit_insn (gen_rtx_SET (reg, mem));
19613
19614 return;
19615 }
19616
19617 /* Given an rtx X being reloaded into a reg required to be
19618 in class CLASS, return the class of reg to actually use.
19619 In general this is just CLASS; but on some machines
19620 in some cases it is preferable to use a more restrictive class.
19621
19622 On the RS/6000, we have to return NO_REGS when we want to reload a
19623 floating-point CONST_DOUBLE to force it to be copied to memory.
19624
19625 We also don't want to reload integer values into floating-point
19626 registers if we can at all help it. In fact, this can
19627 cause reload to die, if it tries to generate a reload of CTR
19628 into a FP register and discovers it doesn't have the memory location
19629 required.
19630
19631 ??? Would it be a good idea to have reload do the converse, that is
19632 try to reload floating modes into FP registers if possible?
19633 */
19634
19635 static enum reg_class
19636 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19637 {
19638 machine_mode mode = GET_MODE (x);
19639 bool is_constant = CONSTANT_P (x);
19640
19641 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19642 reload class for it. */
19643 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19644 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19645 return NO_REGS;
19646
19647 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19648 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19649 return NO_REGS;
19650
19651 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19652 the reloading of address expressions using PLUS into floating point
19653 registers. */
19654 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19655 {
19656 if (is_constant)
19657 {
19658 /* Zero is always allowed in all VSX registers. */
19659 if (x == CONST0_RTX (mode))
19660 return rclass;
19661
19662 /* If this is a vector constant that can be formed with a few Altivec
19663 instructions, we want altivec registers. */
19664 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19665 return ALTIVEC_REGS;
19666
19667 /* If this is an integer constant that can easily be loaded into
19668 vector registers, allow it. */
19669 if (CONST_INT_P (x))
19670 {
19671 HOST_WIDE_INT value = INTVAL (x);
19672
19673 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19674 2.06 can generate it in the Altivec registers with
19675 VSPLTI<x>. */
19676 if (value == -1)
19677 {
19678 if (TARGET_P8_VECTOR)
19679 return rclass;
19680 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19681 return ALTIVEC_REGS;
19682 else
19683 return NO_REGS;
19684 }
19685
19686 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19687 a sign extend in the Altivec registers. */
19688 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19689 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19690 return ALTIVEC_REGS;
19691 }
19692
19693 /* Force constant to memory. */
19694 return NO_REGS;
19695 }
19696
19697 /* D-form addressing can easily reload the value. */
19698 if (mode_supports_vmx_dform (mode)
19699 || mode_supports_dq_form (mode))
19700 return rclass;
19701
19702 /* If this is a scalar floating point value and we don't have D-form
19703 addressing, prefer the traditional floating point registers so that we
19704 can use D-form (register+offset) addressing. */
19705 if (rclass == VSX_REGS
19706 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19707 return FLOAT_REGS;
19708
19709 /* Prefer the Altivec registers if Altivec is handling the vector
19710 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19711 loads. */
19712 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19713 || mode == V1TImode)
19714 return ALTIVEC_REGS;
19715
19716 return rclass;
19717 }
19718
19719 if (is_constant || GET_CODE (x) == PLUS)
19720 {
19721 if (reg_class_subset_p (GENERAL_REGS, rclass))
19722 return GENERAL_REGS;
19723 if (reg_class_subset_p (BASE_REGS, rclass))
19724 return BASE_REGS;
19725 return NO_REGS;
19726 }
19727
19728 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
19729 return GENERAL_REGS;
19730
19731 return rclass;
19732 }
19733
19734 /* Debug version of rs6000_preferred_reload_class. */
19735 static enum reg_class
19736 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19737 {
19738 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19739
19740 fprintf (stderr,
19741 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19742 "mode = %s, x:\n",
19743 reg_class_names[ret], reg_class_names[rclass],
19744 GET_MODE_NAME (GET_MODE (x)));
19745 debug_rtx (x);
19746
19747 return ret;
19748 }
19749
19750 /* If we are copying between FP or AltiVec registers and anything else, we need
19751 a memory location. The exception is when we are targeting ppc64 and the
19752 direct moves between fprs and gprs are available. Also, under VSX, you
19753 can copy vector registers from the FP register set to the Altivec register
19754 set and vice versa. */
19755
19756 static bool
19757 rs6000_secondary_memory_needed (machine_mode mode,
19758 reg_class_t from_class,
19759 reg_class_t to_class)
19760 {
19761 enum rs6000_reg_type from_type, to_type;
19762 bool altivec_p = ((from_class == ALTIVEC_REGS)
19763 || (to_class == ALTIVEC_REGS));
19764
19765 /* If a simple/direct move is available, we don't need secondary memory. */
19766 from_type = reg_class_to_reg_type[(int)from_class];
19767 to_type = reg_class_to_reg_type[(int)to_class];
19768
19769 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19770 (secondary_reload_info *)0, altivec_p))
19771 return false;
19772
19773 /* If we have a floating point or vector register class, we need to use
19774 memory to transfer the data. */
19775 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19776 return true;
19777
19778 return false;
19779 }
19780
19781 /* Debug version of rs6000_secondary_memory_needed. */
19782 static bool
19783 rs6000_debug_secondary_memory_needed (machine_mode mode,
19784 reg_class_t from_class,
19785 reg_class_t to_class)
19786 {
19787 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19788
19789 fprintf (stderr,
19790 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19791 "to_class = %s, mode = %s\n",
19792 ret ? "true" : "false",
19793 reg_class_names[from_class],
19794 reg_class_names[to_class],
19795 GET_MODE_NAME (mode));
19796
19797 return ret;
19798 }
19799
19800 /* Return the register class of a scratch register needed to copy IN into
19801 or out of a register in RCLASS in MODE. If it can be done directly,
19802 NO_REGS is returned. */
19803
19804 static enum reg_class
19805 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19806 rtx in)
19807 {
19808 int regno;
19809
19810 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19811 #if TARGET_MACHO
19812 && MACHOPIC_INDIRECT
19813 #endif
19814 ))
19815 {
19816 /* We cannot copy a symbolic operand directly into anything
19817 other than BASE_REGS for TARGET_ELF. So indicate that a
19818 register from BASE_REGS is needed as an intermediate
19819 register.
19820
19821 On Darwin, PIC addresses require a load from memory, which
19822 needs a base register. */
19823 if (rclass != BASE_REGS
19824 && (SYMBOL_REF_P (in)
19825 || GET_CODE (in) == HIGH
19826 || GET_CODE (in) == LABEL_REF
19827 || GET_CODE (in) == CONST))
19828 return BASE_REGS;
19829 }
19830
19831 if (REG_P (in))
19832 {
19833 regno = REGNO (in);
19834 if (!HARD_REGISTER_NUM_P (regno))
19835 {
19836 regno = true_regnum (in);
19837 if (!HARD_REGISTER_NUM_P (regno))
19838 regno = -1;
19839 }
19840 }
19841 else if (SUBREG_P (in))
19842 {
19843 regno = true_regnum (in);
19844 if (!HARD_REGISTER_NUM_P (regno))
19845 regno = -1;
19846 }
19847 else
19848 regno = -1;
19849
19850 /* If we have VSX register moves, prefer moving scalar values between
19851 Altivec registers and GPRs by going via an FPR (and then via memory)
19852 instead of reloading the secondary memory address for Altivec moves. */
19853 if (TARGET_VSX
19854 && GET_MODE_SIZE (mode) < 16
19855 && !mode_supports_vmx_dform (mode)
19856 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19857 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19858 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19859 && (regno >= 0 && INT_REGNO_P (regno)))))
19860 return FLOAT_REGS;
19861
19862 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19863 into anything. */
19864 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19865 || (regno >= 0 && INT_REGNO_P (regno)))
19866 return NO_REGS;
19867
19868 /* Constants, memory, and VSX registers can go into VSX registers (both the
19869 traditional floating point and the altivec registers). */
19870 if (rclass == VSX_REGS
19871 && (regno == -1 || VSX_REGNO_P (regno)))
19872 return NO_REGS;
19873
19874 /* Constants, memory, and FP registers can go into FP registers. */
19875 if ((regno == -1 || FP_REGNO_P (regno))
19876 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
19877 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19878
19879 /* Memory and AltiVec registers can go into AltiVec registers. */
19880 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19881 && rclass == ALTIVEC_REGS)
19882 return NO_REGS;
19883
19884 /* We can copy among the CR registers. */
19885 if ((rclass == CR_REGS || rclass == CR0_REGS)
19886 && regno >= 0 && CR_REGNO_P (regno))
19887 return NO_REGS;
19888
19889 /* Otherwise, we need GENERAL_REGS. */
19890 return GENERAL_REGS;
19891 }
19892
19893 /* Debug version of rs6000_secondary_reload_class. */
19894 static enum reg_class
19895 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19896 machine_mode mode, rtx in)
19897 {
19898 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19899 fprintf (stderr,
19900 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19901 "mode = %s, input rtx:\n",
19902 reg_class_names[ret], reg_class_names[rclass],
19903 GET_MODE_NAME (mode));
19904 debug_rtx (in);
19905
19906 return ret;
19907 }
19908
19909 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19910
19911 static bool
19912 rs6000_can_change_mode_class (machine_mode from,
19913 machine_mode to,
19914 reg_class_t rclass)
19915 {
19916 unsigned from_size = GET_MODE_SIZE (from);
19917 unsigned to_size = GET_MODE_SIZE (to);
19918
19919 if (from_size != to_size)
19920 {
19921 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19922
19923 if (reg_classes_intersect_p (xclass, rclass))
19924 {
19925 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
19926 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
19927 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19928 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19929
19930 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19931 single register under VSX because the scalar part of the register
19932 is in the upper 64-bits, and not the lower 64-bits. Types like
19933 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
19934 IEEE floating point can't overlap, and neither can small
19935 values. */
19936
19937 if (to_float128_vector_p && from_float128_vector_p)
19938 return true;
19939
19940 else if (to_float128_vector_p || from_float128_vector_p)
19941 return false;
19942
19943 /* TDmode in floating-point registers must always go into a register
19944 pair with the most significant word in the even-numbered register
19945 to match ISA requirements. In little-endian mode, this does not
19946 match subreg numbering, so we cannot allow subregs. */
19947 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19948 return false;
19949
19950 if (from_size < 8 || to_size < 8)
19951 return false;
19952
19953 if (from_size == 8 && (8 * to_nregs) != to_size)
19954 return false;
19955
19956 if (to_size == 8 && (8 * from_nregs) != from_size)
19957 return false;
19958
19959 return true;
19960 }
19961 else
19962 return true;
19963 }
19964
19965 /* Since the VSX register set includes traditional floating point registers
19966 and altivec registers, just check for the size being different instead of
19967 trying to check whether the modes are vector modes. Otherwise it won't
19968 allow, say, DF and DI to change classes. For types like TFmode and TDmode
19969 that take 2 64-bit registers, rather than a single 128-bit register, don't
19970 allow subregs of those types to other 128-bit types. */
19971 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19972 {
19973 unsigned num_regs = (from_size + 15) / 16;
19974 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
19975 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
19976 return false;
19977
19978 return (from_size == 8 || from_size == 16);
19979 }
19980
19981 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19982 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19983 return false;
19984
19985 return true;
19986 }
19987
19988 /* Debug version of rs6000_can_change_mode_class. */
19989 static bool
19990 rs6000_debug_can_change_mode_class (machine_mode from,
19991 machine_mode to,
19992 reg_class_t rclass)
19993 {
19994 bool ret = rs6000_can_change_mode_class (from, to, rclass);
19995
19996 fprintf (stderr,
19997 "rs6000_can_change_mode_class, return %s, from = %s, "
19998 "to = %s, rclass = %s\n",
19999 ret ? "true" : "false",
20000 GET_MODE_NAME (from), GET_MODE_NAME (to),
20001 reg_class_names[rclass]);
20002
20003 return ret;
20004 }
20005 \f
20006 /* Return a string to do a move operation of 128 bits of data. */
20007
20008 const char *
20009 rs6000_output_move_128bit (rtx operands[])
20010 {
20011 rtx dest = operands[0];
20012 rtx src = operands[1];
20013 machine_mode mode = GET_MODE (dest);
20014 int dest_regno;
20015 int src_regno;
20016 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20017 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20018
20019 if (REG_P (dest))
20020 {
20021 dest_regno = REGNO (dest);
20022 dest_gpr_p = INT_REGNO_P (dest_regno);
20023 dest_fp_p = FP_REGNO_P (dest_regno);
20024 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20025 dest_vsx_p = dest_fp_p | dest_vmx_p;
20026 }
20027 else
20028 {
20029 dest_regno = -1;
20030 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20031 }
20032
20033 if (REG_P (src))
20034 {
20035 src_regno = REGNO (src);
20036 src_gpr_p = INT_REGNO_P (src_regno);
20037 src_fp_p = FP_REGNO_P (src_regno);
20038 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20039 src_vsx_p = src_fp_p | src_vmx_p;
20040 }
20041 else
20042 {
20043 src_regno = -1;
20044 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20045 }
20046
20047 /* Register moves. */
20048 if (dest_regno >= 0 && src_regno >= 0)
20049 {
20050 if (dest_gpr_p)
20051 {
20052 if (src_gpr_p)
20053 return "#";
20054
20055 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20056 return (WORDS_BIG_ENDIAN
20057 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20058 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20059
20060 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20061 return "#";
20062 }
20063
20064 else if (TARGET_VSX && dest_vsx_p)
20065 {
20066 if (src_vsx_p)
20067 return "xxlor %x0,%x1,%x1";
20068
20069 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20070 return (WORDS_BIG_ENDIAN
20071 ? "mtvsrdd %x0,%1,%L1"
20072 : "mtvsrdd %x0,%L1,%1");
20073
20074 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20075 return "#";
20076 }
20077
20078 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20079 return "vor %0,%1,%1";
20080
20081 else if (dest_fp_p && src_fp_p)
20082 return "#";
20083 }
20084
20085 /* Loads. */
20086 else if (dest_regno >= 0 && MEM_P (src))
20087 {
20088 if (dest_gpr_p)
20089 {
20090 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20091 return "lq %0,%1";
20092 else
20093 return "#";
20094 }
20095
20096 else if (TARGET_ALTIVEC && dest_vmx_p
20097 && altivec_indexed_or_indirect_operand (src, mode))
20098 return "lvx %0,%y1";
20099
20100 else if (TARGET_VSX && dest_vsx_p)
20101 {
20102 if (mode_supports_dq_form (mode)
20103 && quad_address_p (XEXP (src, 0), mode, true))
20104 return "lxv %x0,%1";
20105
20106 else if (TARGET_P9_VECTOR)
20107 return "lxvx %x0,%y1";
20108
20109 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20110 return "lxvw4x %x0,%y1";
20111
20112 else
20113 return "lxvd2x %x0,%y1";
20114 }
20115
20116 else if (TARGET_ALTIVEC && dest_vmx_p)
20117 return "lvx %0,%y1";
20118
20119 else if (dest_fp_p)
20120 return "#";
20121 }
20122
20123 /* Stores. */
20124 else if (src_regno >= 0 && MEM_P (dest))
20125 {
20126 if (src_gpr_p)
20127 {
20128 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20129 return "stq %1,%0";
20130 else
20131 return "#";
20132 }
20133
20134 else if (TARGET_ALTIVEC && src_vmx_p
20135 && altivec_indexed_or_indirect_operand (dest, mode))
20136 return "stvx %1,%y0";
20137
20138 else if (TARGET_VSX && src_vsx_p)
20139 {
20140 if (mode_supports_dq_form (mode)
20141 && quad_address_p (XEXP (dest, 0), mode, true))
20142 return "stxv %x1,%0";
20143
20144 else if (TARGET_P9_VECTOR)
20145 return "stxvx %x1,%y0";
20146
20147 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20148 return "stxvw4x %x1,%y0";
20149
20150 else
20151 return "stxvd2x %x1,%y0";
20152 }
20153
20154 else if (TARGET_ALTIVEC && src_vmx_p)
20155 return "stvx %1,%y0";
20156
20157 else if (src_fp_p)
20158 return "#";
20159 }
20160
20161 /* Constants. */
20162 else if (dest_regno >= 0
20163 && (CONST_INT_P (src)
20164 || CONST_WIDE_INT_P (src)
20165 || CONST_DOUBLE_P (src)
20166 || GET_CODE (src) == CONST_VECTOR))
20167 {
20168 if (dest_gpr_p)
20169 return "#";
20170
20171 else if ((dest_vmx_p && TARGET_ALTIVEC)
20172 || (dest_vsx_p && TARGET_VSX))
20173 return output_vec_const_move (operands);
20174 }
20175
20176 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20177 }
20178
20179 /* Validate a 128-bit move. */
20180 bool
20181 rs6000_move_128bit_ok_p (rtx operands[])
20182 {
20183 machine_mode mode = GET_MODE (operands[0]);
20184 return (gpc_reg_operand (operands[0], mode)
20185 || gpc_reg_operand (operands[1], mode));
20186 }
20187
20188 /* Return true if a 128-bit move needs to be split. */
20189 bool
20190 rs6000_split_128bit_ok_p (rtx operands[])
20191 {
20192 if (!reload_completed)
20193 return false;
20194
20195 if (!gpr_or_gpr_p (operands[0], operands[1]))
20196 return false;
20197
20198 if (quad_load_store_p (operands[0], operands[1]))
20199 return false;
20200
20201 return true;
20202 }
20203
20204 \f
20205 /* Given a comparison operation, return the bit number in CCR to test. We
20206 know this is a valid comparison.
20207
20208 SCC_P is 1 if this is for an scc. That means that %D will have been
20209 used instead of %C, so the bits will be in different places.
20210
20211 Return -1 if OP isn't a valid comparison for some reason. */
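/* For example, an EQ test on CR field 5 (base_bit = 4 * 5 = 20) yields
   bit 22, the EQ bit of that field; with SCC_P set, GE instead tests
   bit 23, since the cror will have left the result in the unordered
   position. */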
20212
20213 int
20214 ccr_bit (rtx op, int scc_p)
20215 {
20216 enum rtx_code code = GET_CODE (op);
20217 machine_mode cc_mode;
20218 int cc_regnum;
20219 int base_bit;
20220 rtx reg;
20221
20222 if (!COMPARISON_P (op))
20223 return -1;
20224
20225 reg = XEXP (op, 0);
20226
20227 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
20228 return -1;
20229
20230 cc_mode = GET_MODE (reg);
20231 cc_regnum = REGNO (reg);
20232 base_bit = 4 * (cc_regnum - CR0_REGNO);
20233
20234 validate_condition_mode (code, cc_mode);
20235
20236 /* When generating a sCOND operation, only positive conditions are
20237 allowed. */
20238 if (scc_p)
20239 switch (code)
20240 {
20241 case EQ:
20242 case GT:
20243 case LT:
20244 case UNORDERED:
20245 case GTU:
20246 case LTU:
20247 break;
20248 default:
20249 return -1;
20250 }
20251
20252 switch (code)
20253 {
20254 case NE:
20255 return scc_p ? base_bit + 3 : base_bit + 2;
20256 case EQ:
20257 return base_bit + 2;
20258 case GT: case GTU: case UNLE:
20259 return base_bit + 1;
20260 case LT: case LTU: case UNGE:
20261 return base_bit;
20262 case ORDERED: case UNORDERED:
20263 return base_bit + 3;
20264
20265 case GE: case GEU:
20266 /* If scc, we will have done a cror to put the bit in the
20267 unordered position. So test that bit. For integer, this is ! LT
20268 unless this is an scc insn. */
20269 return scc_p ? base_bit + 3 : base_bit;
20270
20271 case LE: case LEU:
20272 return scc_p ? base_bit + 3 : base_bit + 1;
20273
20274 default:
20275 return -1;
20276 }
20277 }
20278 \f
20279 /* Return the GOT register. */
20280
20281 rtx
20282 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20283 {
20284 /* The second flow pass currently (June 1999) can't update
20285 regs_ever_live without disturbing other parts of the compiler, so
20286 update it here to make the prolog/epilogue code happy. */
20287 if (!can_create_pseudo_p ()
20288 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20289 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20290
20291 crtl->uses_pic_offset_table = 1;
20292
20293 return pic_offset_table_rtx;
20294 }
20295 \f
20296 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
20297
20298 /* Write out a function code label. */
20299
20300 void
20301 rs6000_output_function_entry (FILE *file, const char *fname)
20302 {
20303 if (fname[0] != '.')
20304 {
20305 switch (DEFAULT_ABI)
20306 {
20307 default:
20308 gcc_unreachable ();
20309
20310 case ABI_AIX:
20311 if (DOT_SYMBOLS)
20312 putc ('.', file);
20313 else
20314 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20315 break;
20316
20317 case ABI_ELFv2:
20318 case ABI_V4:
20319 case ABI_DARWIN:
20320 break;
20321 }
20322 }
20323
20324 RS6000_OUTPUT_BASENAME (file, fname);
20325 }
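/* For example, under the AIX ABI with dot-symbols a call target "foo"
   is emitted as ".foo", the code entry point, while ELFv2, V4 and
   Darwin emit the bare name.  */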
20326
20327 /* Print an operand. Recognize special options, documented below. */
20328
20329 #if TARGET_ELF
20330 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
20331 only introduced by the linker, when applying the sda21
20332 relocation. */
20333 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20334 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20335 #else
20336 #define SMALL_DATA_RELOC "sda21"
20337 #define SMALL_DATA_REG 0
20338 #endif
20339
20340 void
20341 print_operand (FILE *file, rtx x, int code)
20342 {
20343 int i;
20344 unsigned HOST_WIDE_INT uval;
20345
20346 switch (code)
20347 {
20348 /* %a is output_address. */
20349
20350 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20351 output_operand. */
20352
20353 case 'D':
20354 /* Like 'J' but get to the GT bit only. */
20355 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20356 {
20357 output_operand_lossage ("invalid %%D value");
20358 return;
20359 }
20360
20361 /* Bit 1 is GT bit. */
20362 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20363
20364 /* Add one for shift count in rlinm for scc. */
20365 fprintf (file, "%d", i + 1);
20366 return;
20367
20368 case 'e':
20369 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20370 if (! INT_P (x))
20371 {
20372 output_operand_lossage ("invalid %%e value");
20373 return;
20374 }
20375
20376 uval = INTVAL (x);
20377 if ((uval & 0xffff) == 0 && uval != 0)
20378 putc ('s', file);
20379 return;
20380
20381 case 'E':
20382 /* X is a CR register. Print the number of the EQ bit of the CR. */
20383 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20384 output_operand_lossage ("invalid %%E value");
20385 else
20386 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20387 return;
20388
20389 case 'f':
20390 /* X is a CR register. Print the shift count needed to move it
20391 to the high-order four bits. */
20392 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20393 output_operand_lossage ("invalid %%f value");
20394 else
20395 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20396 return;
20397
20398 case 'F':
20399 /* Similar, but print the count for the rotate in the opposite
20400 direction. */
20401 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20402 output_operand_lossage ("invalid %%F value");
20403 else
20404 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20405 return;
20406
20407 case 'G':
20408 /* X is a constant integer. If it is negative, print "m",
20409 otherwise print "z". This is to make an aze or ame insn. */
20410 if (!CONST_INT_P (x))
20411 output_operand_lossage ("invalid %%G value");
20412 else if (INTVAL (x) >= 0)
20413 putc ('z', file);
20414 else
20415 putc ('m', file);
20416 return;
20417
20418 case 'h':
20419 /* If constant, output low-order five bits. Otherwise, write
20420 normally. */
20421 if (INT_P (x))
20422 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20423 else
20424 print_operand (file, x, 0);
20425 return;
20426
20427 case 'H':
20428 /* If constant, output low-order six bits. Otherwise, write
20429 normally. */
20430 if (INT_P (x))
20431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20432 else
20433 print_operand (file, x, 0);
20434 return;
20435
20436 case 'I':
20437 /* Print `i' if this is a constant, else nothing. */
20438 if (INT_P (x))
20439 putc ('i', file);
20440 return;
20441
20442 case 'j':
20443 /* Write the bit number in CCR for jump. */
20444 i = ccr_bit (x, 0);
20445 if (i == -1)
20446 output_operand_lossage ("invalid %%j code");
20447 else
20448 fprintf (file, "%d", i);
20449 return;
20450
20451 case 'J':
20452 /* Similar, but add one for shift count in rlinm for scc and pass
20453 scc flag to `ccr_bit'. */
20454 i = ccr_bit (x, 1);
20455 if (i == -1)
20456 output_operand_lossage ("invalid %%J code");
20457 else
20458 /* If we want bit 31, write a shift count of zero, not 32. */
20459 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20460 return;
20461
20462 case 'k':
20463 /* X must be a constant. Write the 1's complement of the
20464 constant. */
20465 if (! INT_P (x))
20466 output_operand_lossage ("invalid %%k value");
20467 else
20468 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20469 return;
20470
20471 case 'K':
20472 /* X must be a symbolic constant on ELF. Write an
20473 expression suitable for an 'addi' that adds in the low 16
20474 bits of the MEM. */
20475 if (GET_CODE (x) == CONST)
20476 {
20477 if (GET_CODE (XEXP (x, 0)) != PLUS
20478 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
20479 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20480 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20481 output_operand_lossage ("invalid %%K value");
20482 }
20483 print_operand_address (file, x);
20484 fputs ("@l", file);
20485 return;
20486
20487 /* %l is output_asm_label. */
20488
20489 case 'L':
20490 /* Write second word of DImode or DFmode reference. Works on register
20491 or non-indexed memory only. */
20492 if (REG_P (x))
20493 fputs (reg_names[REGNO (x) + 1], file);
20494 else if (MEM_P (x))
20495 {
20496 machine_mode mode = GET_MODE (x);
20497 /* Handle possible auto-increment. Since it is pre-increment and
20498 we have already done it, we can just use an offset of one word. */
20499 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20500 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20501 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20502 UNITS_PER_WORD));
20503 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20504 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20505 UNITS_PER_WORD));
20506 else
20507 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20508 UNITS_PER_WORD),
20509 0));
20510
20511 if (small_data_operand (x, GET_MODE (x)))
20512 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20513 reg_names[SMALL_DATA_REG]);
20514 }
20515 return;
20516
20517 case 'N': /* Unused */
20518 /* Write the number of elements in the vector times 4. */
20519 if (GET_CODE (x) != PARALLEL)
20520 output_operand_lossage ("invalid %%N value");
20521 else
20522 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20523 return;
20524
20525 case 'O': /* Unused */
20526 /* Similar, but subtract 1 first. */
20527 if (GET_CODE (x) != PARALLEL)
20528 output_operand_lossage ("invalid %%O value");
20529 else
20530 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20531 return;
20532
20533 case 'p':
20534 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20535 if (! INT_P (x)
20536 || INTVAL (x) < 0
20537 || (i = exact_log2 (INTVAL (x))) < 0)
20538 output_operand_lossage ("invalid %%p value");
20539 else
20540 fprintf (file, "%d", i);
20541 return;
20542
20543 case 'P':
20544 /* The operand must be an indirect memory reference. The result
20545 is the register name. */
20546 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
20547 || REGNO (XEXP (x, 0)) >= 32)
20548 output_operand_lossage ("invalid %%P value");
20549 else
20550 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20551 return;
20552
20553 case 'q':
20554 /* This outputs the logical code corresponding to a boolean
20555 expression. The expression may have one or both operands
20556 negated (if one, only the first one). For condition register
20557 logical operations, it will also treat the negated
20558 CR codes as NOTs, but not handle NOTs of them. */
20559 {
20560 const char *const *t = 0;
20561 const char *s;
20562 enum rtx_code code = GET_CODE (x);
20563 static const char * const tbl[3][3] = {
20564 { "and", "andc", "nor" },
20565 { "or", "orc", "nand" },
20566 { "xor", "eqv", "xor" } };
20567
20568 if (code == AND)
20569 t = tbl[0];
20570 else if (code == IOR)
20571 t = tbl[1];
20572 else if (code == XOR)
20573 t = tbl[2];
20574 else
20575 output_operand_lossage ("invalid %%q value");
20576
20577 if (GET_CODE (XEXP (x, 0)) != NOT)
20578 s = t[0];
20579 else
20580 {
20581 if (GET_CODE (XEXP (x, 1)) == NOT)
20582 s = t[2];
20583 else
20584 s = t[1];
20585 }
20586
20587 fputs (s, file);
20588 }
20589 return;
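/* For example, (and (not (reg)) (reg)) selects tbl[0][1] and prints
   "andc", while (and (not (reg)) (not (reg))) selects tbl[0][2] and
   prints "nor", matching De Morgan's law ~a & ~b == ~(a | b).  */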
20590
20591 case 'Q':
20592 if (! TARGET_MFCRF)
20593 return;
20594 fputc (',', file);
20595 /* FALLTHRU */
20596
20597 case 'R':
20598 /* X is a CR register. Print the mask for `mtcrf'. */
20599 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20600 output_operand_lossage ("invalid %%R value");
20601 else
20602 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20603 return;
20604
20605 case 's':
20606 /* Low 5 bits of 32 - value */
20607 if (! INT_P (x))
20608 output_operand_lossage ("invalid %%s value");
20609 else
20610 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20611 return;
20612
20613 case 't':
20614 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20615 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
20616 {
20617 output_operand_lossage ("invalid %%t value");
20618 return;
20619 }
20620
20621 /* Bit 3 is OV bit. */
20622 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20623
20624 /* If we want bit 31, write a shift count of zero, not 32. */
20625 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20626 return;
20627
20628 case 'T':
20629 /* Print the symbolic name of a branch target register. */
20630 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20631 x = XVECEXP (x, 0, 0);
20632 if (!REG_P (x) || (REGNO (x) != LR_REGNO
20633 && REGNO (x) != CTR_REGNO))
20634 output_operand_lossage ("invalid %%T value");
20635 else if (REGNO (x) == LR_REGNO)
20636 fputs ("lr", file);
20637 else
20638 fputs ("ctr", file);
20639 return;
20640
20641 case 'u':
20642 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20643 for use in unsigned operand. */
20644 if (! INT_P (x))
20645 {
20646 output_operand_lossage ("invalid %%u value");
20647 return;
20648 }
20649
20650 uval = INTVAL (x);
20651 if ((uval & 0xffff) == 0)
20652 uval >>= 16;
20653
20654 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20655 return;
20656
20657 case 'v':
20658 /* High-order 16 bits of constant for use in signed operand. */
20659 if (! INT_P (x))
20660 output_operand_lossage ("invalid %%v value");
20661 else
20662 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20663 (INTVAL (x) >> 16) & 0xffff);
20664 return;
20665
20666 case 'U':
20667 /* Print `u' if this has an auto-increment or auto-decrement. */
20668 if (MEM_P (x)
20669 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20670 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20671 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20672 putc ('u', file);
20673 return;
20674
20675 case 'V':
20676 /* Print the trap code for this operand. */
20677 switch (GET_CODE (x))
20678 {
20679 case EQ:
20680 fputs ("eq", file); /* 4 */
20681 break;
20682 case NE:
20683 fputs ("ne", file); /* 24 */
20684 break;
20685 case LT:
20686 fputs ("lt", file); /* 16 */
20687 break;
20688 case LE:
20689 fputs ("le", file); /* 20 */
20690 break;
20691 case GT:
20692 fputs ("gt", file); /* 8 */
20693 break;
20694 case GE:
20695 fputs ("ge", file); /* 12 */
20696 break;
20697 case LTU:
20698 fputs ("llt", file); /* 2 */
20699 break;
20700 case LEU:
20701 fputs ("lle", file); /* 6 */
20702 break;
20703 case GTU:
20704 fputs ("lgt", file); /* 1 */
20705 break;
20706 case GEU:
20707 fputs ("lge", file); /* 5 */
20708 break;
20709 default:
20710 output_operand_lossage ("invalid %%V value");
20711 }
20712 break;
20713
20714 case 'w':
20715 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20716 normally. */
20717 if (INT_P (x))
20718 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20719 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20720 else
20721 print_operand (file, x, 0);
20722 return;
20723
20724 case 'x':
20725 /* X is a FPR or Altivec register used in a VSX context. */
20726 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
20727 output_operand_lossage ("invalid %%x value");
20728 else
20729 {
20730 int reg = REGNO (x);
20731 int vsx_reg = (FP_REGNO_P (reg)
20732 ? reg - 32
20733 : reg - FIRST_ALTIVEC_REGNO + 32);
20734
20735 #ifdef TARGET_REGNAMES
20736 if (TARGET_REGNAMES)
20737 fprintf (file, "%%vs%d", vsx_reg);
20738 else
20739 #endif
20740 fprintf (file, "%d", vsx_reg);
20741 }
20742 return;
20743
20744 case 'X':
20745 if (MEM_P (x)
20746 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20747 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20748 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20749 putc ('x', file);
20750 return;
20751
20752 case 'Y':
20753 /* Like 'L', for third word of TImode/PTImode */
20754 if (REG_P (x))
20755 fputs (reg_names[REGNO (x) + 2], file);
20756 else if (MEM_P (x))
20757 {
20758 machine_mode mode = GET_MODE (x);
20759 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20760 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20761 output_address (mode, plus_constant (Pmode,
20762 XEXP (XEXP (x, 0), 0), 8));
20763 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20764 output_address (mode, plus_constant (Pmode,
20765 XEXP (XEXP (x, 0), 0), 8));
20766 else
20767 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20768 if (small_data_operand (x, GET_MODE (x)))
20769 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20770 reg_names[SMALL_DATA_REG]);
20771 }
20772 return;
20773
20774 case 'z':
20775 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20776 x = XVECEXP (x, 0, 1);
20777 /* X is a SYMBOL_REF. Write out the name preceded by a
20778 period and without any trailing data in brackets. Used for function
20779 names. If we are configured for System V (or the embedded ABI) on
20780 the PowerPC, do not emit the period, since those systems do not use
20781 TOCs and the like. */
20782 if (!SYMBOL_REF_P (x))
20783 {
20784 output_operand_lossage ("invalid %%z value");
20785 return;
20786 }
20787
20788 /* For macho, check to see if we need a stub. */
20789 if (TARGET_MACHO)
20790 {
20791 const char *name = XSTR (x, 0);
20792 #if TARGET_MACHO
20793 if (darwin_picsymbol_stubs
20794 && MACHOPIC_INDIRECT
20795 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20796 name = machopic_indirection_name (x, /*stub_p=*/true);
20797 #endif
20798 assemble_name (file, name);
20799 }
20800 else if (!DOT_SYMBOLS)
20801 assemble_name (file, XSTR (x, 0));
20802 else
20803 rs6000_output_function_entry (file, XSTR (x, 0));
20804 return;
20805
20806 case 'Z':
20807 /* Like 'L', for last word of TImode/PTImode. */
20808 if (REG_P (x))
20809 fputs (reg_names[REGNO (x) + 3], file);
20810 else if (MEM_P (x))
20811 {
20812 machine_mode mode = GET_MODE (x);
20813 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20814 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20815 output_address (mode, plus_constant (Pmode,
20816 XEXP (XEXP (x, 0), 0), 12));
20817 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20818 output_address (mode, plus_constant (Pmode,
20819 XEXP (XEXP (x, 0), 0), 12));
20820 else
20821 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20822 if (small_data_operand (x, GET_MODE (x)))
20823 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20824 reg_names[SMALL_DATA_REG]);
20825 }
20826 return;
20827
20828 /* Print AltiVec memory operand. */
20829 case 'y':
20830 {
20831 rtx tmp;
20832
20833 gcc_assert (MEM_P (x));
20834
20835 tmp = XEXP (x, 0);
20836
20837 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20838 && GET_CODE (tmp) == AND
20839 && CONST_INT_P (XEXP (tmp, 1))
20840 && INTVAL (XEXP (tmp, 1)) == -16)
20841 tmp = XEXP (tmp, 0);
20842 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20843 && GET_CODE (tmp) == PRE_MODIFY)
20844 tmp = XEXP (tmp, 1);
20845 if (REG_P (tmp))
20846 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20847 else
20848 {
20849 if (GET_CODE (tmp) != PLUS
20850 || !REG_P (XEXP (tmp, 0))
20851 || !REG_P (XEXP (tmp, 1)))
20852 {
20853 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20854 break;
20855 }
20856
20857 if (REGNO (XEXP (tmp, 0)) == 0)
20858 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20859 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20860 else
20861 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20862 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20863 }
20864 break;
20865 }
20866
20867 case 0:
20868 if (REG_P (x))
20869 fprintf (file, "%s", reg_names[REGNO (x)]);
20870 else if (MEM_P (x))
20871 {
20872 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20873 know the width from the mode. */
20874 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20875 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20876 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20877 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20878 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20879 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20880 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20881 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20882 else
20883 output_address (GET_MODE (x), XEXP (x, 0));
20884 }
20885 else if (toc_relative_expr_p (x, false,
20886 &tocrel_base_oac, &tocrel_offset_oac))
20887 /* This hack along with a corresponding hack in
20888 rs6000_output_addr_const_extra arranges to output addends
20889 where the assembler expects to find them. eg.
20890 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20891 without this hack would be output as "x@toc+4". We
20892 want "x+4@toc". */
20893 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20894 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
20895 output_addr_const (file, XVECEXP (x, 0, 0));
20896 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
20897 output_addr_const (file, XVECEXP (x, 0, 1));
20898 else
20899 output_addr_const (file, x);
20900 return;
20901
20902 case '&':
20903 if (const char *name = get_some_local_dynamic_name ())
20904 assemble_name (file, name);
20905 else
20906 output_operand_lossage ("'%%&' used without any "
20907 "local dynamic TLS references");
20908 return;
20909
20910 default:
20911 output_operand_lossage ("invalid %%xn code");
20912 }
20913 }
20914 \f
20915 /* Print the address of an operand. */
20916
20917 void
20918 print_operand_address (FILE *file, rtx x)
20919 {
20920 if (REG_P (x))
20921 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20922
20923 /* Is it a pc-relative address? */
20924 else if (pcrel_address (x, Pmode))
20925 {
20926 HOST_WIDE_INT offset;
20927
20928 if (GET_CODE (x) == CONST)
20929 x = XEXP (x, 0);
20930
20931 if (GET_CODE (x) == PLUS)
20932 {
20933 offset = INTVAL (XEXP (x, 1));
20934 x = XEXP (x, 0);
20935 }
20936 else
20937 offset = 0;
20938
20939 output_addr_const (file, x);
20940
20941 if (offset)
20942 fprintf (file, "%+" PRId64, offset);
20943
20944 fputs ("@pcrel", file);
20945 }
20946 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
20947 || GET_CODE (x) == LABEL_REF)
20948 {
20949 output_addr_const (file, x);
20950 if (small_data_operand (x, GET_MODE (x)))
20951 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20952 reg_names[SMALL_DATA_REG]);
20953 else
20954 gcc_assert (!TARGET_TOC);
20955 }
20956 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20957 && REG_P (XEXP (x, 1)))
20958 {
20959 if (REGNO (XEXP (x, 0)) == 0)
20960 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20961 reg_names[ REGNO (XEXP (x, 0)) ]);
20962 else
20963 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20964 reg_names[ REGNO (XEXP (x, 1)) ]);
20965 }
20966 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20967 && CONST_INT_P (XEXP (x, 1)))
20968 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20969 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20970 #if TARGET_MACHO
20971 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20972 && CONSTANT_P (XEXP (x, 1)))
20973 {
20974 fprintf (file, "lo16(");
20975 output_addr_const (file, XEXP (x, 1));
20976 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20977 }
20978 #endif
20979 #if TARGET_ELF
20980 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20981 && CONSTANT_P (XEXP (x, 1)))
20982 {
20983 output_addr_const (file, XEXP (x, 1));
20984 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20985 }
20986 #endif
20987 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
20988 {
20989 /* This hack along with a corresponding hack in
20990 rs6000_output_addr_const_extra arranges to output addends
20991 where the assembler expects to find them. eg.
20992 (lo_sum (reg 9)
20993 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20994 without this hack would be output as "x@toc+8@l(9)". We
20995 want "x+8@toc@l(9)". */
20996 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
20997 if (GET_CODE (x) == LO_SUM)
20998 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
20999 else
21000 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21001 }
21002 else
21003 output_addr_const (file, x);
21004 }
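/* Some sample outputs of the cases above, assuming the default (bare
   number) register names: a plain register base prints as "0(9)", a
   register-plus-constant address as "16(9)", and an indexed
   register-plus-register address as "9,10", with r0 forced into the
   second slot because r0 in the RA position means a literal zero.  */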
21005 \f
21006 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21007
21008 static bool
21009 rs6000_output_addr_const_extra (FILE *file, rtx x)
21010 {
21011 if (GET_CODE (x) == UNSPEC)
21012 switch (XINT (x, 1))
21013 {
21014 case UNSPEC_TOCREL:
21015 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
21016 && REG_P (XVECEXP (x, 0, 1))
21017 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21018 output_addr_const (file, XVECEXP (x, 0, 0));
21019 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21020 {
21021 if (INTVAL (tocrel_offset_oac) >= 0)
21022 fprintf (file, "+");
21023 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21024 }
21025 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21026 {
21027 putc ('-', file);
21028 assemble_name (file, toc_label_name);
21029 need_toc_init = 1;
21030 }
21031 else if (TARGET_ELF)
21032 fputs ("@toc", file);
21033 return true;
21034
21035 #if TARGET_MACHO
21036 case UNSPEC_MACHOPIC_OFFSET:
21037 output_addr_const (file, XVECEXP (x, 0, 0));
21038 putc ('-', file);
21039 machopic_output_function_base_name (file);
21040 return true;
21041 #endif
21042 }
21043 return false;
21044 }
21045 \f
21046 /* Target hook for assembling integer objects. The PowerPC version has
21047 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21048 is defined. It also needs to handle DI-mode objects on 64-bit
21049 targets. */
21050
21051 static bool
21052 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21053 {
21054 #ifdef RELOCATABLE_NEEDS_FIXUP
21055 /* Special handling for SI values. */
21056 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21057 {
21058 static int recurse = 0;
21059
21060 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21061 the .fixup section. Since the TOC section is already relocated, we
21062 don't need to mark it here. We used to skip the text section, but it
21063 should never be valid for relocated addresses to be placed in the text
21064 section. */
21065 if (DEFAULT_ABI == ABI_V4
21066 && (TARGET_RELOCATABLE || flag_pic > 1)
21067 && in_section != toc_section
21068 && !recurse
21069 && !CONST_SCALAR_INT_P (x)
21070 && CONSTANT_P (x))
21071 {
21072 char buf[256];
21073
21074 recurse = 1;
21075 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21076 fixuplabelno++;
21077 ASM_OUTPUT_LABEL (asm_out_file, buf);
21078 fprintf (asm_out_file, "\t.long\t(");
21079 output_addr_const (asm_out_file, x);
21080 fprintf (asm_out_file, ")@fixup\n");
21081 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21082 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21083 fprintf (asm_out_file, "\t.long\t");
21084 assemble_name (asm_out_file, buf);
21085 fprintf (asm_out_file, "\n\t.previous\n");
21086 recurse = 0;
21087 return true;
21088 }
21089 /* Remove initial .'s to turn a -mcall-aixdesc function
21090 address into the address of the descriptor, not the function
21091 itself. */
21092 else if (SYMBOL_REF_P (x)
21093 && XSTR (x, 0)[0] == '.'
21094 && DEFAULT_ABI == ABI_AIX)
21095 {
21096 const char *name = XSTR (x, 0);
21097 while (*name == '.')
21098 name++;
21099
21100 fprintf (asm_out_file, "\t.long\t%s\n", name);
21101 return true;
21102 }
21103 }
21104 #endif /* RELOCATABLE_NEEDS_FIXUP */
21105 return default_assemble_integer (x, size, aligned_p);
21106 }
21107
21108 /* Return a template string for assembly to emit when making an
21109 external call. FUNOP is the call mem argument operand number. */
21110
21111 static const char *
21112 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
21113 {
21114 /* -Wformat-overflow workaround, without which gcc thinks that %u
21115 might produce 10 digits. */
21116 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21117
21118 char arg[12];
21119 arg[0] = 0;
21120 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21121 {
21122 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21123 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
21124 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21125 sprintf (arg, "(%%&@tlsld)");
21126 else
21127 gcc_unreachable ();
21128 }
21129
21130 /* The magic 32768 offset here corresponds to the offset of
21131 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
21132 char z[11];
21133 sprintf (z, "%%z%u%s", funop,
21134 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
21135 ? "+32768" : ""));
21136
21137 static char str[32]; /* 1 spare */
21138 if (rs6000_pcrel_p (cfun))
21139 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
21140 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21141 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21142 sibcall ? "" : "\n\tnop");
21143 else if (DEFAULT_ABI == ABI_V4)
21144 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
21145 flag_pic ? "@plt" : "");
21146 #if TARGET_MACHO
21147 /* If/when we remove the -mlongcall option, we can share the AIX/ELFv2 case. */
21148 else if (DEFAULT_ABI == ABI_DARWIN)
21149 {
21150 /* The cookie is in operand func+2. */
21151 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
21152 int cookie = INTVAL (operands[funop + 2]);
21153 if (cookie & CALL_LONG)
21154 {
21155 tree funname = get_identifier (XSTR (operands[funop], 0));
21156 tree labelname = get_prev_label (funname);
21157 gcc_checking_assert (labelname && !sibcall);
21158
21159 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
21160 instruction will reach 'foo', otherwise link as 'bl L42'".
21161 "L42" should be a 'branch island', that will do a far jump to
21162 'foo'. Branch islands are generated in
21163 macho_branch_islands(). */
21164 sprintf (str, "jbsr %%z%u,%.10s", funop,
21165 IDENTIFIER_POINTER (labelname));
21166 }
21167 else
21168 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
21169 after the call. */
21170 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
21171 }
21172 #endif
21173 else
21174 gcc_unreachable ();
21175 return str;
21176 }
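/* As a concrete example of the templates above: a normal (non-sibling)
   call on ELFv2 without pc-relative addressing comes out as
   "bl %z0\n\tnop" for funop == 0.  The trailing nop occupies the slot
   the linker may rewrite to restore the TOC pointer after a cross-TOC
   call -- background ABI behavior, not something this function
   checks.  */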
21177
21178 const char *
21179 rs6000_call_template (rtx *operands, unsigned int funop)
21180 {
21181 return rs6000_call_template_1 (operands, funop, false);
21182 }
21183
21184 const char *
21185 rs6000_sibcall_template (rtx *operands, unsigned int funop)
21186 {
21187 return rs6000_call_template_1 (operands, funop, true);
21188 }
21189
21190 /* As above, for indirect calls. */
21191
21192 static const char *
21193 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
21194 bool sibcall)
21195 {
21196 /* -Wformat-overflow workaround, without which gcc thinks that %u
21197 might produce 10 digits. Note that -Wformat-overflow will not
21198 currently warn here for str[], so do not rely on a warning to
21199 ensure str[] is correctly sized. */
21200 gcc_assert (funop <= MAX_RECOG_OPERANDS);
21201
21202 /* Currently, funop is either 0 or 1. The maximum string is always
21203 a !speculate 64-bit __tls_get_addr call.
21204
21205 ABI_ELFv2, pcrel:
21206 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21207 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
21208 . 9 crset 2\n\t
21209 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21210 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
21211 . 8 beq%T1l-
21212 .---
21213 .142
21214
21215 ABI_AIX:
21216 . 9 ld 2,%3\n\t
21217 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21218 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21219 . 9 crset 2\n\t
21220 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21221 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21222 . 10 beq%T1l-\n\t
21223 . 10 ld 2,%4(1)
21224 .---
21225 .151
21226
21227 ABI_ELFv2:
21228 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21229 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
21230 . 9 crset 2\n\t
21231 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21232 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
21233 . 10 beq%T1l-\n\t
21234 . 10 ld 2,%3(1)
21235 .---
21236 .142
21237
21238 ABI_V4:
21239 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21240 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
21241 . 9 crset 2\n\t
21242 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
21243 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
21244 . 8 beq%T1l-
21245 .---
21246 .141 */
21247 static char str[160]; /* 8 spare */
21248 char *s = str;
21249 const char *ptrload = TARGET_64BIT ? "d" : "wz";
21250
21251 if (DEFAULT_ABI == ABI_AIX)
21252 s += sprintf (s,
21253 "l%s 2,%%%u\n\t",
21254 ptrload, funop + 2);
21255
21256 /* We don't need the extra code to stop indirect call speculation if
21257 calling via LR. */
21258 bool speculate = (TARGET_MACHO
21259 || rs6000_speculate_indirect_jumps
21260 || (REG_P (operands[funop])
21261 && REGNO (operands[funop]) == LR_REGNO));
21262
21263 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
21264 {
21265 const char *rel64 = TARGET_64BIT ? "64" : "";
21266 char tls[29];
21267 tls[0] = 0;
21268 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
21269 {
21270 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
21271 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
21272 rel64, funop + 1);
21273 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
21274 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
21275 rel64);
21276 else
21277 gcc_unreachable ();
21278 }
21279
21280 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
21281 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21282 && flag_pic == 2 ? "+32768" : "");
21283 if (!speculate)
21284 {
21285 s += sprintf (s,
21286 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
21287 tls, rel64, notoc, funop, addend);
21288 s += sprintf (s, "crset 2\n\t");
21289 }
21290 s += sprintf (s,
21291 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
21292 tls, rel64, notoc, funop, addend);
21293 }
21294 else if (!speculate)
21295 s += sprintf (s, "crset 2\n\t");
21296
21297 if (rs6000_pcrel_p (cfun))
21298 {
21299 if (speculate)
21300 sprintf (s, "b%%T%ul", funop);
21301 else
21302 sprintf (s, "beq%%T%ul-", funop);
21303 }
21304 else if (DEFAULT_ABI == ABI_AIX)
21305 {
21306 if (speculate)
21307 sprintf (s,
21308 "b%%T%ul\n\t"
21309 "l%s 2,%%%u(1)",
21310 funop, ptrload, funop + 3);
21311 else
21312 sprintf (s,
21313 "beq%%T%ul-\n\t"
21314 "l%s 2,%%%u(1)",
21315 funop, ptrload, funop + 3);
21316 }
21317 else if (DEFAULT_ABI == ABI_ELFv2)
21318 {
21319 if (speculate)
21320 sprintf (s,
21321 "b%%T%ul\n\t"
21322 "l%s 2,%%%u(1)",
21323 funop, ptrload, funop + 2);
21324 else
21325 sprintf (s,
21326 "beq%%T%ul-\n\t"
21327 "l%s 2,%%%u(1)",
21328 funop, ptrload, funop + 2);
21329 }
21330 else
21331 {
21332 if (speculate)
21333 sprintf (s,
21334 "b%%T%u%s",
21335 funop, sibcall ? "" : "l");
21336 else
21337 sprintf (s,
21338 "beq%%T%u%s-%s",
21339 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
21340 }
21341 return str;
21342 }
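/* For example, with funop == 0 on 64-bit ELFv2 and speculation allowed,
   the code above builds the template "b%T0l\n\tld 2,%2(1)".  Since %T
   prints "ctr" or "lr", the final assembly is e.g. "bctrl" followed by
   "ld 2,24(1)"; the 24 stands in for operand %2 and is only an
   illustration of the usual ELFv2 TOC save slot.  */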
21343
21344 const char *
21345 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
21346 {
21347 return rs6000_indirect_call_template_1 (operands, funop, false);
21348 }
21349
21350 const char *
21351 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
21352 {
21353 return rs6000_indirect_call_template_1 (operands, funop, true);
21354 }
21355
21356 #if HAVE_AS_PLTSEQ
21357 /* Output indirect call insns. WHICH identifies the type of sequence. */
21358 const char *
21359 rs6000_pltseq_template (rtx *operands, int which)
21360 {
21361 const char *rel64 = TARGET_64BIT ? "64" : "";
21362 char tls[30];
21363 tls[0] = 0;
21364 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
21365 {
21366 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
21367 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
21368 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
21369 off, rel64);
21370 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
21371 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
21372 off, rel64);
21373 else
21374 gcc_unreachable ();
21375 }
21376
21377 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
21378 static char str[96]; /* 10 spare */
21379 char off = WORDS_BIG_ENDIAN ? '2' : '4';
21380 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
21381 && flag_pic == 2 ? "+32768" : "");
21382 switch (which)
21383 {
21384 case RS6000_PLTSEQ_TOCSAVE:
21385 sprintf (str,
21386 "st%s\n\t"
21387 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
21388 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
21389 tls, rel64);
21390 break;
21391 case RS6000_PLTSEQ_PLT16_HA:
21392 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
21393 sprintf (str,
21394 "lis %%0,0\n\t"
21395 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
21396 tls, off, rel64);
21397 else
21398 sprintf (str,
21399 "addis %%0,%%1,0\n\t"
21400 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
21401 tls, off, rel64, addend);
21402 break;
21403 case RS6000_PLTSEQ_PLT16_LO:
21404 sprintf (str,
21405 "l%s %%0,0(%%1)\n\t"
21406 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
21407 TARGET_64BIT ? "d" : "wz",
21408 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
21409 break;
21410 case RS6000_PLTSEQ_MTCTR:
21411 sprintf (str,
21412 "mtctr %%1\n\t"
21413 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
21414 tls, rel64, addend);
21415 break;
21416 case RS6000_PLTSEQ_PLT_PCREL34:
21417 sprintf (str,
21418 "pl%s %%0,0(0),1\n\t"
21419 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
21420 TARGET_64BIT ? "d" : "wz",
21421 tls, rel64);
21422 break;
21423 default:
21424 gcc_unreachable ();
21425 }
21426 return str;
21427 }
21428 #endif
21429
21430 /* Helper function to return whether a MODE can do prefixed loads/stores.
21431 VOIDmode is used when we are loading the pc-relative address into a base
21432 register, but we are not using it as part of a memory operation. As modes
21433 add support for prefixed memory, they will be added here. */
21434
21435 static bool
21436 mode_supports_prefixed_address_p (machine_mode mode)
21437 {
21438 return mode == VOIDmode;
21439 }
21440
21441 /* Function to return true if ADDR is a valid prefixed memory address that uses
21442 mode MODE. */
21443
21444 bool
21445 rs6000_prefixed_address (rtx addr, machine_mode mode)
21446 {
21447 if (!TARGET_PREFIXED_ADDR || !mode_supports_prefixed_address_p (mode))
21448 return false;
21449
21450 /* Check for PC-relative addresses. */
21451 if (pcrel_address (addr, Pmode))
21452 return true;
21453
21454 /* Check for prefixed memory addresses that have a large numeric offset,
21455 or an offset that can't be used for a DS/DQ-form memory operation. */
21456 if (GET_CODE (addr) == PLUS)
21457 {
21458 rtx op0 = XEXP (addr, 0);
21459 rtx op1 = XEXP (addr, 1);
21460
21461 if (!base_reg_operand (op0, Pmode) || !CONST_INT_P (op1))
21462 return false;
21463
21464 HOST_WIDE_INT value = INTVAL (op1);
21465 if (!SIGNED_34BIT_OFFSET_P (value, 0))
21466 return false;
21467
21468 /* Offset larger than 16 bits? */
21469 if (!SIGNED_16BIT_OFFSET_P (value, 0))
21470 return true;
21471
21472 /* DQ instruction (bottom 4 bits must be 0) for vectors. */
21473 HOST_WIDE_INT mask;
21474 if (GET_MODE_SIZE (mode) >= 16)
21475 mask = 15;
21476
21477 /* DS instruction (bottom 2 bits must be 0). For 32-bit integers, we
21478 need to use DS instructions if we are sign-extending the value with
21479 LWA. For 32-bit floating point, we need DS instructions to load and
21480 store values to the traditional Altivec registers. */
21481 else if (GET_MODE_SIZE (mode) >= 4)
21482 mask = 3;
21483
21484 /* QImode/HImode has no restrictions. */
21485 else
21486 return true;
21487
21488 /* Return true if we must use a prefixed instruction. */
21489 return (value & mask) != 0;
21490 }
21491
21492 return false;
21493 }
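/* An illustrative walk through the checks above, assuming a mode that
   mode_supports_prefixed_address_p accepts: (plus (reg) (const_int
   0x12345)) needs a prefixed form because the offset does not fit in
   signed 16 bits, and a 16-byte vector access at (plus (reg)
   (const_int 17)) would too, because a DQ-form offset must have its
   bottom 4 bits clear.  (Only VOIDmode currently passes the mode
   check, so these are forward-looking examples.)  */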
21494 \f
21495 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21496 /* Emit an assembler directive to set symbol visibility for DECL to
21497 VISIBILITY_TYPE. */
21498
21499 static void
21500 rs6000_assemble_visibility (tree decl, int vis)
21501 {
21502 if (TARGET_XCOFF)
21503 return;
21504
21505 /* Functions need to have their entry point symbol visibility set as
21506 well as their descriptor symbol visibility. */
21507 if (DEFAULT_ABI == ABI_AIX
21508 && DOT_SYMBOLS
21509 && TREE_CODE (decl) == FUNCTION_DECL)
21510 {
21511 static const char * const visibility_types[] = {
21512 NULL, "protected", "hidden", "internal"
21513 };
21514
21515 const char *name, *type;
21516
21517 name = ((* targetm.strip_name_encoding)
21518 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21519 type = visibility_types[vis];
21520
21521 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21522 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21523 }
21524 else
21525 default_assemble_visibility (decl, vis);
21526 }
21527 #endif
21528 \f
21529 enum rtx_code
21530 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21531 {
21532 /* Reversal of FP compares needs care -- an ordered compare
21533 becomes an unordered compare and vice versa. */
21534 if (mode == CCFPmode
21535 && (!flag_finite_math_only
21536 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21537 || code == UNEQ || code == LTGT))
21538 return reverse_condition_maybe_unordered (code);
21539 else
21540 return reverse_condition (code);
21541 }
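/* For example, reversing GE in CCFPmode without flag_finite_math_only
   yields UNLT rather than LT, because the reversed test must also hold
   when the operands compare unordered.  */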
21542
21543 /* Generate a compare for CODE. Return a brand-new rtx that
21544 represents the result of the compare. */
21545
21546 static rtx
21547 rs6000_generate_compare (rtx cmp, machine_mode mode)
21548 {
21549 machine_mode comp_mode;
21550 rtx compare_result;
21551 enum rtx_code code = GET_CODE (cmp);
21552 rtx op0 = XEXP (cmp, 0);
21553 rtx op1 = XEXP (cmp, 1);
21554
21555 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21556 comp_mode = CCmode;
21557 else if (FLOAT_MODE_P (mode))
21558 comp_mode = CCFPmode;
21559 else if (code == GTU || code == LTU
21560 || code == GEU || code == LEU)
21561 comp_mode = CCUNSmode;
21562 else if ((code == EQ || code == NE)
21563 && unsigned_reg_p (op0)
21564 && (unsigned_reg_p (op1)
21565 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21566 /* These are unsigned values; perhaps there will be a later
21567 ordering compare that can be shared with this one. */
21568 comp_mode = CCUNSmode;
21569 else
21570 comp_mode = CCmode;
21571
21572 /* If we have an unsigned compare, make sure we don't have a signed value as
21573 an immediate. */
21574 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
21575 && INTVAL (op1) < 0)
21576 {
21577 op0 = copy_rtx_if_shared (op0);
21578 op1 = force_reg (GET_MODE (op0), op1);
21579 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21580 }
21581
21582 /* First, the compare. */
21583 compare_result = gen_reg_rtx (comp_mode);
21584
21585 /* IEEE 128-bit support in VSX registers when we do not have hardware
21586 support. */
21587 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21588 {
21589 rtx libfunc = NULL_RTX;
21590 bool check_nan = false;
21591 rtx dest;
21592
21593 switch (code)
21594 {
21595 case EQ:
21596 case NE:
21597 libfunc = optab_libfunc (eq_optab, mode);
21598 break;
21599
21600 case GT:
21601 case GE:
21602 libfunc = optab_libfunc (ge_optab, mode);
21603 break;
21604
21605 case LT:
21606 case LE:
21607 libfunc = optab_libfunc (le_optab, mode);
21608 break;
21609
21610 case UNORDERED:
21611 case ORDERED:
21612 libfunc = optab_libfunc (unord_optab, mode);
21613 code = (code == UNORDERED) ? NE : EQ;
21614 break;
21615
21616 case UNGE:
21617 case UNGT:
21618 check_nan = true;
21619 libfunc = optab_libfunc (ge_optab, mode);
21620 code = (code == UNGE) ? GE : GT;
21621 break;
21622
21623 case UNLE:
21624 case UNLT:
21625 check_nan = true;
21626 libfunc = optab_libfunc (le_optab, mode);
21627 code = (code == UNLE) ? LE : LT;
21628 break;
21629
21630 case UNEQ:
21631 case LTGT:
21632 check_nan = true;
21633 libfunc = optab_libfunc (eq_optab, mode);
21634 code = (code == UNEQ) ? EQ : NE;
21635 break;
21636
21637 default:
21638 gcc_unreachable ();
21639 }
21640
21641 gcc_assert (libfunc);
21642
21643 if (!check_nan)
21644 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21645 SImode, op0, mode, op1, mode);
21646
21647 /* The library signals an exception for signalling NaNs, so we need to
21648 handle isgreater, etc. by first checking isordered. */
21649 else
21650 {
21651 rtx ne_rtx, normal_dest, unord_dest;
21652 rtx unord_func = optab_libfunc (unord_optab, mode);
21653 rtx join_label = gen_label_rtx ();
21654 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21655 rtx unord_cmp = gen_reg_rtx (comp_mode);
21656
21658 /* Test for either value being a NaN. */
21659 gcc_assert (unord_func);
21660 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21661 SImode, op0, mode, op1, mode);
21662
21663 /* Set the result to true (1) if either value is a NaN, and jump
21664 to the join label. */
21665 dest = gen_reg_rtx (SImode);
21666 emit_move_insn (dest, const1_rtx);
21667 emit_insn (gen_rtx_SET (unord_cmp,
21668 gen_rtx_COMPARE (comp_mode, unord_dest,
21669 const0_rtx)));
21670
21671 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21672 emit_jump_insn (gen_rtx_SET (pc_rtx,
21673 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21674 join_ref,
21675 pc_rtx)));
21676
21677 /* Do the normal comparison, knowing that the values are not
21678 NaNs. */
21679 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21680 SImode, op0, mode, op1, mode);
21681
21682 emit_insn (gen_cstoresi4 (dest,
21683 gen_rtx_fmt_ee (code, SImode, normal_dest,
21684 const0_rtx),
21685 normal_dest, const0_rtx));
21686
21687 /* Join the NaN and non-NaN paths. Compare dest against 0. */
21688 emit_label (join_label);
21689 code = NE;
21690 }
21691
21692 emit_insn (gen_rtx_SET (compare_result,
21693 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21694 }
21695
21696 else
21697 {
21698 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
21699 CLOBBERs to match cmptf_internal2 pattern. */
21700 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21701 && FLOAT128_IBM_P (GET_MODE (op0))
21702 && TARGET_HARD_FLOAT)
21703 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21704 gen_rtvec (10,
21705 gen_rtx_SET (compare_result,
21706 gen_rtx_COMPARE (comp_mode, op0, op1)),
21707 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21708 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21709 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21710 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21711 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21712 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21713 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21714 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21715 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21716 else if (GET_CODE (op1) == UNSPEC
21717 && XINT (op1, 1) == UNSPEC_SP_TEST)
21718 {
21719 rtx op1b = XVECEXP (op1, 0, 0);
21720 comp_mode = CCEQmode;
21721 compare_result = gen_reg_rtx (CCEQmode);
21722 if (TARGET_64BIT)
21723 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21724 else
21725 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21726 }
21727 else
21728 emit_insn (gen_rtx_SET (compare_result,
21729 gen_rtx_COMPARE (comp_mode, op0, op1)));
21730 }
21731
21732 /* Some kinds of FP comparisons need an OR operation;
21733 under flag_finite_math_only we don't bother. */
21734 if (FLOAT_MODE_P (mode)
21735 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21736 && !flag_finite_math_only
21737 && (code == LE || code == GE
21738 || code == UNEQ || code == LTGT
21739 || code == UNGT || code == UNLT))
21740 {
21741 enum rtx_code or1, or2;
21742 rtx or1_rtx, or2_rtx, compare2_rtx;
21743 rtx or_result = gen_reg_rtx (CCEQmode);
21744
21745 switch (code)
21746 {
21747 case LE: or1 = LT; or2 = EQ; break;
21748 case GE: or1 = GT; or2 = EQ; break;
21749 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21750 case LTGT: or1 = LT; or2 = GT; break;
21751 case UNGT: or1 = UNORDERED; or2 = GT; break;
21752 case UNLT: or1 = UNORDERED; or2 = LT; break;
21753 default: gcc_unreachable ();
21754 }
21755 validate_condition_mode (or1, comp_mode);
21756 validate_condition_mode (or2, comp_mode);
21757 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21758 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21759 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21760 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21761 const_true_rtx);
21762 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21763
21764 compare_result = or_result;
21765 code = EQ;
21766 }
21767
21768 validate_condition_mode (code, GET_MODE (compare_result));
21769
21770 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
21771 }
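/* A sketch of the CCEQ merge near the end of the function above: a
   floating-point LE has no single CR bit, so it is recomputed as
   (LT | EQ) of the original CCFP result compared against true in
   CCEQmode, and the code handed back becomes a plain EQ test of that
   merged bit.  Matching this to a cror-style CR-logical instruction is
   left to the machine description.  */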
21772
21773 \f
21774 /* Return the diagnostic message string if the binary operation OP is
21775 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21776
21777 static const char*
21778 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21779 const_tree type1,
21780 const_tree type2)
21781 {
21782 machine_mode mode1 = TYPE_MODE (type1);
21783 machine_mode mode2 = TYPE_MODE (type2);
21784
21785 /* For complex modes, use the inner type. */
21786 if (COMPLEX_MODE_P (mode1))
21787 mode1 = GET_MODE_INNER (mode1);
21788
21789 if (COMPLEX_MODE_P (mode2))
21790 mode2 = GET_MODE_INNER (mode2);
21791
21792 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21793 double to intermix unless -mfloat128-convert. */
21794 if (mode1 == mode2)
21795 return NULL;
21796
21797 if (!TARGET_FLOAT128_CVT)
21798 {
21799 if ((mode1 == KFmode && mode2 == IFmode)
21800 || (mode1 == IFmode && mode2 == KFmode))
21801 return N_("__float128 and __ibm128 cannot be used in the same "
21802 "expression");
21803
21804 if (TARGET_IEEEQUAD
21805 && ((mode1 == IFmode && mode2 == TFmode)
21806 || (mode1 == TFmode && mode2 == IFmode)))
21807 return N_("__ibm128 and long double cannot be used in the same "
21808 "expression");
21809
21810 if (!TARGET_IEEEQUAD
21811 && ((mode1 == KFmode && mode2 == TFmode)
21812 || (mode1 == TFmode && mode2 == KFmode)))
21813 return N_("__float128 and long double cannot be used in the same "
21814 "expression");
21815 }
21816
21817 return NULL;
21818 }
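/* For example, without -mfloat128-convert an expression adding a
   __float128 (KFmode) value to an __ibm128 (IFmode) value draws the
   first error above; with the option the modes are allowed to mix.  */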
21819
21820 \f
21821 /* Expand floating point conversion to/from __float128 and __ibm128. */
21822
21823 void
21824 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21825 {
21826 machine_mode dest_mode = GET_MODE (dest);
21827 machine_mode src_mode = GET_MODE (src);
21828 convert_optab cvt = unknown_optab;
21829 bool do_move = false;
21830 rtx libfunc = NULL_RTX;
21831 rtx dest2;
21832 typedef rtx (*rtx_2func_t) (rtx, rtx);
21833 rtx_2func_t hw_convert = (rtx_2func_t)0;
21834 size_t kf_or_tf;
21835
21836 struct hw_conv_t {
21837 rtx_2func_t from_df;
21838 rtx_2func_t from_sf;
21839 rtx_2func_t from_si_sign;
21840 rtx_2func_t from_si_uns;
21841 rtx_2func_t from_di_sign;
21842 rtx_2func_t from_di_uns;
21843 rtx_2func_t to_df;
21844 rtx_2func_t to_sf;
21845 rtx_2func_t to_si_sign;
21846 rtx_2func_t to_si_uns;
21847 rtx_2func_t to_di_sign;
21848 rtx_2func_t to_di_uns;
21849 } hw_conversions[2] = {
21850 /* Conversions to/from KFmode. */
21851 {
21852 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21853 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21854 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21855 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21856 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21857 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21858 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21859 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21860 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21861 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21862 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21863 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21864 },
21865
21866 /* Conversions to/from TFmode. */
21867 {
21868 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21869 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21870 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21871 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21872 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21873 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21874 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21875 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21876 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21877 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21878 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21879 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21880 },
21881 };
21882
21883 if (dest_mode == src_mode)
21884 gcc_unreachable ();
21885
21886 /* Eliminate memory operations. */
21887 if (MEM_P (src))
21888 src = force_reg (src_mode, src);
21889
21890 if (MEM_P (dest))
21891 {
21892 rtx tmp = gen_reg_rtx (dest_mode);
21893 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21894 rs6000_emit_move (dest, tmp, dest_mode);
21895 return;
21896 }
21897
21898 /* Convert to IEEE 128-bit floating point. */
21899 if (FLOAT128_IEEE_P (dest_mode))
21900 {
21901 if (dest_mode == KFmode)
21902 kf_or_tf = 0;
21903 else if (dest_mode == TFmode)
21904 kf_or_tf = 1;
21905 else
21906 gcc_unreachable ();
21907
21908 switch (src_mode)
21909 {
21910 case E_DFmode:
21911 cvt = sext_optab;
21912 hw_convert = hw_conversions[kf_or_tf].from_df;
21913 break;
21914
21915 case E_SFmode:
21916 cvt = sext_optab;
21917 hw_convert = hw_conversions[kf_or_tf].from_sf;
21918 break;
21919
21920 case E_KFmode:
21921 case E_IFmode:
21922 case E_TFmode:
21923 if (FLOAT128_IBM_P (src_mode))
21924 cvt = sext_optab;
21925 else
21926 do_move = true;
21927 break;
21928
21929 case E_SImode:
21930 if (unsigned_p)
21931 {
21932 cvt = ufloat_optab;
21933 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21934 }
21935 else
21936 {
21937 cvt = sfloat_optab;
21938 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21939 }
21940 break;
21941
21942 case E_DImode:
21943 if (unsigned_p)
21944 {
21945 cvt = ufloat_optab;
21946 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21947 }
21948 else
21949 {
21950 cvt = sfloat_optab;
21951 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21952 }
21953 break;
21954
21955 default:
21956 gcc_unreachable ();
21957 }
21958 }
21959
21960 /* Convert from IEEE 128-bit floating point. */
21961 else if (FLOAT128_IEEE_P (src_mode))
21962 {
21963 if (src_mode == KFmode)
21964 kf_or_tf = 0;
21965 else if (src_mode == TFmode)
21966 kf_or_tf = 1;
21967 else
21968 gcc_unreachable ();
21969
21970 switch (dest_mode)
21971 {
21972 case E_DFmode:
21973 cvt = trunc_optab;
21974 hw_convert = hw_conversions[kf_or_tf].to_df;
21975 break;
21976
21977 case E_SFmode:
21978 cvt = trunc_optab;
21979 hw_convert = hw_conversions[kf_or_tf].to_sf;
21980 break;
21981
21982 case E_KFmode:
21983 case E_IFmode:
21984 case E_TFmode:
21985 if (FLOAT128_IBM_P (dest_mode))
21986 cvt = trunc_optab;
21987 else
21988 do_move = true;
21989 break;
21990
21991 case E_SImode:
21992 if (unsigned_p)
21993 {
21994 cvt = ufix_optab;
21995 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
21996 }
21997 else
21998 {
21999 cvt = sfix_optab;
22000 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22001 }
22002 break;
22003
22004 case E_DImode:
22005 if (unsigned_p)
22006 {
22007 cvt = ufix_optab;
22008 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22009 }
22010 else
22011 {
22012 cvt = sfix_optab;
22013 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22014 }
22015 break;
22016
22017 default:
22018 gcc_unreachable ();
22019 }
22020 }
22021
22022 /* Both IBM format. */
22023 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22024 do_move = true;
22025
22026 else
22027 gcc_unreachable ();
22028
22029 /* Handle conversion between TFmode/KFmode/IFmode. */
22030 if (do_move)
22031 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
22032
22033 /* Handle conversion if we have hardware support. */
22034 else if (TARGET_FLOAT128_HW && hw_convert)
22035 emit_insn ((hw_convert) (dest, src));
22036
22037 /* Call an external function to do the conversion. */
22038 else if (cvt != unknown_optab)
22039 {
22040 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22041 gcc_assert (libfunc != NULL_RTX);
22042
22043 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22044 src, src_mode);
22045
22046 gcc_assert (dest2 != NULL_RTX);
22047 if (!rtx_equal_p (dest, dest2))
22048 emit_move_insn (dest, dest2);
22049 }
22050
22051 else
22052 gcc_unreachable ();
22053
22054 return;
22055 }
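/* For example, a DFmode to KFmode widening takes the sext_optab path:
   with hardware IEEE 128-bit support it emits gen_extenddfkf2_hw
   directly, and otherwise it calls the conversion libfunc registered
   for that optab (a __extenddfkf2-style libgcc routine; the exact
   symbol name is the library's concern, not this function's).  */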
22056
22057 \f
22058 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22059 can be used as that dest register. Return the dest register. */
22060
22061 rtx
22062 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22063 {
22064 if (op2 == const0_rtx)
22065 return op1;
22066
22067 if (GET_CODE (scratch) == SCRATCH)
22068 scratch = gen_reg_rtx (mode);
22069
22070 if (logical_operand (op2, mode))
22071 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22072 else
22073 emit_insn (gen_rtx_SET (scratch,
22074 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22075
22076 return scratch;
22077 }
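/* For instance, comparing OP1 against the constant 5 (a logical_operand)
   emits scratch = op1 ^ 5, which is zero exactly on equality, while a
   constant such as -4 fails logical_operand and is handled as
   scratch = op1 + 4 instead.  Either way callers rely only on
   "scratch == 0 iff op1 == op2".  */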
22078
22079 void
22080 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22081 {
22082 rtx condition_rtx;
22083 machine_mode op_mode;
22084 enum rtx_code cond_code;
22085 rtx result = operands[0];
22086
22087 condition_rtx = rs6000_generate_compare (operands[1], mode);
22088 cond_code = GET_CODE (condition_rtx);
22089
22090 if (cond_code == NE
22091 || cond_code == GE || cond_code == LE
22092 || cond_code == GEU || cond_code == LEU
22093 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22094 {
22095 rtx not_result = gen_reg_rtx (CCEQmode);
22096 rtx not_op, rev_cond_rtx;
22097 machine_mode cc_mode;
22098
22099 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22100
22101 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22102 SImode, XEXP (condition_rtx, 0), const0_rtx);
22103 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22104 emit_insn (gen_rtx_SET (not_result, not_op));
22105 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22106 }
22107
22108 op_mode = GET_MODE (XEXP (operands[1], 0));
22109 if (op_mode == VOIDmode)
22110 op_mode = GET_MODE (XEXP (operands[1], 1));
22111
22112 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22113 {
22114 PUT_MODE (condition_rtx, DImode);
22115 convert_move (result, condition_rtx, 0);
22116 }
22117 else
22118 {
22119 PUT_MODE (condition_rtx, SImode);
22120 emit_insn (gen_rtx_SET (result, condition_rtx));
22121 }
22122 }
22123
22124 /* Emit a conditional branch. OPERANDS[0] is the comparison and OPERANDS[3] is the target label. */
22125
22126 void
22127 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22128 {
22129 rtx condition_rtx, loc_ref;
22130
22131 condition_rtx = rs6000_generate_compare (operands[0], mode);
22132 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22133 emit_jump_insn (gen_rtx_SET (pc_rtx,
22134 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22135 loc_ref, pc_rtx)));
22136 }
22137
22138 /* Return the string to output a conditional branch to LABEL, which is
22139 the operand template of the label, or NULL if the branch is really a
22140 conditional return.
22141
22142 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22143 condition code register and its mode specifies what kind of
22144 comparison we made.
22145
22146 REVERSED is nonzero if we should reverse the sense of the comparison.
22147
22148 INSN is the insn. */
22149
22150 char *
22151 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22152 {
22153 static char string[64];
22154 enum rtx_code code = GET_CODE (op);
22155 rtx cc_reg = XEXP (op, 0);
22156 machine_mode mode = GET_MODE (cc_reg);
22157 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22158 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22159 int really_reversed = reversed ^ need_longbranch;
22160 char *s = string;
22161 const char *ccode;
22162 const char *pred;
22163 rtx note;
22164
22165 validate_condition_mode (code, mode);
22166
22167 /* Work out which way this really branches. We could use
22168 reverse_condition_maybe_unordered here unconditionally, but using
22169 reverse_condition where possible makes the resulting assembler clearer. */
22170 if (really_reversed)
22171 {
22172 /* Reversing FP compares needs care -- an ordered compare
22173 becomes an unordered compare and vice versa. */
22174 if (mode == CCFPmode)
22175 code = reverse_condition_maybe_unordered (code);
22176 else
22177 code = reverse_condition (code);
22178 }
22179
22180 switch (code)
22181 {
22182 /* Not all of these are actually distinct opcodes, but
22183 we distinguish them for clarity of the resulting assembler. */
22184 case NE: case LTGT:
22185 ccode = "ne"; break;
22186 case EQ: case UNEQ:
22187 ccode = "eq"; break;
22188 case GE: case GEU:
22189 ccode = "ge"; break;
22190 case GT: case GTU: case UNGT:
22191 ccode = "gt"; break;
22192 case LE: case LEU:
22193 ccode = "le"; break;
22194 case LT: case LTU: case UNLT:
22195 ccode = "lt"; break;
22196 case UNORDERED: ccode = "un"; break;
22197 case ORDERED: ccode = "nu"; break;
22198 case UNGE: ccode = "nl"; break;
22199 case UNLE: ccode = "ng"; break;
22200 default:
22201 gcc_unreachable ();
22202 }
22203
22204 /* Maybe we have a guess as to how likely the branch is. */
22205 pred = "";
22206 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22207 if (note != NULL_RTX)
22208 {
22209 /* PROB is the difference from 50%. */
22210 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22211 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
22212
22213 /* Only hint for highly probable/improbable branches on newer cpus when
22214 we have real profile data, as static prediction overrides processor
22215 dynamic prediction. For older cpus we may as well always hint, but
22216 assume not taken for branches that are very close to 50% as a
22217 mispredicted taken branch is more expensive than a
22218 mispredicted not-taken branch. */
22219 if (rs6000_always_hint
22220 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22221 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22222 && br_prob_note_reliable_p (note)))
22223 {
22224 if (abs (prob) > REG_BR_PROB_BASE / 20
22225 && ((prob > 0) ^ need_longbranch))
22226 pred = "+";
22227 else
22228 pred = "-";
22229 }
22230 }
22231
22232 if (label == NULL)
22233 s += sprintf (s, "b%slr%s ", ccode, pred);
22234 else
22235 s += sprintf (s, "b%s%s ", ccode, pred);
22236
22237 /* We need to escape any '%' characters in the reg_names string.
22238 Assume they'd only be the first character.... */
22239 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22240 *s++ = '%';
22241 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22242
22243 if (label != NULL)
22244 {
22245 /* If the branch distance was too far, we may have to use an
22246 unconditional branch to go the distance. */
22247 if (need_longbranch)
22248 s += sprintf (s, ",$+8\n\tb %s", label);
22249 else
22250 s += sprintf (s, ",%s", label);
22251 }
22252
22253 return string;
22254 }
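/* For example (schematic output; register naming depends on the
   target's reg_names), an EQ test on CR0 against a reachable label
   comes out as
       beq cr0,.L42        or, with a prediction hint, beq+ cr0,.L42
   while a label beyond conditional-branch range is reached by
   inverting the test around an unconditional branch:
       bne cr0,$+8
       b .L42  */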
22255
22256 /* Return insn for VSX or Altivec comparisons. */
22257
22258 static rtx
22259 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22260 {
22261 rtx mask;
22262 machine_mode mode = GET_MODE (op0);
22263
22264 switch (code)
22265 {
22266 default:
22267 break;
22268
22269 case GE:
22270 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22271 return NULL_RTX;
22272 /* FALLTHRU */
22273
22274 case EQ:
22275 case GT:
22276 case GTU:
22277 case ORDERED:
22278 case UNORDERED:
22279 case UNEQ:
22280 case LTGT:
22281 mask = gen_reg_rtx (mode);
22282 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22283 return mask;
22284 }
22285
22286 return NULL_RTX;
22287 }
22288
22289 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22290 DMODE is the expected destination mode. This is a recursive function. */
22291
22292 static rtx
22293 rs6000_emit_vector_compare (enum rtx_code rcode,
22294 rtx op0, rtx op1,
22295 machine_mode dmode)
22296 {
22297 rtx mask;
22298 bool swap_operands = false;
22299 bool try_again = false;
22300
22301 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22302 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22303
22304 /* See if the comparison works as is. */
22305 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22306 if (mask)
22307 return mask;
22308
22309 switch (rcode)
22310 {
22311 case LT:
22312 rcode = GT;
22313 swap_operands = true;
22314 try_again = true;
22315 break;
22316 case LTU:
22317 rcode = GTU;
22318 swap_operands = true;
22319 try_again = true;
22320 break;
22321 case NE:
22322 case UNLE:
22323 case UNLT:
22324 case UNGE:
22325 case UNGT:
22326 /* Invert condition and try again.
22327 e.g., A != B becomes ~(A==B). */
22328 {
22329 enum rtx_code rev_code;
22330 enum insn_code nor_code;
22331 rtx mask2;
22332
22333 rev_code = reverse_condition_maybe_unordered (rcode);
22334 if (rev_code == UNKNOWN)
22335 return NULL_RTX;
22336
22337 nor_code = optab_handler (one_cmpl_optab, dmode);
22338 if (nor_code == CODE_FOR_nothing)
22339 return NULL_RTX;
22340
22341 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22342 if (!mask2)
22343 return NULL_RTX;
22344
22345 mask = gen_reg_rtx (dmode);
22346 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22347 return mask;
22348 }
22349 break;
22350 case GE:
22351 case GEU:
22352 case LE:
22353 case LEU:
22354 /* Try GT/GTU/LT/LTU OR EQ */
22355 {
22356 rtx c_rtx, eq_rtx;
22357 enum insn_code ior_code;
22358 enum rtx_code new_code;
22359
22360 switch (rcode)
22361 {
22362 case GE:
22363 new_code = GT;
22364 break;
22365
22366 case GEU:
22367 new_code = GTU;
22368 break;
22369
22370 case LE:
22371 new_code = LT;
22372 break;
22373
22374 case LEU:
22375 new_code = LTU;
22376 break;
22377
22378 default:
22379 gcc_unreachable ();
22380 }
22381
22382 ior_code = optab_handler (ior_optab, dmode);
22383 if (ior_code == CODE_FOR_nothing)
22384 return NULL_RTX;
22385
22386 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22387 if (!c_rtx)
22388 return NULL_RTX;
22389
22390 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22391 if (!eq_rtx)
22392 return NULL_RTX;
22393
22394 mask = gen_reg_rtx (dmode);
22395 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22396 return mask;
22397 }
22398 break;
22399 default:
22400 return NULL_RTX;
22401 }
22402
22403 if (try_again)
22404 {
22405 if (swap_operands)
22406 std::swap (op0, op1);
22407
22408 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22409 if (mask)
22410 return mask;
22411 }
22412
22413 /* You only get two chances. */
22414 return NULL_RTX;
22415 }
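/* A sketch of the rewrites above, e.g. in V4SImode:
       LT a,b   ->  GT b,a                   (swap operands, retry)
       NE a,b   ->  NOT (EQ a,b)             (reverse, then one_cmpl)
       LE a,b   ->  (LT a,b) IOR (EQ a,b)    (compound test)
   Each fallback does at most one more round of lowering -- hence
   "two chances".  */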
22416
22417 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22418 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22419 operands for the relation operation COND. */
22420
22421 int
22422 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22423 rtx cond, rtx cc_op0, rtx cc_op1)
22424 {
22425 machine_mode dest_mode = GET_MODE (dest);
22426 machine_mode mask_mode = GET_MODE (cc_op0);
22427 enum rtx_code rcode = GET_CODE (cond);
22428 machine_mode cc_mode = CCmode;
22429 rtx mask;
22430 rtx cond2;
22431 bool invert_move = false;
22432
22433 if (VECTOR_UNIT_NONE_P (dest_mode))
22434 return 0;
22435
22436 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22437 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22438
22439 switch (rcode)
22440 {
22441 /* Swap operands if we can, and otherwise fall back to doing the
22442 operation as specified, using a NOR to invert the test. */
22443 case NE:
22444 case UNLE:
22445 case UNLT:
22446 case UNGE:
22447 case UNGT:
22448 /* Invert condition and try again.
22449 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22450 invert_move = true;
22451 rcode = reverse_condition_maybe_unordered (rcode);
22452 if (rcode == UNKNOWN)
22453 return 0;
22454 break;
22455
22456 case GE:
22457 case LE:
22458 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22459 {
22460 /* Invert condition to avoid compound test. */
22461 invert_move = true;
22462 rcode = reverse_condition (rcode);
22463 }
22464 break;
22465
22466 case GTU:
22467 case GEU:
22468 case LTU:
22469 case LEU:
22470 /* Mark unsigned tests with CCUNSmode. */
22471 cc_mode = CCUNSmode;
22472
22473 /* Invert condition to avoid compound test if necessary. */
22474 if (rcode == GEU || rcode == LEU)
22475 {
22476 invert_move = true;
22477 rcode = reverse_condition (rcode);
22478 }
22479 break;
22480
22481 default:
22482 break;
22483 }
22484
22485 /* Get the vector mask for the given relational operations. */
22486 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22487
22488 if (!mask)
22489 return 0;
22490
22491 if (invert_move)
22492 std::swap (op_true, op_false);
22493
22494 /* The comparison mask is known to be -1/0; when the arms are constant
22495 vectors we may be able to use it (or its complement) directly. */
22495 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22496 && (GET_CODE (op_true) == CONST_VECTOR
22497 || GET_CODE (op_false) == CONST_VECTOR))
22498 {
22499 rtx constant_0 = CONST0_RTX (dest_mode);
22500 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22501
22502 if (op_true == constant_m1 && op_false == constant_0)
22503 {
22504 emit_move_insn (dest, mask);
22505 return 1;
22506 }
22507
22508 else if (op_true == constant_0 && op_false == constant_m1)
22509 {
22510 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22511 return 1;
22512 }
22513
22514 /* If we can't use the vector comparison directly, perhaps we can use
22515 the mask for the true or false fields, instead of loading up a
22516 constant. */
22517 if (op_true == constant_m1)
22518 op_true = mask;
22519
22520 if (op_false == constant_0)
22521 op_false = mask;
22522 }
22523
22524 if (!REG_P (op_true) && !SUBREG_P (op_true))
22525 op_true = force_reg (dest_mode, op_true);
22526
22527 if (!REG_P (op_false) && !SUBREG_P (op_false))
22528 op_false = force_reg (dest_mode, op_false);
22529
22530 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22531 CONST0_RTX (dest_mode));
22532 emit_insn (gen_rtx_SET (dest,
22533 gen_rtx_IF_THEN_ELSE (dest_mode,
22534 cond2,
22535 op_true,
22536 op_false)));
22537 return 1;
22538 }
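/* As a rough example, a V2DFmode vcond like
       dest = (a > b) ? c : d
   becomes a vector compare that produces an all-ones/all-zeros mask,
   followed by a bitwise select on that mask (xxsel on VSX, vsel on
   Altivec):
       mask = xvcmpgtdp a,b
       dest = (c & mask) | (d & ~mask)
   The constant-vector special cases above skip the select when the
   mask (or its complement) already is the desired result.  */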
22539
22540 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
22541 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
22542 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
22543 0 if the hardware has no such operation. */
22544
22545 static int
22546 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22547 {
22548 enum rtx_code code = GET_CODE (op);
22549 rtx op0 = XEXP (op, 0);
22550 rtx op1 = XEXP (op, 1);
22551 machine_mode compare_mode = GET_MODE (op0);
22552 machine_mode result_mode = GET_MODE (dest);
22553 bool max_p = false;
22554
22555 if (result_mode != compare_mode)
22556 return 0;
22557
22558 if (code == GE || code == GT)
22559 max_p = true;
22560 else if (code == LE || code == LT)
22561 max_p = false;
22562 else
22563 return 0;
22564
22565 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22566 ;
22567
22568 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22569 max_p = !max_p;
22570
22571 else
22572 return 0;
22573
22574 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22575 return 1;
22576 }
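/* E.g. (illustrative): dest = (a >= b) ? a : b in DFmode maps straight
   to xsmaxcdp dest,a,b, while dest = (a >= b) ? b : a flips MAX_P and
   becomes xsmincdp.  Any other pairing of compared and selected values
   is rejected (return 0).  */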
22577
22578 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22579 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
22580 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
22581 zero/false. Return 0 if the hardware has no such operation. */
22582
22583 static int
22584 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22585 {
22586 enum rtx_code code = GET_CODE (op);
22587 rtx op0 = XEXP (op, 0);
22588 rtx op1 = XEXP (op, 1);
22589 machine_mode result_mode = GET_MODE (dest);
22590 rtx compare_rtx;
22591 rtx cmove_rtx;
22592 rtx clobber_rtx;
22593
22594 if (!can_create_pseudo_p ())
22595 return 0;
22596
22597 switch (code)
22598 {
22599 case EQ:
22600 case GE:
22601 case GT:
22602 break;
22603
22604 case NE:
22605 case LT:
22606 case LE:
22607 code = swap_condition (code);
22608 std::swap (op0, op1);
22609 break;
22610
22611 default:
22612 return 0;
22613 }
22614
22615 /* Generate: [(parallel [(set (dest)
22616 (if_then_else (op (cmp1) (cmp2))
22617 (true)
22618 (false)))
22619 (clobber (scratch))])]. */
22620
22621 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22622 cmove_rtx = gen_rtx_SET (dest,
22623 gen_rtx_IF_THEN_ELSE (result_mode,
22624 compare_rtx,
22625 true_cond,
22626 false_cond));
22627
22628 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22629 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22630 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22631
22632 return 1;
22633 }
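/* Schematically, the parallel emitted above is later split into a
   compare that materializes an all-ones/all-zeros mask plus a select,
   roughly (for GT on DFmode; exact splitting is up to the machine
   description):
       xscmpgtdp mask,op0,op1
       xxsel     dest,false_cond,true_cond,mask
   with the V2DImode scratch clobber supplying the mask register.  */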
22634
22635 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
22636 operands of the last comparison is nonzero/true, FALSE_COND if it
22637 is zero/false. Return 0 if the hardware has no such operation. */
22638
22639 int
22640 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22641 {
22642 enum rtx_code code = GET_CODE (op);
22643 rtx op0 = XEXP (op, 0);
22644 rtx op1 = XEXP (op, 1);
22645 machine_mode compare_mode = GET_MODE (op0);
22646 machine_mode result_mode = GET_MODE (dest);
22647 rtx temp;
22648 bool is_against_zero;
22649
22650 /* These modes should always match. */
22651 if (GET_MODE (op1) != compare_mode
22652 /* In the isel case however, we can use a compare immediate, so
22653 op1 may be a small constant. */
22654 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22655 return 0;
22656 if (GET_MODE (true_cond) != result_mode)
22657 return 0;
22658 if (GET_MODE (false_cond) != result_mode)
22659 return 0;
22660
22661 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22662 if (TARGET_P9_MINMAX
22663 && (compare_mode == SFmode || compare_mode == DFmode)
22664 && (result_mode == SFmode || result_mode == DFmode))
22665 {
22666 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22667 return 1;
22668
22669 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22670 return 1;
22671 }
22672
22673 /* Don't allow using floating point comparisons for integer results for
22674 now. */
22675 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22676 return 0;
22677
22678 /* First, work out if the hardware can do this at all, or
22679 if it's too slow.... */
22680 if (!FLOAT_MODE_P (compare_mode))
22681 {
22682 if (TARGET_ISEL)
22683 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22684 return 0;
22685 }
22686
22687 is_against_zero = op1 == CONST0_RTX (compare_mode);
22688
22689 /* A floating-point subtract might overflow, underflow, or produce
22690 an inexact result, thus changing the floating-point flags, so it
22691 can't be generated if we care about that. It's safe if one side
22692 of the construct is zero, since then no subtract will be
22693 generated. */
22694 if (SCALAR_FLOAT_MODE_P (compare_mode)
22695 && flag_trapping_math && ! is_against_zero)
22696 return 0;
22697
22698 /* Eliminate half of the comparisons by switching operands, this
22699 makes the remaining code simpler. */
22700 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22701 || code == LTGT || code == LT || code == UNLE)
22702 {
22703 code = reverse_condition_maybe_unordered (code);
22704 temp = true_cond;
22705 true_cond = false_cond;
22706 false_cond = temp;
22707 }
22708
22709 /* UNEQ and LTGT take four instructions for a comparison with zero,
22710 so it'll probably be faster to use a branch here too. */
22711 if (code == UNEQ && HONOR_NANS (compare_mode))
22712 return 0;
22713
22714 /* We're going to try to implement comparisons by performing
22715 a subtract, then comparing against zero. Unfortunately,
22716 Inf - Inf is NaN which is not zero, and so if we don't
22717 know that the operand is finite and the comparison
22718 would treat EQ differently from UNORDERED, we can't do it. */
22719 if (HONOR_INFINITIES (compare_mode)
22720 && code != GT && code != UNGE
22721 && (!CONST_DOUBLE_P (op1)
22722 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22723 /* Constructs of the form (a OP b ? a : b) are safe. */
22724 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22725 || (! rtx_equal_p (op0, true_cond)
22726 && ! rtx_equal_p (op1, true_cond))))
22727 return 0;
22728
22729 /* At this point we know we can use fsel. */
22730
22731 /* Reduce the comparison to a comparison against zero. */
22732 if (! is_against_zero)
22733 {
22734 temp = gen_reg_rtx (compare_mode);
22735 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22736 op0 = temp;
22737 op1 = CONST0_RTX (compare_mode);
22738 }
22739
22740 /* If we don't care about NaNs we can reduce some of the comparisons
22741 down to faster ones. */
22742 if (! HONOR_NANS (compare_mode))
22743 switch (code)
22744 {
22745 case GT:
22746 code = LE;
22747 temp = true_cond;
22748 true_cond = false_cond;
22749 false_cond = temp;
22750 break;
22751 case UNGE:
22752 code = GE;
22753 break;
22754 case UNEQ:
22755 code = EQ;
22756 break;
22757 default:
22758 break;
22759 }
22760
22761 /* Now, reduce everything down to a GE. */
22762 switch (code)
22763 {
22764 case GE:
22765 break;
22766
22767 case LE:
22768 temp = gen_reg_rtx (compare_mode);
22769 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22770 op0 = temp;
22771 break;
22772
22773 case ORDERED:
22774 temp = gen_reg_rtx (compare_mode);
22775 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22776 op0 = temp;
22777 break;
22778
22779 case EQ:
22780 temp = gen_reg_rtx (compare_mode);
22781 emit_insn (gen_rtx_SET (temp,
22782 gen_rtx_NEG (compare_mode,
22783 gen_rtx_ABS (compare_mode, op0))));
22784 op0 = temp;
22785 break;
22786
22787 case UNGE:
22788 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22789 temp = gen_reg_rtx (result_mode);
22790 emit_insn (gen_rtx_SET (temp,
22791 gen_rtx_IF_THEN_ELSE (result_mode,
22792 gen_rtx_GE (VOIDmode,
22793 op0, op1),
22794 true_cond, false_cond)));
22795 false_cond = true_cond;
22796 true_cond = temp;
22797
22798 temp = gen_reg_rtx (compare_mode);
22799 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22800 op0 = temp;
22801 break;
22802
22803 case GT:
22804 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22805 temp = gen_reg_rtx (result_mode);
22806 emit_insn (gen_rtx_SET (temp,
22807 gen_rtx_IF_THEN_ELSE (result_mode,
22808 gen_rtx_GE (VOIDmode,
22809 op0, op1),
22810 true_cond, false_cond)));
22811 true_cond = false_cond;
22812 false_cond = temp;
22813
22814 temp = gen_reg_rtx (compare_mode);
22815 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22816 op0 = temp;
22817 break;
22818
22819 default:
22820 gcc_unreachable ();
22821 }
22822
22823 emit_insn (gen_rtx_SET (dest,
22824 gen_rtx_IF_THEN_ELSE (result_mode,
22825 gen_rtx_GE (VOIDmode,
22826 op0, op1),
22827 true_cond, false_cond)));
22828 return 1;
22829 }
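/* Tying the pieces together, an fsel sketch (not a literal dump): with
   NaN/Inf concerns out of the way,
       dest = (a >= b) ? t : f
   reduces to a subtract and a single fsel:
       temp = a - b
       fsel dest,temp,t,f        -- dest = (temp >= 0) ? t : f
   The GE-only canonical form is why LE negates op0, EQ uses
   -fabs (op0), and GT/UNGE build a second, nested fsel.  */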
22830
22831 /* Same as above, but for ints (isel). */
22832
22833 int
22834 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22835 {
22836 rtx condition_rtx, cr;
22837 machine_mode mode = GET_MODE (dest);
22838 enum rtx_code cond_code;
22839 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22840 bool signedp;
22841
22842 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22843 return 0;
22844
22845 /* We still have to do the compare, because isel doesn't do a
22846 compare; it just looks at the CRx bits set by a previous compare
22847 instruction. */
22848 condition_rtx = rs6000_generate_compare (op, mode);
22849 cond_code = GET_CODE (condition_rtx);
22850 cr = XEXP (condition_rtx, 0);
22851 signedp = GET_MODE (cr) == CCmode;
22852
22853 isel_func = (mode == SImode
22854 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22855 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22856
22857 switch (cond_code)
22858 {
22859 case LT: case GT: case LTU: case GTU: case EQ:
22860 /* isel handles these directly. */
22861 break;
22862
22863 default:
22864 /* We need to swap the sense of the comparison. */
22865 {
22866 std::swap (false_cond, true_cond);
22867 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22868 }
22869 break;
22870 }
22871
22872 false_cond = force_reg (mode, false_cond);
22873 if (true_cond != const0_rtx)
22874 true_cond = force_reg (mode, true_cond);
22875
22876 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22877
22878 return 1;
22879 }
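/* For example (schematic): dest = (a < b) ? x : y in SImode becomes
       cmpw cr0,a,b
       isel dest,x,y,0        -- bit 0 of cr0 is the LT bit
   isel can only test one CR bit for "true", so conditions such as GE
   or LE are handled above by reversing the code and swapping the
   arms.  */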
22880
22881 void
22882 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22883 {
22884 machine_mode mode = GET_MODE (op0);
22885 enum rtx_code c;
22886 rtx target;
22887
22888 /* VSX/altivec have direct min/max insns. */
22889 if ((code == SMAX || code == SMIN)
22890 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22891 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22892 {
22893 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22894 return;
22895 }
22896
22897 if (code == SMAX || code == SMIN)
22898 c = GE;
22899 else
22900 c = GEU;
22901
22902 if (code == SMAX || code == UMAX)
22903 target = emit_conditional_move (dest, c, op0, op1, mode,
22904 op0, op1, mode, 0);
22905 else
22906 target = emit_conditional_move (dest, c, op0, op1, mode,
22907 op1, op0, mode, 0);
22908 gcc_assert (target);
22909 if (target != dest)
22910 emit_move_insn (dest, target);
22911 }
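/* Without a direct instruction, min/max degenerates to a conditional
   move, e.g. SMAX is dest = (op0 >= op1) ? op0 : op1, which for FP
   modes lands back in rs6000_emit_cmove (fsel) above.  */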
22912
22913 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22914 COND is true. Mark the jump as unlikely to be taken. */
22915
22916 static void
22917 emit_unlikely_jump (rtx cond, rtx label)
22918 {
22919 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22920 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22921 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22922 }
22923
22924 /* A subroutine of the atomic operation splitters. Emit a load-locked
22925 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22926 the zero_extend operation. */
22927
22928 static void
22929 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22930 {
22931 rtx (*fn) (rtx, rtx) = NULL;
22932
22933 switch (mode)
22934 {
22935 case E_QImode:
22936 fn = gen_load_lockedqi;
22937 break;
22938 case E_HImode:
22939 fn = gen_load_lockedhi;
22940 break;
22941 case E_SImode:
22942 if (GET_MODE (mem) == QImode)
22943 fn = gen_load_lockedqi_si;
22944 else if (GET_MODE (mem) == HImode)
22945 fn = gen_load_lockedhi_si;
22946 else
22947 fn = gen_load_lockedsi;
22948 break;
22949 case E_DImode:
22950 fn = gen_load_lockeddi;
22951 break;
22952 case E_TImode:
22953 fn = gen_load_lockedti;
22954 break;
22955 default:
22956 gcc_unreachable ();
22957 }
22958 emit_insn (fn (reg, mem));
22959 }
22960
22961 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22962 instruction in MODE. */
22963
22964 static void
22965 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22966 {
22967 rtx (*fn) (rtx, rtx, rtx) = NULL;
22968
22969 switch (mode)
22970 {
22971 case E_QImode:
22972 fn = gen_store_conditionalqi;
22973 break;
22974 case E_HImode:
22975 fn = gen_store_conditionalhi;
22976 break;
22977 case E_SImode:
22978 fn = gen_store_conditionalsi;
22979 break;
22980 case E_DImode:
22981 fn = gen_store_conditionaldi;
22982 break;
22983 case E_TImode:
22984 fn = gen_store_conditionalti;
22985 break;
22986 default:
22987 gcc_unreachable ();
22988 }
22989
22990 /* Emit sync before stwcx. to address PPC405 Erratum. */
22991 if (PPC405_ERRATUM77)
22992 emit_insn (gen_hwsync ());
22993
22994 emit_insn (fn (res, mem, val));
22995 }
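/* Together, emit_load_locked and emit_store_conditional form the usual
   reservation loop; e.g. for SImode (schematic assembly):
       1: lwarx  rT,0,rMEM       # load word and set reservation
          ...                    # compute the new value in rN
          stwcx. rN,0,rMEM       # store iff reservation still held
          bne-   1b              # reservation lost: retry
   QImode/HImode use lbarx/lharx where available (power8) and otherwise
   the subword shift/mask helpers below.  */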
22996
22997 /* Expand barriers before and after a load_locked/store_cond sequence. */
22998
22999 static rtx
23000 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23001 {
23002 rtx addr = XEXP (mem, 0);
23003
23004 if (!legitimate_indirect_address_p (addr, reload_completed)
23005 && !legitimate_indexed_address_p (addr, reload_completed))
23006 {
23007 addr = force_reg (Pmode, addr);
23008 mem = replace_equiv_address_nv (mem, addr);
23009 }
23010
23011 switch (model)
23012 {
23013 case MEMMODEL_RELAXED:
23014 case MEMMODEL_CONSUME:
23015 case MEMMODEL_ACQUIRE:
23016 break;
23017 case MEMMODEL_RELEASE:
23018 case MEMMODEL_ACQ_REL:
23019 emit_insn (gen_lwsync ());
23020 break;
23021 case MEMMODEL_SEQ_CST:
23022 emit_insn (gen_hwsync ());
23023 break;
23024 default:
23025 gcc_unreachable ();
23026 }
23027 return mem;
23028 }
23029
23030 static void
23031 rs6000_post_atomic_barrier (enum memmodel model)
23032 {
23033 switch (model)
23034 {
23035 case MEMMODEL_RELAXED:
23036 case MEMMODEL_CONSUME:
23037 case MEMMODEL_RELEASE:
23038 break;
23039 case MEMMODEL_ACQUIRE:
23040 case MEMMODEL_ACQ_REL:
23041 case MEMMODEL_SEQ_CST:
23042 emit_insn (gen_isync ());
23043 break;
23044 default:
23045 gcc_unreachable ();
23046 }
23047 }
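/* The mapping implemented by the two barrier helpers above:

       model              before the ll/sc loop   after it
       relaxed/consume    (nothing)               (nothing)
       acquire            (nothing)               isync
       release            lwsync                  (nothing)
       acq_rel            lwsync                  isync
       seq_cst            hwsync                  isync

   lwsync orders everything except store->load; hwsync (sync) is the
   full fence; isync after the loop's final conditional branch
   provides the acquire half.  */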
23048
23049 /* A subroutine of the various atomic expanders. For sub-word operations,
23050 we must adjust things to operate on SImode. Given the original MEM,
23051 return a new aligned MEM. Also build and return the quantities by
23052 which to shift and mask. */
23053
23054 static rtx
23055 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23056 {
23057 rtx addr, align, shift, mask, mem;
23058 HOST_WIDE_INT shift_mask;
23059 machine_mode mode = GET_MODE (orig_mem);
23060
23061 /* For smaller modes, we have to implement this via SImode. */
23062 shift_mask = (mode == QImode ? 0x18 : 0x10);
23063
23064 addr = XEXP (orig_mem, 0);
23065 addr = force_reg (GET_MODE (addr), addr);
23066
23067 /* Aligned memory containing subword. Generate a new memory. We
23068 do not want any of the existing MEM_ATTR data, as we're now
23069 accessing memory outside the original object. */
23070 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23071 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23072 mem = gen_rtx_MEM (SImode, align);
23073 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23074 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23075 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23076
23077 /* Shift amount for subword relative to aligned word. */
23078 shift = gen_reg_rtx (SImode);
23079 addr = gen_lowpart (SImode, addr);
23080 rtx tmp = gen_reg_rtx (SImode);
23081 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23082 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23083 if (BYTES_BIG_ENDIAN)
23084 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23085 shift, 1, OPTAB_LIB_WIDEN);
23086 *pshift = shift;
23087
23088 /* Mask for insertion. */
23089 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23090 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23091 *pmask = mask;
23092
23093 return mem;
23094 }
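/* Worked example (little-endian, QImode at address A): the containing
   word is at A & -4; SHIFT = (A << 3) & 0x18, i.e. 8 * (A & 3); and
   MASK = 0xff << SHIFT.  Big-endian XORs the shift with 0x18 (0x10 for
   HImode) because the subword then sits at the opposite end of the
   word.  */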
23095
23096 /* A subroutine of the various atomic expanders. For sub-word operands,
23097 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23098
23099 static rtx
23100 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23101 {
23102 rtx x;
23103
23104 x = gen_reg_rtx (SImode);
23105 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23106 gen_rtx_NOT (SImode, mask),
23107 oldval)));
23108
23109 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23110
23111 return x;
23112 }
23113
23114 /* A subroutine of the various atomic expanders. For sub-word operands,
23115 extract WIDE to NARROW via SHIFT. */
23116
23117 static void
23118 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23119 {
23120 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23121 wide, 1, OPTAB_LIB_WIDEN);
23122 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23123 }
23124
23125 /* Expand an atomic compare and swap operation. */
23126
23127 void
23128 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23129 {
23130 rtx boolval, retval, mem, oldval, newval, cond;
23131 rtx label1, label2, x, mask, shift;
23132 machine_mode mode, orig_mode;
23133 enum memmodel mod_s, mod_f;
23134 bool is_weak;
23135
23136 boolval = operands[0];
23137 retval = operands[1];
23138 mem = operands[2];
23139 oldval = operands[3];
23140 newval = operands[4];
23141 is_weak = (INTVAL (operands[5]) != 0);
23142 mod_s = memmodel_base (INTVAL (operands[6]));
23143 mod_f = memmodel_base (INTVAL (operands[7]));
23144 orig_mode = mode = GET_MODE (mem);
23145
23146 mask = shift = NULL_RTX;
23147 if (mode == QImode || mode == HImode)
23148 {
23149 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23150 lwarx and use shift/mask operations. With power8, we need to do the
23151 comparison in SImode, but the store is still done in QI/HImode. */
23152 oldval = convert_modes (SImode, mode, oldval, 1);
23153
23154 if (!TARGET_SYNC_HI_QI)
23155 {
23156 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23157
23158 /* Shift and mask OLDVAL into position within the word. */
23159 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23160 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23161
23162 /* Shift and mask NEWVAL into position within the word. */
23163 newval = convert_modes (SImode, mode, newval, 1);
23164 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23165 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23166 }
23167
23168 /* Prepare to adjust the return value. */
23169 retval = gen_reg_rtx (SImode);
23170 mode = SImode;
23171 }
23172 else if (reg_overlap_mentioned_p (retval, oldval))
23173 oldval = copy_to_reg (oldval);
23174
23175 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23176 oldval = copy_to_mode_reg (mode, oldval);
23177
23178 if (reg_overlap_mentioned_p (retval, newval))
23179 newval = copy_to_reg (newval);
23180
23181 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23182
23183 label1 = NULL_RTX;
23184 if (!is_weak)
23185 {
23186 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23187 emit_label (XEXP (label1, 0));
23188 }
23189 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23190
23191 emit_load_locked (mode, retval, mem);
23192
23193 x = retval;
23194 if (mask)
23195 x = expand_simple_binop (SImode, AND, retval, mask,
23196 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23197
23198 cond = gen_reg_rtx (CCmode);
23199 /* If we have TImode, synthesize a comparison. */
23200 if (mode != TImode)
23201 x = gen_rtx_COMPARE (CCmode, x, oldval);
23202 else
23203 {
23204 rtx xor1_result = gen_reg_rtx (DImode);
23205 rtx xor2_result = gen_reg_rtx (DImode);
23206 rtx or_result = gen_reg_rtx (DImode);
23207 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23208 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23209 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23210 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23211
23212 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23213 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23214 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23215 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23216 }
23217
23218 emit_insn (gen_rtx_SET (cond, x));
23219
23220 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23221 emit_unlikely_jump (x, label2);
23222
23223 x = newval;
23224 if (mask)
23225 x = rs6000_mask_atomic_subword (retval, newval, mask);
23226
23227 emit_store_conditional (orig_mode, cond, mem, x);
23228
23229 if (!is_weak)
23230 {
23231 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23232 emit_unlikely_jump (x, label1);
23233 }
23234
23235 if (!is_mm_relaxed (mod_f))
23236 emit_label (XEXP (label2, 0));
23237
23238 rs6000_post_atomic_barrier (mod_s);
23239
23240 if (is_mm_relaxed (mod_f))
23241 emit_label (XEXP (label2, 0));
23242
23243 if (shift)
23244 rs6000_finish_atomic_subword (operands[1], retval, shift);
23245 else if (mode != GET_MODE (operands[1]))
23246 convert_move (operands[1], retval, 1);
23247
23248 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23249 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23250 emit_insn (gen_rtx_SET (boolval, x));
23251 }
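/* The strong SImode case expands to roughly this (schematic assembly,
   register names invented for the example):
       1: lwarx  rRET,0,rMEM
          cmpw   cr0,rRET,rOLD
          bne-   2f               # current value differs: fail
          stwcx. rNEW,0,rMEM
          bne-   1b               # reservation lost: retry
       2:                         # CR0.EQ holds the success flag
   The weak form omits the backward branch, so it may fail
   spuriously.  */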
23252
23253 /* Expand an atomic exchange operation. */
23254
23255 void
23256 rs6000_expand_atomic_exchange (rtx operands[])
23257 {
23258 rtx retval, mem, val, cond;
23259 machine_mode mode;
23260 enum memmodel model;
23261 rtx label, x, mask, shift;
23262
23263 retval = operands[0];
23264 mem = operands[1];
23265 val = operands[2];
23266 model = memmodel_base (INTVAL (operands[3]));
23267 mode = GET_MODE (mem);
23268
23269 mask = shift = NULL_RTX;
23270 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23271 {
23272 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23273
23274 /* Shift and mask VAL into position within the word. */
23275 val = convert_modes (SImode, mode, val, 1);
23276 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23277 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23278
23279 /* Prepare to adjust the return value. */
23280 retval = gen_reg_rtx (SImode);
23281 mode = SImode;
23282 }
23283
23284 mem = rs6000_pre_atomic_barrier (mem, model);
23285
23286 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23287 emit_label (XEXP (label, 0));
23288
23289 emit_load_locked (mode, retval, mem);
23290
23291 x = val;
23292 if (mask)
23293 x = rs6000_mask_atomic_subword (retval, val, mask);
23294
23295 cond = gen_reg_rtx (CCmode);
23296 emit_store_conditional (mode, cond, mem, x);
23297
23298 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23299 emit_unlikely_jump (x, label);
23300
23301 rs6000_post_atomic_barrier (model);
23302
23303 if (shift)
23304 rs6000_finish_atomic_subword (operands[0], retval, shift);
23305 }
23306
23307 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23308 to perform. MEM is the memory on which to operate. VAL is the second
23309 operand of the binary operator. BEFORE and AFTER are optional locations to
23310 return the value of MEM either before of after the operation. MODEL_RTX
23311 is a CONST_INT containing the memory model to use. */
23312
23313 void
23314 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23315 rtx orig_before, rtx orig_after, rtx model_rtx)
23316 {
23317 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23318 machine_mode mode = GET_MODE (mem);
23319 machine_mode store_mode = mode;
23320 rtx label, x, cond, mask, shift;
23321 rtx before = orig_before, after = orig_after;
23322
23323 mask = shift = NULL_RTX;
23324 /* On power8, we want to use SImode for the operation. On previous systems,
23325 do the operation on the containing word and shift/mask to get the proper
23326 byte or halfword. */
23327 if (mode == QImode || mode == HImode)
23328 {
23329 if (TARGET_SYNC_HI_QI)
23330 {
23331 val = convert_modes (SImode, mode, val, 1);
23332
23333 /* Prepare to adjust the return value. */
23334 before = gen_reg_rtx (SImode);
23335 if (after)
23336 after = gen_reg_rtx (SImode);
23337 mode = SImode;
23338 }
23339 else
23340 {
23341 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23342
23343 /* Shift and mask VAL into position within the word. */
23344 val = convert_modes (SImode, mode, val, 1);
23345 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23346 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23347
23348 switch (code)
23349 {
23350 case IOR:
23351 case XOR:
23352 /* We've already zero-extended VAL. That is sufficient to
23353 make certain that it does not affect other bits. */
23354 mask = NULL;
23355 break;
23356
23357 case AND:
23358 /* If we make certain that all of the other bits in VAL are
23359 set, that will be sufficient to not affect other bits. */
23360 x = gen_rtx_NOT (SImode, mask);
23361 x = gen_rtx_IOR (SImode, x, val);
23362 emit_insn (gen_rtx_SET (val, x));
23363 mask = NULL;
23364 break;
23365
23366 case NOT:
23367 case PLUS:
23368 case MINUS:
23369 /* These will all affect bits outside the field and need
23370 adjustment via MASK within the loop. */
23371 break;
23372
23373 default:
23374 gcc_unreachable ();
23375 }
23376
23377 /* Prepare to adjust the return value. */
23378 before = gen_reg_rtx (SImode);
23379 if (after)
23380 after = gen_reg_rtx (SImode);
23381 store_mode = mode = SImode;
23382 }
23383 }
23384
23385 mem = rs6000_pre_atomic_barrier (mem, model);
23386
23387 label = gen_label_rtx ();
23388 emit_label (label);
23389 label = gen_rtx_LABEL_REF (VOIDmode, label);
23390
23391 if (before == NULL_RTX)
23392 before = gen_reg_rtx (mode);
23393
23394 emit_load_locked (mode, before, mem);
23395
23396 if (code == NOT)
23397 {
23398 x = expand_simple_binop (mode, AND, before, val,
23399 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23400 after = expand_simple_unop (mode, NOT, x, after, 1);
23401 }
23402 else
23403 {
23404 after = expand_simple_binop (mode, code, before, val,
23405 after, 1, OPTAB_LIB_WIDEN);
23406 }
23407
23408 x = after;
23409 if (mask)
23410 {
23411 x = expand_simple_binop (SImode, AND, after, mask,
23412 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23413 x = rs6000_mask_atomic_subword (before, x, mask);
23414 }
23415 else if (store_mode != mode)
23416 x = convert_modes (store_mode, mode, x, 1);
23417
23418 cond = gen_reg_rtx (CCmode);
23419 emit_store_conditional (store_mode, cond, mem, x);
23420
23421 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23422 emit_unlikely_jump (x, label);
23423
23424 rs6000_post_atomic_barrier (model);
23425
23426 if (shift)
23427 {
23428 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23429 then do the calculations in a SImode register. */
23430 if (orig_before)
23431 rs6000_finish_atomic_subword (orig_before, before, shift);
23432 if (orig_after)
23433 rs6000_finish_atomic_subword (orig_after, after, shift);
23434 }
23435 else if (store_mode != mode)
23436 {
23437 /* QImode/HImode on machines with lbarx/lharx where we do the native
23438 operation and then do the calculations in a SImode register. */
23439 if (orig_before)
23440 convert_move (orig_before, before, 1);
23441 if (orig_after)
23442 convert_move (orig_after, after, 1);
23443 }
23444 else if (orig_after && after != orig_after)
23445 emit_move_insn (orig_after, after);
23446 }
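/* E.g. an SImode __atomic_fetch_add expands to roughly (schematic):
       1: lwarx  rBEFORE,0,rMEM
          add    rAFTER,rBEFORE,rVAL
          stwcx. rAFTER,0,rMEM
          bne-   1b
   For NOT (i.e. fetch-and-nand) the body is an AND followed by a NOT,
   and the subword cases splice the new byte/halfword into the
   containing word via MASK before the store-conditional.  */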
23447
23448 /* Emit instructions to move SRC to DST. Called by splitters for
23449 multi-register moves. It will emit at most one instruction for
23450 each register that is accessed; that is, it won't emit li/lis pairs
23451 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23452 register. */
23453
23454 void
23455 rs6000_split_multireg_move (rtx dst, rtx src)
23456 {
23457 /* The register number of the first register being moved. */
23458 int reg;
23459 /* The mode that is to be moved. */
23460 machine_mode mode;
23461 /* The mode that the move is being done in, and its size. */
23462 machine_mode reg_mode;
23463 int reg_mode_size;
23464 /* The number of registers that will be moved. */
23465 int nregs;
23466
23467 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23468 mode = GET_MODE (dst);
23469 nregs = hard_regno_nregs (reg, mode);
23470 if (FP_REGNO_P (reg))
23471 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23472 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23473 else if (ALTIVEC_REGNO_P (reg))
23474 reg_mode = V16QImode;
23475 else
23476 reg_mode = word_mode;
23477 reg_mode_size = GET_MODE_SIZE (reg_mode);
23478
23479 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23480
23481 /* TDmode residing in FP registers is special, since the ISA requires that
23482 the lower-numbered word of a register pair is always the most significant
23483 word, even in little-endian mode. This does not match the usual subreg
23484 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23485 the appropriate constituent registers "by hand" in little-endian mode.
23486
23487 Note we do not need to check for destructive overlap here since TDmode
23488 can only reside in even/odd register pairs. */
23489 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23490 {
23491 rtx p_src, p_dst;
23492 int i;
23493
23494 for (i = 0; i < nregs; i++)
23495 {
23496 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23497 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23498 else
23499 p_src = simplify_gen_subreg (reg_mode, src, mode,
23500 i * reg_mode_size);
23501
23502 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23503 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23504 else
23505 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23506 i * reg_mode_size);
23507
23508 emit_insn (gen_rtx_SET (p_dst, p_src));
23509 }
23510
23511 return;
23512 }
23513
23514 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23515 {
23516 /* Move register range backwards, if we might have destructive
23517 overlap. */
23518 int i;
23519 for (i = nregs - 1; i >= 0; i--)
23520 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23521 i * reg_mode_size),
23522 simplify_gen_subreg (reg_mode, src, mode,
23523 i * reg_mode_size)));
23524 }
23525 else
23526 {
23527 int i;
23528 int j = -1;
23529 bool used_update = false;
23530 rtx restore_basereg = NULL_RTX;
23531
23532 if (MEM_P (src) && INT_REGNO_P (reg))
23533 {
23534 rtx breg;
23535
23536 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23537 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23538 {
23539 rtx delta_rtx;
23540 breg = XEXP (XEXP (src, 0), 0);
23541 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23542 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23543 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23544 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23545 src = replace_equiv_address (src, breg);
23546 }
23547 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23548 {
23549 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23550 {
23551 rtx basereg = XEXP (XEXP (src, 0), 0);
23552 if (TARGET_UPDATE)
23553 {
23554 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23555 emit_insn (gen_rtx_SET (ndst,
23556 gen_rtx_MEM (reg_mode,
23557 XEXP (src, 0))));
23558 used_update = true;
23559 }
23560 else
23561 emit_insn (gen_rtx_SET (basereg,
23562 XEXP (XEXP (src, 0), 1)));
23563 src = replace_equiv_address (src, basereg);
23564 }
23565 else
23566 {
23567 rtx basereg = gen_rtx_REG (Pmode, reg);
23568 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23569 src = replace_equiv_address (src, basereg);
23570 }
23571 }
23572
23573 breg = XEXP (src, 0);
23574 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23575 breg = XEXP (breg, 0);
23576
23577 /* If the base register we are using to address memory is
23578 also a destination reg, then change that register last. */
23579 if (REG_P (breg)
23580 && REGNO (breg) >= REGNO (dst)
23581 && REGNO (breg) < REGNO (dst) + nregs)
23582 j = REGNO (breg) - REGNO (dst);
23583 }
23584 else if (MEM_P (dst) && INT_REGNO_P (reg))
23585 {
23586 rtx breg;
23587
23588 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23589 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23590 {
23591 rtx delta_rtx;
23592 breg = XEXP (XEXP (dst, 0), 0);
23593 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23594 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23595 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23596
23597 /* We have to update the breg before doing the store.
23598 Use store with update, if available. */
23599
23600 if (TARGET_UPDATE)
23601 {
23602 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23603 emit_insn (TARGET_32BIT
23604 ? (TARGET_POWERPC64
23605 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23606 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
23607 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23608 used_update = true;
23609 }
23610 else
23611 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23612 dst = replace_equiv_address (dst, breg);
23613 }
23614 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23615 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23616 {
23617 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23618 {
23619 rtx basereg = XEXP (XEXP (dst, 0), 0);
23620 if (TARGET_UPDATE)
23621 {
23622 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23623 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23624 XEXP (dst, 0)),
23625 nsrc));
23626 used_update = true;
23627 }
23628 else
23629 emit_insn (gen_rtx_SET (basereg,
23630 XEXP (XEXP (dst, 0), 1)));
23631 dst = replace_equiv_address (dst, basereg);
23632 }
23633 else
23634 {
23635 rtx basereg = XEXP (XEXP (dst, 0), 0);
23636 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23637 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23638 && REG_P (basereg)
23639 && REG_P (offsetreg)
23640 && REGNO (basereg) != REGNO (offsetreg));
23641 if (REGNO (basereg) == 0)
23642 {
23643 rtx tmp = offsetreg;
23644 offsetreg = basereg;
23645 basereg = tmp;
23646 }
23647 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23648 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23649 dst = replace_equiv_address (dst, basereg);
23650 }
23651 }
23652 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23653 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23654 }
23655
23656 for (i = 0; i < nregs; i++)
23657 {
23658 /* Calculate index to next subword. */
23659 ++j;
23660 if (j == nregs)
23661 j = 0;
23662
23663 /* If compiler already emitted move of first word by
23664 store with update, no need to do anything. */
23665 if (j == 0 && used_update)
23666 continue;
23667
23668 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23669 j * reg_mode_size),
23670 simplify_gen_subreg (reg_mode, src, mode,
23671 j * reg_mode_size)));
23672 }
23673 if (restore_basereg != NULL_RTX)
23674 emit_insn (restore_basereg);
23675 }
23676 }
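/* Two illustrative cases of the above (schematic): a TImode GPR-to-GPR
   move on 32-bit becomes four SImode moves, issued last-to-first when
   REGNO (src) < REGNO (dst) so an overlapping source is read before it
   is clobbered; and a load from (mem (pre_inc rB)) first bumps rB by
   the full mode size and then reads the pieces at offsets 0, 4, 8, ...
   from the updated base.  */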
23677
23678 static GTY(()) alias_set_type set = -1;
23679
23680 alias_set_type
23681 get_TOC_alias_set (void)
23682 {
23683 if (set == -1)
23684 set = new_alias_set ();
23685 return set;
23686 }
23687
23688 /* Return the internal arg pointer used for function incoming
23689 arguments. With -fsplit-stack, the arg pointer is r12, so we need
23690 to copy it to a pseudo in order for it to be preserved over calls
23691 and suchlike. We'd really like to use a pseudo here for the
23692 internal arg pointer but data-flow analysis is not prepared to
23693 accept pseudos as live at the beginning of a function. */
23694
23695 static rtx
23696 rs6000_internal_arg_pointer (void)
23697 {
23698 if (flag_split_stack
23699 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
23700 == NULL))
23702 {
23703 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
23704 {
23705 rtx pat;
23706
23707 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
23708 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
23709
23710 /* Put the pseudo initialization right after the note at the
23711 beginning of the function. */
23712 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
23713 gen_rtx_REG (Pmode, 12));
23714 push_topmost_sequence ();
23715 emit_insn_after (pat, get_insns ());
23716 pop_topmost_sequence ();
23717 }
23718 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
23719 FIRST_PARM_OFFSET (current_function_decl));
23720 return copy_to_reg (ret);
23721 }
23722 return virtual_incoming_args_rtx;
23723 }
23724
23725 /* We may have to tell the dataflow pass that the split stack prologue
23726 is initializing a register. */
23727
23728 static void
23729 rs6000_live_on_entry (bitmap regs)
23730 {
23731 if (flag_split_stack)
23732 bitmap_set_bit (regs, 12);
23733 }
23734
23735 \f
23736 /* A C compound statement that outputs the assembler code for a thunk
23737 function, used to implement C++ virtual function calls with
23738 multiple inheritance. The thunk acts as a wrapper around a virtual
23739 function, adjusting the implicit object parameter before handing
23740 control off to the real function.
23741
23742 First, emit code to add the integer DELTA to the location that
23743 contains the incoming first argument. Assume that this argument
23744 contains a pointer, and is the one used to pass the `this' pointer
23745 in C++. This is the incoming argument *before* the function
23746 prologue, e.g. `%o0' on a sparc. The addition must preserve the
23747 values of all other incoming arguments.
23748
23749 After the addition, emit code to jump to FUNCTION, which is a
23750 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
23751 not touch the return address. Hence returning from FUNCTION will
23752 return to whoever called the current `thunk'.
23753
23754 The effect must be as if FUNCTION had been called directly with the
23755 adjusted first argument. This macro is responsible for emitting
23756 all of the code for a thunk function; output_function_prologue()
23757 and output_function_epilogue() are not invoked.
23758
23759 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
23760 been extracted from it.) It might possibly be useful on some
23761 targets, but probably not.
23762
23763 If you do not define this macro, the target-independent code in the
23764 C++ frontend will generate a less efficient heavyweight thunk that
23765 calls FUNCTION instead of jumping to it. The generic approach does
23766 not support varargs. */
23767
23768 static void
23769 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
23770 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
23771 tree function)
23772 {
23773 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
23774 rtx this_rtx, funexp;
23775 rtx_insn *insn;
23776
23777 reload_completed = 1;
23778 epilogue_completed = 1;
23779
23780 /* Mark the end of the (empty) prologue. */
23781 emit_note (NOTE_INSN_PROLOGUE_END);
23782
23783 /* Find the "this" pointer. If the function returns a structure,
23784 the structure return pointer is in r3. */
23785 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
23786 this_rtx = gen_rtx_REG (Pmode, 4);
23787 else
23788 this_rtx = gen_rtx_REG (Pmode, 3);
23789
23790 /* Apply the constant offset, if required. */
23791 if (delta)
23792 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
23793
23794 /* Apply the offset from the vtable, if required. */
23795 if (vcall_offset)
23796 {
23797 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
23798 rtx tmp = gen_rtx_REG (Pmode, 12);
23799
23800 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
23801 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
23802 {
23803 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
23804 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
23805 }
23806 else
23807 {
23808 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
23809
23810 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
23811 }
23812 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
23813 }
23814
23815 /* Generate a tail call to the target function. */
23816 if (!TREE_USED (function))
23817 {
23818 assemble_external (function);
23819 TREE_USED (function) = 1;
23820 }
23821 funexp = XEXP (DECL_RTL (function), 0);
23822 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
23823
23824 #if TARGET_MACHO
23825 if (MACHOPIC_INDIRECT)
23826 funexp = machopic_indirect_call_target (funexp);
23827 #endif
23828
23829 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
23830 generate sibcall RTL explicitly. */
23831 insn = emit_call_insn (
23832 gen_rtx_PARALLEL (VOIDmode,
23833 gen_rtvec (3,
23834 gen_rtx_CALL (VOIDmode,
23835 funexp, const0_rtx),
23836 gen_rtx_USE (VOIDmode, const0_rtx),
23837 simple_return_rtx)));
23838 SIBLING_CALL_P (insn) = 1;
23839 emit_barrier ();
23840
23841 /* Run just enough of rest_of_compilation to get the insns emitted.
23842 There's not really enough bulk here to make other passes such as
23843 instruction scheduling worthwhile. */
23844 insn = get_insns ();
23845 shorten_branches (insn);
23846 assemble_start_function (thunk_fndecl, fnname);
23847 final_start_function (insn, file, 1);
23848 final (insn, file, 1);
23849 final_end_function ();
23850 assemble_end_function (thunk_fndecl, fnname);
23851
23852 reload_completed = 0;
23853 epilogue_completed = 0;
23854 }
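/* In the common case (DELTA fits in 16 bits, no vcall offset) the
   thunk therefore assembles to just two instructions, e.g. on 64-bit
   (schematic):
       addi r3,r3,DELTA        # adjust the incoming 'this'
       b    FUNCTION           # sibling call; LR is left untouched
   Larger or vtable-based adjustments add the r12-relative loads
   emitted above.  */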
23855 \f
23856 /* A quick summary of the various types of 'constant-pool tables'
23857 under PowerPC:
23858
23859    Target       Flags            Name              One table per
23860    AIX          (none)           AIX TOC           object file
23861    AIX          -mfull-toc       AIX TOC           object file
23862    AIX          -mminimal-toc    AIX minimal TOC   translation unit
23863    SVR4/EABI    (none)           SVR4 SDATA        object file
23864    SVR4/EABI    -fpic            SVR4 pic          object file
23865    SVR4/EABI    -fPIC            SVR4 PIC          translation unit
23866    SVR4/EABI    -mrelocatable    EABI TOC          function
23867    SVR4/EABI    -maix            AIX TOC           object file
23868    SVR4/EABI    -maix -mminimal-toc
23869                                  AIX minimal TOC   translation unit
23870
23871    Name              Reg.  Set by  entries  contains:
23872                                    made by  addrs?   fp?      sum?
23873
23874    AIX TOC           2     crt0    as       Y        option   option
23875    AIX minimal TOC   30    prolog  gcc      Y        Y        option
23876    SVR4 SDATA        13    crt0    gcc      N        Y        N
23877    SVR4 pic          30    prolog  ld       Y        not yet  N
23878    SVR4 PIC          30    prolog  gcc      Y        option   option
23879    EABI TOC          30    prolog  gcc      Y        option   option
23880
23881 */
23882
23883 /* Hash functions for the hash table. */
23884
23885 static unsigned
23886 rs6000_hash_constant (rtx k)
23887 {
23888 enum rtx_code code = GET_CODE (k);
23889 machine_mode mode = GET_MODE (k);
23890 unsigned result = (code << 3) ^ mode;
23891 const char *format;
23892 int flen, fidx;
23893
23894 format = GET_RTX_FORMAT (code);
23895 flen = strlen (format);
23896 fidx = 0;
23897
23898 switch (code)
23899 {
23900 case LABEL_REF:
23901 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
23902
23903 case CONST_WIDE_INT:
23904 {
23905 int i;
23906 flen = CONST_WIDE_INT_NUNITS (k);
23907 for (i = 0; i < flen; i++)
23908 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
23909 return result;
23910 }
23911
23912 case CONST_DOUBLE:
23913 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
23914
23915 case CODE_LABEL:
23916 fidx = 3;
23917 break;
23918
23919 default:
23920 break;
23921 }
23922
23923 for (; fidx < flen; fidx++)
23924 switch (format[fidx])
23925 {
23926 case 's':
23927 {
23928 unsigned i, len;
23929 const char *str = XSTR (k, fidx);
23930 len = strlen (str);
23931 result = result * 613 + len;
23932 for (i = 0; i < len; i++)
23933 result = result * 613 + (unsigned) str[i];
23934 break;
23935 }
23936 case 'u':
23937 case 'e':
23938 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
23939 break;
23940 case 'i':
23941 case 'n':
23942 result = result * 613 + (unsigned) XINT (k, fidx);
23943 break;
23944 case 'w':
23945 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
23946 result = result * 613 + (unsigned) XWINT (k, fidx);
23947 else
23948 {
23949 size_t i;
23950 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
23951 result = result * 613 + (unsigned) (XWINT (k, fidx)
23952 >> CHAR_BIT * i);
23953 }
23954 break;
23955 case '0':
23956 break;
23957 default:
23958 gcc_unreachable ();
23959 }
23960
23961 return result;
23962 }
23963
23964 hashval_t
23965 toc_hasher::hash (toc_hash_struct *thc)
23966 {
23967 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
23968 }
23969
23970 /* Compare H1 and H2 for equivalence. */
23971
23972 bool
23973 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
23974 {
23975 rtx r1 = h1->key;
23976 rtx r2 = h2->key;
23977
23978 if (h1->key_mode != h2->key_mode)
23979 return 0;
23980
23981 return rtx_equal_p (r1, r2);
23982 }
23983
23984 /* These are the names given by the C++ front-end to vtables and
23985 vtable-like objects. Ideally, this logic should not be here;
23986 instead, there should be some programmatic way of inquiring as
23987 to whether or not an object is a vtable. */
23988
23989 #define VTABLE_NAME_P(NAME) \
23990 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
23991 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
23992 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
23993 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
23994 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
23995
23996 #ifdef NO_DOLLAR_IN_LABEL
23997 /* Return a GGC-allocated character string translating dollar signs in
23998 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
23999
24000 const char *
24001 rs6000_xcoff_strip_dollar (const char *name)
24002 {
24003 char *strip, *p;
24004 const char *q;
24005 size_t len;
24006
24007 q = (const char *) strchr (name, '$');
24008
24009 if (q == 0 || q == name)
24010 return name;
24011
24012 len = strlen (name);
24013 strip = XALLOCAVEC (char, len + 1);
24014 strcpy (strip, name);
24015 p = strip + (q - name);
24016 while (p)
24017 {
24018 *p = '_';
24019 p = strchr (p + 1, '$');
24020 }
24021
24022 return ggc_alloc_string (strip, len);
24023 }
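/* E.g. rs6000_xcoff_strip_dollar ("foo$stub$1") returns "foo_stub_1";
   a name whose first character is '$' is returned unchanged, since the
   early return above fires for q == name.  */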
24024 #endif
24025
24026 void
24027 rs6000_output_symbol_ref (FILE *file, rtx x)
24028 {
24029 const char *name = XSTR (x, 0);
24030
24031 /* Currently C++ toc references to vtables can be emitted before it
24032 is decided whether the vtable is public or private. If this is
24033 the case, then the linker will eventually complain that there is
24034 a reference to an unknown section. Thus, for vtables only,
24035 we emit the TOC reference to reference the identifier and not the
24036 symbol. */
24037 if (VTABLE_NAME_P (name))
24038 {
24039 RS6000_OUTPUT_BASENAME (file, name);
24040 }
24041 else
24042 assemble_name (file, name);
24043 }
24044
24045 /* Output a TOC entry. We derive the entry name from what is being
24046 written. */
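
/* For example, on 64-bit AIX without -mminimal-toc, the DFmode
   constant 1.0 produces output along these lines (illustrative):

       LC..7:
               .tc FD_3ff00000_0[TC],0x3ff0000000000000

   while ELF targets emit the internal label followed by a bare
   .quad/.long directive instead of a .tc entry.  */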
24047
24048 void
24049 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
24050 {
24051 char buf[256];
24052 const char *name = buf;
24053 rtx base = x;
24054 HOST_WIDE_INT offset = 0;
24055
24056 gcc_assert (!TARGET_NO_TOC);
24057
24058 /* When the linker won't eliminate them, don't output duplicate
24059 TOC entries (this happens on AIX if there is any kind of TOC,
24060 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
24061 CODE_LABELs. */
24062 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
24063 {
24064 struct toc_hash_struct *h;
24065
24066 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
24067 time because GGC is not initialized at that point. */
24068 if (toc_hash_table == NULL)
24069 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
24070
24071 h = ggc_alloc<toc_hash_struct> ();
24072 h->key = x;
24073 h->key_mode = mode;
24074 h->labelno = labelno;
24075
24076 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
24077 if (*found == NULL)
24078 *found = h;
24079 else /* This is indeed a duplicate.
24080 Set this label equal to that label. */
24081 {
24082 fputs ("\t.set ", file);
24083 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24084 fprintf (file, "%d,", labelno);
24085 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24086 fprintf (file, "%d\n", ((*found)->labelno));
24087
24088 #ifdef HAVE_AS_TLS
24089 if (TARGET_XCOFF && SYMBOL_REF_P (x)
24090 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
24091 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
24092 {
24093 fputs ("\t.set ", file);
24094 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24095 fprintf (file, "%d,", labelno);
24096 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24097 fprintf (file, "%d\n", ((*found)->labelno));
24098 }
24099 #endif
24100 return;
24101 }
24102 }
24103
24104 /* If we're going to put a double constant in the TOC, make sure it's
24105 aligned properly when strict alignment is on. */
24106 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
24107 && STRICT_ALIGNMENT
24108 && GET_MODE_BITSIZE (mode) >= 64
24109 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
24110 ASM_OUTPUT_ALIGN (file, 3);
24111
24112
24113 (*targetm.asm_out.internal_label) (file, "LC", labelno);
24114
24115 /* Handle FP constants specially. Note that if we have a minimal
24116 TOC, things we put here aren't actually in the TOC, so we can allow
24117 FP constants. */
24118 if (CONST_DOUBLE_P (x)
24119 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
24120 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
24121 {
24122 long k[4];
24123
24124 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24125 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
24126 else
24127 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24128
24129 if (TARGET_64BIT)
24130 {
24131 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24132 fputs (DOUBLE_INT_ASM_OP, file);
24133 else
24134 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24135 k[0] & 0xffffffff, k[1] & 0xffffffff,
24136 k[2] & 0xffffffff, k[3] & 0xffffffff);
24137 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
24138 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24139 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
24140 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
24141 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
24142 return;
24143 }
24144 else
24145 {
24146 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24147 fputs ("\t.long ", file);
24148 else
24149 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24150 k[0] & 0xffffffff, k[1] & 0xffffffff,
24151 k[2] & 0xffffffff, k[3] & 0xffffffff);
24152 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
24153 k[0] & 0xffffffff, k[1] & 0xffffffff,
24154 k[2] & 0xffffffff, k[3] & 0xffffffff);
24155 return;
24156 }
24157 }
24158 else if (CONST_DOUBLE_P (x)
24159 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
24160 {
24161 long k[2];
24162
24163 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24164 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
24165 else
24166 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
24167
24168 if (TARGET_64BIT)
24169 {
24170 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24171 fputs (DOUBLE_INT_ASM_OP, file);
24172 else
24173 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24174 k[0] & 0xffffffff, k[1] & 0xffffffff);
24175 fprintf (file, "0x%lx%08lx\n",
24176 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24177 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
24178 return;
24179 }
24180 else
24181 {
24182 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24183 fputs ("\t.long ", file);
24184 else
24185 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24186 k[0] & 0xffffffff, k[1] & 0xffffffff);
24187 fprintf (file, "0x%lx,0x%lx\n",
24188 k[0] & 0xffffffff, k[1] & 0xffffffff);
24189 return;
24190 }
24191 }
24192 else if (CONST_DOUBLE_P (x)
24193 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
24194 {
24195 long l;
24196
24197 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24198 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
24199 else
24200 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
24201
24202 if (TARGET_64BIT)
24203 {
24204 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24205 fputs (DOUBLE_INT_ASM_OP, file);
24206 else
24207 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24208 if (WORDS_BIG_ENDIAN)
24209 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
24210 else
24211 fprintf (file, "0x%lx\n", l & 0xffffffff);
24212 return;
24213 }
24214 else
24215 {
24216 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24217 fputs ("\t.long ", file);
24218 else
24219 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24220 fprintf (file, "0x%lx\n", l & 0xffffffff);
24221 return;
24222 }
24223 }
24224 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
24225 {
24226 unsigned HOST_WIDE_INT low;
24227 HOST_WIDE_INT high;
24228
24229 low = INTVAL (x) & 0xffffffff;
24230 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
24231
24232 /* TOC entries are always Pmode-sized, so when big-endian
24233 smaller integer constants in the TOC need to be padded.
24234 (This is still a win over putting the constants in
24235 a separate constant pool, because then we'd have
24236 to have both a TOC entry _and_ the actual constant.)
24237
24238 For a 32-bit target, CONST_INT values are loaded and shifted
24239 entirely within `low' and can be stored in one TOC entry. */
24240
24241 /* It would be easy to make this work, but it doesn't now. */
24242 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
24243
24244 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
24245 {
24246 low |= high << 32;
24247 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
24248 high = (HOST_WIDE_INT) low >> 32;
24249 low &= 0xffffffff;
24250 }
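
/* Illustrative example: an SImode (const_int 0x1234) on a 64-bit
   big-endian target has POINTER_SIZE (64) > GET_MODE_BITSIZE (32),
   so the value is shifted into the high half and the emitted TOC
   doubleword reads 0x0000123400000000.  */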
24251
24252 if (TARGET_64BIT)
24253 {
24254 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24255 fputs (DOUBLE_INT_ASM_OP, file);
24256 else
24257 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24258 (long) high & 0xffffffff, (long) low & 0xffffffff);
24259 fprintf (file, "0x%lx%08lx\n",
24260 (long) high & 0xffffffff, (long) low & 0xffffffff);
24261 return;
24262 }
24263 else
24264 {
24265 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
24266 {
24267 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24268 fputs ("\t.long ", file);
24269 else
24270 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24271 (long) high & 0xffffffff, (long) low & 0xffffffff);
24272 fprintf (file, "0x%lx,0x%lx\n",
24273 (long) high & 0xffffffff, (long) low & 0xffffffff);
24274 }
24275 else
24276 {
24277 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24278 fputs ("\t.long ", file);
24279 else
24280 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
24281 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
24282 }
24283 return;
24284 }
24285 }
24286
24287 if (GET_CODE (x) == CONST)
24288 {
24289 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
24290 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
24291
24292 base = XEXP (XEXP (x, 0), 0);
24293 offset = INTVAL (XEXP (XEXP (x, 0), 1));
24294 }
24295
24296 switch (GET_CODE (base))
24297 {
24298 case SYMBOL_REF:
24299 name = XSTR (base, 0);
24300 break;
24301
24302 case LABEL_REF:
24303 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
24304 CODE_LABEL_NUMBER (XEXP (base, 0)));
24305 break;
24306
24307 case CODE_LABEL:
24308 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
24309 break;
24310
24311 default:
24312 gcc_unreachable ();
24313 }
24314
24315 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24316 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
24317 else
24318 {
24319 fputs ("\t.tc ", file);
24320 RS6000_OUTPUT_BASENAME (file, name);
24321
24322 if (offset < 0)
24323 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
24324 else if (offset)
24325 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
24326
24327 /* Mark large TOC symbols on AIX with [TE] so they are mapped
24328 after other TOC symbols, reducing overflow of small TOC access
24329 to [TC] symbols. */
24330 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
24331 ? "[TE]," : "[TC],", file);
24332 }
24333
24334 /* Currently C++ toc references to vtables can be emitted before it
24335 is decided whether the vtable is public or private. If this is
24336 the case, then the linker will eventually complain that there is
24337 a TOC reference to an unknown section. Thus, for vtables only,
24338 we emit the TOC reference to reference the symbol and not the
24339 section. */
24340 if (VTABLE_NAME_P (name))
24341 {
24342 RS6000_OUTPUT_BASENAME (file, name);
24343 if (offset < 0)
24344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
24345 else if (offset > 0)
24346 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
24347 }
24348 else
24349 output_addr_const (file, x);
24350
24351 #if HAVE_AS_TLS
24352 if (TARGET_XCOFF && SYMBOL_REF_P (base))
24353 {
24354 switch (SYMBOL_REF_TLS_MODEL (base))
24355 {
24356 case 0:
24357 break;
24358 case TLS_MODEL_LOCAL_EXEC:
24359 fputs ("@le", file);
24360 break;
24361 case TLS_MODEL_INITIAL_EXEC:
24362 fputs ("@ie", file);
24363 break;
24364 /* Use global-dynamic for local-dynamic. */
24365 case TLS_MODEL_GLOBAL_DYNAMIC:
24366 case TLS_MODEL_LOCAL_DYNAMIC:
24367 putc ('\n', file);
24368 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
24369 fputs ("\t.tc .", file);
24370 RS6000_OUTPUT_BASENAME (file, name);
24371 fputs ("[TC],", file);
24372 output_addr_const (file, x);
24373 fputs ("@m", file);
24374 break;
24375 default:
24376 gcc_unreachable ();
24377 }
24378 }
24379 #endif
24380
24381 putc ('\n', file);
24382 }
24383 \f
24384 /* Output an assembler pseudo-op to write an ASCII string of N characters
24385 starting at P to FILE.
24386
24387 On the RS/6000, we have to do this using the .byte operation and
24388 write out special characters outside the quoted string.
24389 Also, the assembler is broken; very long strings are truncated,
24390 so we must artificially break them up early. */
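
/* For example, the 3-byte input "Hi\n" is emitted as (illustrative):

       .byte "Hi"
       .byte 10
*/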
24391
24392 void
24393 output_ascii (FILE *file, const char *p, int n)
24394 {
24395 char c;
24396 int i, count_string;
24397 const char *for_string = "\t.byte \"";
24398 const char *for_decimal = "\t.byte ";
24399 const char *to_close = NULL;
24400
24401 count_string = 0;
24402 for (i = 0; i < n; i++)
24403 {
24404 c = *p++;
24405 if (c >= ' ' && c < 0177)
24406 {
24407 if (for_string)
24408 fputs (for_string, file);
24409 putc (c, file);
24410
24411 /* Write two quotes to get one. */
24412 if (c == '"')
24413 {
24414 putc (c, file);
24415 ++count_string;
24416 }
24417
24418 for_string = NULL;
24419 for_decimal = "\"\n\t.byte ";
24420 to_close = "\"\n";
24421 ++count_string;
24422
24423 if (count_string >= 512)
24424 {
24425 fputs (to_close, file);
24426
24427 for_string = "\t.byte \"";
24428 for_decimal = "\t.byte ";
24429 to_close = NULL;
24430 count_string = 0;
24431 }
24432 }
24433 else
24434 {
24435 if (for_decimal)
24436 fputs (for_decimal, file);
24437 fprintf (file, "%d", c);
24438
24439 for_string = "\n\t.byte \"";
24440 for_decimal = ", ";
24441 to_close = "\n";
24442 count_string = 0;
24443 }
24444 }
24445
24446 /* Now close the string if we have written one. Then end the line. */
24447 if (to_close)
24448 fputs (to_close, file);
24449 }
24450 \f
24451 /* Generate a unique section name for FILENAME for a section type
24452 represented by SECTION_DESC. Output goes into BUF.
24453
24454 SECTION_DESC can be any string, as long as it is different for each
24455 possible section type.
24456
24457 We name the section in the same manner as xlc. The name begins with an
24458 underscore followed by the filename (after stripping any leading directory
24459 names) with the last period replaced by the string SECTION_DESC. If
24460 FILENAME does not contain a period, SECTION_DESC is appended to the end of
24461 the name. */
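
/* For example, FILENAME "src/foo.c" with SECTION_DESC "_bss_" yields
   "_foo_bss_": the directory part is stripped, the characters before
   the last period are kept, and the suffix after the period is
   dropped in favor of SECTION_DESC.  */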
24462
24463 void
24464 rs6000_gen_section_name (char **buf, const char *filename,
24465 const char *section_desc)
24466 {
24467 const char *q, *after_last_slash, *last_period = 0;
24468 char *p;
24469 int len;
24470
24471 after_last_slash = filename;
24472 for (q = filename; *q; q++)
24473 {
24474 if (*q == '/')
24475 after_last_slash = q + 1;
24476 else if (*q == '.')
24477 last_period = q;
24478 }
24479
24480 len = strlen (after_last_slash) + strlen (section_desc) + 2;
24481 *buf = (char *) xmalloc (len);
24482
24483 p = *buf;
24484 *p++ = '_';
24485
24486 for (q = after_last_slash; *q; q++)
24487 {
24488 if (q == last_period)
24489 {
24490 strcpy (p, section_desc);
24491 p += strlen (section_desc);
24492 break;
24493 }
24494
24495 else if (ISALNUM (*q))
24496 *p++ = *q;
24497 }
24498
24499 if (last_period == 0)
24500 strcpy (p, section_desc);
24501 else
24502 *p = '\0';
24503 }
24504 \f
24505 /* Emit profile function. */
24506
24507 void
24508 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
24509 {
24510 /* Non-standard profiling for kernels, which just saves LR then calls
24511 _mcount without worrying about arg saves. The idea is to change
24512 the function prologue as little as possible as it isn't easy to
24513 account for arg save/restore code added just for _mcount. */
24514 if (TARGET_PROFILE_KERNEL)
24515 return;
24516
24517 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24518 {
24519 #ifndef NO_PROFILE_COUNTERS
24520 # define NO_PROFILE_COUNTERS 0
24521 #endif
24522 if (NO_PROFILE_COUNTERS)
24523 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24524 LCT_NORMAL, VOIDmode);
24525 else
24526 {
24527 char buf[30];
24528 const char *label_name;
24529 rtx fun;
24530
24531 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24532 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
24533 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
24534
24535 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24536 LCT_NORMAL, VOIDmode, fun, Pmode);
24537 }
24538 }
24539 else if (DEFAULT_ABI == ABI_DARWIN)
24540 {
24541 const char *mcount_name = RS6000_MCOUNT;
24542 int caller_addr_regno = LR_REGNO;
24543
24544 /* Be conservative and always set this, at least for now. */
24545 crtl->uses_pic_offset_table = 1;
24546
24547 #if TARGET_MACHO
24548 /* For PIC code, set up a stub and collect the caller's address
24549 from r0, which is where the prologue puts it. */
24550 if (MACHOPIC_INDIRECT
24551 && crtl->uses_pic_offset_table)
24552 caller_addr_regno = 0;
24553 #endif
24554 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
24555 LCT_NORMAL, VOIDmode,
24556 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
24557 }
24558 }
24559
24560 /* Write function profiler code. */
24561
24562 void
24563 output_function_profiler (FILE *file, int labelno)
24564 {
24565 char buf[100];
24566
24567 switch (DEFAULT_ABI)
24568 {
24569 default:
24570 gcc_unreachable ();
24571
24572 case ABI_V4:
24573 if (!TARGET_32BIT)
24574 {
24575 warning (0, "no profiling of 64-bit code for this ABI");
24576 return;
24577 }
24578 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24579 fprintf (file, "\tmflr %s\n", reg_names[0]);
24580 if (NO_PROFILE_COUNTERS)
24581 {
24582 asm_fprintf (file, "\tstw %s,4(%s)\n",
24583 reg_names[0], reg_names[1]);
24584 }
24585 else if (TARGET_SECURE_PLT && flag_pic)
24586 {
24587 if (TARGET_LINK_STACK)
24588 {
24589 char name[32];
24590 get_ppc476_thunk_name (name);
24591 asm_fprintf (file, "\tbl %s\n", name);
24592 }
24593 else
24594 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
24595 asm_fprintf (file, "\tstw %s,4(%s)\n",
24596 reg_names[0], reg_names[1]);
24597 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24598 asm_fprintf (file, "\taddis %s,%s,",
24599 reg_names[12], reg_names[12]);
24600 assemble_name (file, buf);
24601 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
24602 assemble_name (file, buf);
24603 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
24604 }
24605 else if (flag_pic == 1)
24606 {
24607 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
24608 asm_fprintf (file, "\tstw %s,4(%s)\n",
24609 reg_names[0], reg_names[1]);
24610 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24611 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
24612 assemble_name (file, buf);
24613 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
24614 }
24615 else if (flag_pic > 1)
24616 {
24617 asm_fprintf (file, "\tstw %s,4(%s)\n",
24618 reg_names[0], reg_names[1]);
24619 /* Now, we need to get the address of the label. */
24620 if (TARGET_LINK_STACK)
24621 {
24622 char name[32];
24623 get_ppc476_thunk_name (name);
24624 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
24625 assemble_name (file, buf);
24626 fputs ("-.\n1:", file);
24627 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24628 asm_fprintf (file, "\taddi %s,%s,4\n",
24629 reg_names[11], reg_names[11]);
24630 }
24631 else
24632 {
24633 fputs ("\tbcl 20,31,1f\n\t.long ", file);
24634 assemble_name (file, buf);
24635 fputs ("-.\n1:", file);
24636 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24637 }
24638 asm_fprintf (file, "\tlwz %s,0(%s)\n",
24639 reg_names[0], reg_names[11]);
24640 asm_fprintf (file, "\tadd %s,%s,%s\n",
24641 reg_names[0], reg_names[0], reg_names[11]);
24642 }
24643 else
24644 {
24645 asm_fprintf (file, "\tlis %s,", reg_names[12]);
24646 assemble_name (file, buf);
24647 fputs ("@ha\n", file);
24648 asm_fprintf (file, "\tstw %s,4(%s)\n",
24649 reg_names[0], reg_names[1]);
24650 asm_fprintf (file, "\tla %s,", reg_names[0]);
24651 assemble_name (file, buf);
24652 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
24653 }
24654
24655 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
24656 fprintf (file, "\tbl %s%s\n",
24657 RS6000_MCOUNT, flag_pic ? "@plt" : "");
24658 break;
24659
24660 case ABI_AIX:
24661 case ABI_ELFv2:
24662 case ABI_DARWIN:
24663 /* Don't do anything, done in output_profile_hook (). */
24664 break;
24665 }
24666 }
24667
24668 \f
24669
24670 /* The following variable holds the last insn issued by the scheduler. */
24671
24672 static rtx_insn *last_scheduled_insn;
24673
24674 /* The following variable helps balance the issuing of load and
24675 store instructions. */
24676
24677 static int load_store_pendulum;
24678
24679 /* The following variable helps pair divide insns during scheduling. */
24680 static int divide_cnt;
24681 /* The following variable helps pair and alternate vector and vector load
24682 insns during scheduling. */
24683 static int vec_pairing;
24684
24685
24686 /* Power4 load update and store update instructions are cracked into a
24687 load or store and an integer insn which are executed in the same cycle.
24688 Branches have their own dispatch slot which does not count against the
24689 GCC issue rate, but it changes the program flow so there are no other
24690 instructions to issue in this cycle. */
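
/* For example, on a dispatch-group target with MORE == 4, a cracked
   insn (two internal ops) leaves 2 slots in the group, while a
   microcoded insn ends the group and leaves 0.  */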
24691
24692 static int
24693 rs6000_variable_issue_1 (rtx_insn *insn, int more)
24694 {
24695 last_scheduled_insn = insn;
24696 if (GET_CODE (PATTERN (insn)) == USE
24697 || GET_CODE (PATTERN (insn)) == CLOBBER)
24698 {
24699 cached_can_issue_more = more;
24700 return cached_can_issue_more;
24701 }
24702
24703 if (insn_terminates_group_p (insn, current_group))
24704 {
24705 cached_can_issue_more = 0;
24706 return cached_can_issue_more;
24707 }
24708
24709 /* If INSN has no reservation but we reach here, return MORE unchanged. */
24710 if (recog_memoized (insn) < 0)
24711 return more;
24712
24713 if (rs6000_sched_groups)
24714 {
24715 if (is_microcoded_insn (insn))
24716 cached_can_issue_more = 0;
24717 else if (is_cracked_insn (insn))
24718 cached_can_issue_more = more > 2 ? more - 2 : 0;
24719 else
24720 cached_can_issue_more = more - 1;
24721
24722 return cached_can_issue_more;
24723 }
24724
24725 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
24726 return 0;
24727
24728 cached_can_issue_more = more - 1;
24729 return cached_can_issue_more;
24730 }
24731
24732 static int
24733 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
24734 {
24735 int r = rs6000_variable_issue_1 (insn, more);
24736 if (verbose)
24737 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
24738 return r;
24739 }
24740
24741 /* Adjust the cost of a scheduling dependency. Return the new cost of
24742 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
24743
24744 static int
24745 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
24746 unsigned int)
24747 {
24748 enum attr_type attr_type;
24749
24750 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
24751 return cost;
24752
24753 switch (dep_type)
24754 {
24755 case REG_DEP_TRUE:
24756 {
24757 /* Data dependency; DEP_INSN writes a register that INSN reads
24758 some cycles later. */
24759
24760 /* Separate a load from a narrower, dependent store. */
24761 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
24762 || rs6000_tune == PROCESSOR_FUTURE)
24763 && GET_CODE (PATTERN (insn)) == SET
24764 && GET_CODE (PATTERN (dep_insn)) == SET
24765 && MEM_P (XEXP (PATTERN (insn), 1))
24766 && MEM_P (XEXP (PATTERN (dep_insn), 0))
24767 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
24768 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
24769 return cost + 14;
24770
24771 attr_type = get_attr_type (insn);
24772
24773 switch (attr_type)
24774 {
24775 case TYPE_JMPREG:
24776 /* Tell the first scheduling pass about the latency between
24777 a mtctr and bctr (and mtlr and br/blr). The first
24778 scheduling pass will not know about this latency since
24779 the mtctr instruction, which has the latency associated
24780 to it, will be generated by reload. */
24781 return 4;
24782 case TYPE_BRANCH:
24783 /* Leave some extra cycles between a compare and its
24784 dependent branch, to inhibit expensive mispredicts. */
24785 if ((rs6000_tune == PROCESSOR_PPC603
24786 || rs6000_tune == PROCESSOR_PPC604
24787 || rs6000_tune == PROCESSOR_PPC604e
24788 || rs6000_tune == PROCESSOR_PPC620
24789 || rs6000_tune == PROCESSOR_PPC630
24790 || rs6000_tune == PROCESSOR_PPC750
24791 || rs6000_tune == PROCESSOR_PPC7400
24792 || rs6000_tune == PROCESSOR_PPC7450
24793 || rs6000_tune == PROCESSOR_PPCE5500
24794 || rs6000_tune == PROCESSOR_PPCE6500
24795 || rs6000_tune == PROCESSOR_POWER4
24796 || rs6000_tune == PROCESSOR_POWER5
24797 || rs6000_tune == PROCESSOR_POWER7
24798 || rs6000_tune == PROCESSOR_POWER8
24799 || rs6000_tune == PROCESSOR_POWER9
24800 || rs6000_tune == PROCESSOR_FUTURE
24801 || rs6000_tune == PROCESSOR_CELL)
24802 && recog_memoized (dep_insn)
24803 && (INSN_CODE (dep_insn) >= 0))
24804
24805 switch (get_attr_type (dep_insn))
24806 {
24807 case TYPE_CMP:
24808 case TYPE_FPCOMPARE:
24809 case TYPE_CR_LOGICAL:
24810 return cost + 2;
24811 case TYPE_EXTS:
24812 case TYPE_MUL:
24813 if (get_attr_dot (dep_insn) == DOT_YES)
24814 return cost + 2;
24815 else
24816 break;
24817 case TYPE_SHIFT:
24818 if (get_attr_dot (dep_insn) == DOT_YES
24819 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
24820 return cost + 2;
24821 else
24822 break;
24823 default:
24824 break;
24825 }
24826 break;
24827
24828 case TYPE_STORE:
24829 case TYPE_FPSTORE:
24830 if ((rs6000_tune == PROCESSOR_POWER6)
24831 && recog_memoized (dep_insn)
24832 && (INSN_CODE (dep_insn) >= 0))
24833 {
24834
24835 if (GET_CODE (PATTERN (insn)) != SET)
24836 /* If this happens, we have to extend this to schedule
24837 optimally. Return default for now. */
24838 return cost;
24839
24840 /* Adjust the cost for the case where the value written
24841 by a fixed point operation is used as the address
24842 gen value on a store. */
24843 switch (get_attr_type (dep_insn))
24844 {
24845 case TYPE_LOAD:
24846 case TYPE_CNTLZ:
24847 {
24848 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24849 return get_attr_sign_extend (dep_insn)
24850 == SIGN_EXTEND_YES ? 6 : 4;
24851 break;
24852 }
24853 case TYPE_SHIFT:
24854 {
24855 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24856 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24857 6 : 3;
24858 break;
24859 }
24860 case TYPE_INTEGER:
24861 case TYPE_ADD:
24862 case TYPE_LOGICAL:
24863 case TYPE_EXTS:
24864 case TYPE_INSERT:
24865 {
24866 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24867 return 3;
24868 break;
24869 }
24870 case TYPE_STORE:
24871 case TYPE_FPLOAD:
24872 case TYPE_FPSTORE:
24873 {
24874 if (get_attr_update (dep_insn) == UPDATE_YES
24875 && ! rs6000_store_data_bypass_p (dep_insn, insn))
24876 return 3;
24877 break;
24878 }
24879 case TYPE_MUL:
24880 {
24881 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24882 return 17;
24883 break;
24884 }
24885 case TYPE_DIV:
24886 {
24887 if (! rs6000_store_data_bypass_p (dep_insn, insn))
24888 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
24889 break;
24890 }
24891 default:
24892 break;
24893 }
24894 }
24895 break;
24896
24897 case TYPE_LOAD:
24898 if ((rs6000_tune == PROCESSOR_POWER6)
24899 && recog_memoized (dep_insn)
24900 && (INSN_CODE (dep_insn) >= 0))
24901 {
24902
24903 /* Adjust the cost for the case where the value written
24904 by a fixed point instruction is used within the address
24905 gen portion of a subsequent load(u)(x) */
24906 switch (get_attr_type (dep_insn))
24907 {
24908 case TYPE_LOAD:
24909 case TYPE_CNTLZ:
24910 {
24911 if (set_to_load_agen (dep_insn, insn))
24912 return get_attr_sign_extend (dep_insn)
24913 == SIGN_EXTEND_YES ? 6 : 4;
24914 break;
24915 }
24916 case TYPE_SHIFT:
24917 {
24918 if (set_to_load_agen (dep_insn, insn))
24919 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
24920 6 : 3;
24921 break;
24922 }
24923 case TYPE_INTEGER:
24924 case TYPE_ADD:
24925 case TYPE_LOGICAL:
24926 case TYPE_EXTS:
24927 case TYPE_INSERT:
24928 {
24929 if (set_to_load_agen (dep_insn, insn))
24930 return 3;
24931 break;
24932 }
24933 case TYPE_STORE:
24934 case TYPE_FPLOAD:
24935 case TYPE_FPSTORE:
24936 {
24937 if (get_attr_update (dep_insn) == UPDATE_YES
24938 && set_to_load_agen (dep_insn, insn))
24939 return 3;
24940 break;
24941 }
24942 case TYPE_MUL:
24943 {
24944 if (set_to_load_agen (dep_insn, insn))
24945 return 17;
24946 break;
24947 }
24948 case TYPE_DIV:
24949 {
24950 if (set_to_load_agen (dep_insn, insn))
24951 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
24952 break;
24953 }
24954 default:
24955 break;
24956 }
24957 }
24958 break;
24959
24960 case TYPE_FPLOAD:
24961 if ((rs6000_tune == PROCESSOR_POWER6)
24962 && get_attr_update (insn) == UPDATE_NO
24963 && recog_memoized (dep_insn)
24964 && (INSN_CODE (dep_insn) >= 0)
24965 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
24966 return 2;
24967 break;
24968 default:
24969 break;
24970 }
24971
24972 /* Fall out to return default cost. */
24973 }
24974 break;
24975
24976 case REG_DEP_OUTPUT:
24977 /* Output dependency; DEP_INSN writes a register that INSN writes some
24978 cycles later. */
24979 if ((rs6000_tune == PROCESSOR_POWER6)
24980 && recog_memoized (dep_insn)
24981 && (INSN_CODE (dep_insn) >= 0))
24982 {
24983 attr_type = get_attr_type (insn);
24984
24985 switch (attr_type)
24986 {
24987 case TYPE_FP:
24988 case TYPE_FPSIMPLE:
24989 if (get_attr_type (dep_insn) == TYPE_FP
24990 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
24991 return 1;
24992 break;
24993 case TYPE_FPLOAD:
24994 if (get_attr_update (insn) == UPDATE_NO
24995 && get_attr_type (dep_insn) == TYPE_MFFGPR)
24996 return 2;
24997 break;
24998 default:
24999 break;
25000 }
25001 }
25002 /* Fall through, no cost for output dependency. */
25003 /* FALLTHRU */
25004
25005 case REG_DEP_ANTI:
25006 /* Anti dependency; DEP_INSN reads a register that INSN writes some
25007 cycles later. */
25008 return 0;
25009
25010 default:
25011 gcc_unreachable ();
25012 }
25013
25014 return cost;
25015 }
25016
25017 /* Debug version of rs6000_adjust_cost. */
25018
25019 static int
25020 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
25021 int cost, unsigned int dw)
25022 {
25023 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
25024
25025 if (ret != cost)
25026 {
25027 const char *dep;
25028
25029 switch (dep_type)
25030 {
25031 default: dep = "unknown dependency"; break;
25032 case REG_DEP_TRUE: dep = "data dependency"; break;
25033 case REG_DEP_OUTPUT: dep = "output dependency"; break;
25034 case REG_DEP_ANTI: dep = "anti dependency"; break;
25035 }
25036
25037 fprintf (stderr,
25038 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
25039 "%s, insn:\n", ret, cost, dep);
25040
25041 debug_rtx (insn);
25042 }
25043
25044 return ret;
25045 }
25046
25047 /* The function returns true if INSN is microcoded.
25048 Return false otherwise. */
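/* For example, on Power4/5 an indexed load with update such as lwzux
   satisfies the (TYPE_LOAD, UPDATE_YES, INDEXED_YES) test below and
   is treated as microcoded.  */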
25049
25050 static bool
25051 is_microcoded_insn (rtx_insn *insn)
25052 {
25053 if (!insn || !NONDEBUG_INSN_P (insn)
25054 || GET_CODE (PATTERN (insn)) == USE
25055 || GET_CODE (PATTERN (insn)) == CLOBBER)
25056 return false;
25057
25058 if (rs6000_tune == PROCESSOR_CELL)
25059 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
25060
25061 if (rs6000_sched_groups
25062 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25063 {
25064 enum attr_type type = get_attr_type (insn);
25065 if ((type == TYPE_LOAD
25066 && get_attr_update (insn) == UPDATE_YES
25067 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25068 || ((type == TYPE_LOAD || type == TYPE_STORE)
25069 && get_attr_update (insn) == UPDATE_YES
25070 && get_attr_indexed (insn) == INDEXED_YES)
25071 || type == TYPE_MFCR)
25072 return true;
25073 }
25074
25075 return false;
25076 }
25077
25078 /* The function returns true if INSN is cracked into 2 instructions
25079 by the processor (and therefore occupies 2 issue slots). */
25080
25081 static bool
25082 is_cracked_insn (rtx_insn *insn)
25083 {
25084 if (!insn || !NONDEBUG_INSN_P (insn)
25085 || GET_CODE (PATTERN (insn)) == USE
25086 || GET_CODE (PATTERN (insn)) == CLOBBER)
25087 return false;
25088
25089 if (rs6000_sched_groups
25090 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
25091 {
25092 enum attr_type type = get_attr_type (insn);
25093 if ((type == TYPE_LOAD
25094 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
25095 && get_attr_update (insn) == UPDATE_NO)
25096 || (type == TYPE_LOAD
25097 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
25098 && get_attr_update (insn) == UPDATE_YES
25099 && get_attr_indexed (insn) == INDEXED_NO)
25100 || (type == TYPE_STORE
25101 && get_attr_update (insn) == UPDATE_YES
25102 && get_attr_indexed (insn) == INDEXED_NO)
25103 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
25104 && get_attr_update (insn) == UPDATE_YES)
25105 || (type == TYPE_CR_LOGICAL
25106 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
25107 || (type == TYPE_EXTS
25108 && get_attr_dot (insn) == DOT_YES)
25109 || (type == TYPE_SHIFT
25110 && get_attr_dot (insn) == DOT_YES
25111 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
25112 || (type == TYPE_MUL
25113 && get_attr_dot (insn) == DOT_YES)
25114 || type == TYPE_DIV
25115 || (type == TYPE_INSERT
25116 && get_attr_size (insn) == SIZE_32))
25117 return true;
25118 }
25119
25120 return false;
25121 }
25122
25123 /* The function returns true if INSN can be issued only from
25124 the branch slot. */
25125
25126 static bool
25127 is_branch_slot_insn (rtx_insn *insn)
25128 {
25129 if (!insn || !NONDEBUG_INSN_P (insn)
25130 || GET_CODE (PATTERN (insn)) == USE
25131 || GET_CODE (PATTERN (insn)) == CLOBBER)
25132 return false;
25133
25134 if (rs6000_sched_groups)
25135 {
25136 enum attr_type type = get_attr_type (insn);
25137 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
25138 return true;
25139 return false;
25140 }
25141
25142 return false;
25143 }
25144
25145 /* The function returns true if OUT_INSN sets a value that is
25146 used in the address generation computation of IN_INSN. */
25147 static bool
25148 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
25149 {
25150 rtx out_set, in_set;
25151
25152 /* For performance reasons, only handle the simple case where
25153 both loads are a single_set. */
25154 out_set = single_set (out_insn);
25155 if (out_set)
25156 {
25157 in_set = single_set (in_insn);
25158 if (in_set)
25159 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
25160 }
25161
25162 return false;
25163 }
25164
25165 /* Try to determine base/offset/size parts of the given MEM.
25166 Return true if successful, false if all the values couldn't
25167 be determined.
25168
25169 This function only looks for REG or REG+CONST address forms.
25170 REG+REG address form will return false. */
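
/* For example, (mem:DI (plus:DI (reg:DI 9) (const_int 8))) with a
   known size yields *BASE = (reg:DI 9), *OFFSET = 8 and *SIZE = 8,
   while a REG+REG (indexed) address makes the function return
   false.  */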
25171
25172 static bool
25173 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
25174 HOST_WIDE_INT *size)
25175 {
25176 rtx addr_rtx;
25177 if (MEM_SIZE_KNOWN_P (mem))
25178 *size = MEM_SIZE (mem);
25179 else
25180 return false;
25181
25182 addr_rtx = (XEXP (mem, 0));
25183 if (GET_CODE (addr_rtx) == PRE_MODIFY)
25184 addr_rtx = XEXP (addr_rtx, 1);
25185
25186 *offset = 0;
25187 while (GET_CODE (addr_rtx) == PLUS
25188 && CONST_INT_P (XEXP (addr_rtx, 1)))
25189 {
25190 *offset += INTVAL (XEXP (addr_rtx, 1));
25191 addr_rtx = XEXP (addr_rtx, 0);
25192 }
25193 if (!REG_P (addr_rtx))
25194 return false;
25195
25196 *base = addr_rtx;
25197 return true;
25198 }
25199
25200 /* Return true if the target storage location of MEM1 is adjacent
25201 to the target storage location of MEM2. */
25202
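/* For example, two 4-byte stores to 8(r9) and 12(r9) are adjacent
   (offset 8 + size 4 == offset 12), whereas stores to 8(r9) and
   20(r9) are not.  */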
25203
25204 static bool
25205 adjacent_mem_locations (rtx mem1, rtx mem2)
25206 {
25207 rtx reg1, reg2;
25208 HOST_WIDE_INT off1, size1, off2, size2;
25209
25210 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25211 && get_memref_parts (mem2, &reg2, &off2, &size2))
25212 return ((REGNO (reg1) == REGNO (reg2))
25213 && ((off1 + size1 == off2)
25214 || (off2 + size2 == off1)));
25215
25216 return false;
25217 }
25218
25219 /* This function returns true if it can be determined that the two MEM
25220 locations overlap by at least 1 byte based on base reg/offset/size. */
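
/* For example, an 8-byte store at 8(r9) overlaps a 4-byte load at
   12(r9), since the interval [8,16) contains offset 12.  */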
25221
25222 static bool
25223 mem_locations_overlap (rtx mem1, rtx mem2)
25224 {
25225 rtx reg1, reg2;
25226 HOST_WIDE_INT off1, size1, off2, size2;
25227
25228 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25229 && get_memref_parts (mem2, &reg2, &off2, &size2))
25230 return ((REGNO (reg1) == REGNO (reg2))
25231 && (((off1 <= off2) && (off1 + size1 > off2))
25232 || ((off2 <= off1) && (off2 + size2 > off1))));
25233
25234 return false;
25235 }
25236
25237 /* A C statement (sans semicolon) to update the integer scheduling
25238 priority INSN_PRIORITY (INSN). Increase the priority to execute the
25239 INSN earlier, reduce the priority to execute INSN later. Do not
25240 define this macro if you do not need to adjust the scheduling
25241 priorities of insns. */
25242
25243 static int
25244 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
25245 {
25246 rtx load_mem, str_mem;
25247 /* On machines (like the 750) which have asymmetric integer units,
25248 where one integer unit can do multiply and divides and the other
25249 can't, reduce the priority of multiply/divide so it is scheduled
25250 before other integer operations. */
25251
25252 #if 0
25253 if (! INSN_P (insn))
25254 return priority;
25255
25256 if (GET_CODE (PATTERN (insn)) == USE)
25257 return priority;
25258
25259 switch (rs6000_tune) {
25260 case PROCESSOR_PPC750:
25261 switch (get_attr_type (insn))
25262 {
25263 default:
25264 break;
25265
25266 case TYPE_MUL:
25267 case TYPE_DIV:
25268 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
25269 priority, priority);
25270 if (priority >= 0 && priority < 0x01000000)
25271 priority >>= 3;
25272 break;
25273 }
25274 }
25275 #endif
25276
25277 if (insn_must_be_first_in_group (insn)
25278 && reload_completed
25279 && current_sched_info->sched_max_insns_priority
25280 && rs6000_sched_restricted_insns_priority)
25281 {
25282
25283 /* Prioritize insns that can be dispatched only in the first
25284 dispatch slot. */
25285 if (rs6000_sched_restricted_insns_priority == 1)
25286 /* Attach highest priority to insn. This means that in
25287 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
25288 precede 'priority' (critical path) considerations. */
25289 return current_sched_info->sched_max_insns_priority;
25290 else if (rs6000_sched_restricted_insns_priority == 2)
25291 /* Increase priority of insn by a minimal amount. This means that in
25292 haifa-sched.c:ready_sort(), only 'priority' (critical path)
25293 considerations precede dispatch-slot restriction considerations. */
25294 return (priority + 1);
25295 }
25296
25297 if (rs6000_tune == PROCESSOR_POWER6
25298 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
25299 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
25300 /* Attach highest priority to insn if the scheduler has just issued two
25301 stores and this instruction is a load, or two loads and this instruction
25302 is a store. Power6 wants loads and stores scheduled alternately
25303 when possible. */
25304 return current_sched_info->sched_max_insns_priority;
25305
25306 return priority;
25307 }
25308
25309 /* Return true if the instruction is nonpipelined on the Cell. */
25310 static bool
25311 is_nonpipeline_insn (rtx_insn *insn)
25312 {
25313 enum attr_type type;
25314 if (!insn || !NONDEBUG_INSN_P (insn)
25315 || GET_CODE (PATTERN (insn)) == USE
25316 || GET_CODE (PATTERN (insn)) == CLOBBER)
25317 return false;
25318
25319 type = get_attr_type (insn);
25320 if (type == TYPE_MUL
25321 || type == TYPE_DIV
25322 || type == TYPE_SDIV
25323 || type == TYPE_DDIV
25324 || type == TYPE_SSQRT
25325 || type == TYPE_DSQRT
25326 || type == TYPE_MFCR
25327 || type == TYPE_MFCRF
25328 || type == TYPE_MFJMPR)
25329 {
25330 return true;
25331 }
25332 return false;
25333 }
25334
25335
25336 /* Return how many instructions the machine can issue per cycle. */
25337
25338 static int
25339 rs6000_issue_rate (void)
25340 {
25341 /* Unless scheduling for register pressure, use issue rate of 1 for
25342 first scheduling pass to decrease degradation. */
25343 if (!reload_completed && !flag_sched_pressure)
25344 return 1;
25345
25346 switch (rs6000_tune) {
25347 case PROCESSOR_RS64A:
25348 case PROCESSOR_PPC601: /* ? */
25349 case PROCESSOR_PPC7450:
25350 return 3;
25351 case PROCESSOR_PPC440:
25352 case PROCESSOR_PPC603:
25353 case PROCESSOR_PPC750:
25354 case PROCESSOR_PPC7400:
25355 case PROCESSOR_PPC8540:
25356 case PROCESSOR_PPC8548:
25357 case PROCESSOR_CELL:
25358 case PROCESSOR_PPCE300C2:
25359 case PROCESSOR_PPCE300C3:
25360 case PROCESSOR_PPCE500MC:
25361 case PROCESSOR_PPCE500MC64:
25362 case PROCESSOR_PPCE5500:
25363 case PROCESSOR_PPCE6500:
25364 case PROCESSOR_TITAN:
25365 return 2;
25366 case PROCESSOR_PPC476:
25367 case PROCESSOR_PPC604:
25368 case PROCESSOR_PPC604e:
25369 case PROCESSOR_PPC620:
25370 case PROCESSOR_PPC630:
25371 return 4;
25372 case PROCESSOR_POWER4:
25373 case PROCESSOR_POWER5:
25374 case PROCESSOR_POWER6:
25375 case PROCESSOR_POWER7:
25376 return 5;
25377 case PROCESSOR_POWER8:
25378 return 7;
25379 case PROCESSOR_POWER9:
25380 case PROCESSOR_FUTURE:
25381 return 6;
25382 default:
25383 return 1;
25384 }
25385 }
25386
25387 /* Return how many instructions to look ahead for better insn
25388 scheduling. */
25389
25390 static int
25391 rs6000_use_sched_lookahead (void)
25392 {
25393 switch (rs6000_tune)
25394 {
25395 case PROCESSOR_PPC8540:
25396 case PROCESSOR_PPC8548:
25397 return 4;
25398
25399 case PROCESSOR_CELL:
25400 return (reload_completed ? 8 : 0);
25401
25402 default:
25403 return 0;
25404 }
25405 }
25406
25407 /* We are choosing insn from the ready queue. Return zero if INSN can be
25408 chosen. */
25409 static int
25410 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
25411 {
25412 if (ready_index == 0)
25413 return 0;
25414
25415 if (rs6000_tune != PROCESSOR_CELL)
25416 return 0;
25417
25418 gcc_assert (insn != NULL_RTX && INSN_P (insn));
25419
25420 if (!reload_completed
25421 || is_nonpipeline_insn (insn)
25422 || is_microcoded_insn (insn))
25423 return 1;
25424
25425 return 0;
25426 }
25427
25428 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
25429 and return true. */
25430
25431 static bool
25432 find_mem_ref (rtx pat, rtx *mem_ref)
25433 {
25434 const char * fmt;
25435 int i, j;
25436
25437 /* stack_tie does not produce any real memory traffic. */
25438 if (tie_operand (pat, VOIDmode))
25439 return false;
25440
25441 if (MEM_P (pat))
25442 {
25443 *mem_ref = pat;
25444 return true;
25445 }
25446
25447 /* Recursively process the pattern. */
25448 fmt = GET_RTX_FORMAT (GET_CODE (pat));
25449
25450 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
25451 {
25452 if (fmt[i] == 'e')
25453 {
25454 if (find_mem_ref (XEXP (pat, i), mem_ref))
25455 return true;
25456 }
25457 else if (fmt[i] == 'E')
25458 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
25459 {
25460 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
25461 return true;
25462 }
25463 }
25464
25465 return false;
25466 }
25467
25468 /* Determine if PAT is a PATTERN of a load insn. */
25469
25470 static bool
25471 is_load_insn1 (rtx pat, rtx *load_mem)
25472 {
25473 if (!pat)
25474 return false;
25475
25476 if (GET_CODE (pat) == SET)
25477 return find_mem_ref (SET_SRC (pat), load_mem);
25478
25479 if (GET_CODE (pat) == PARALLEL)
25480 {
25481 int i;
25482
25483 for (i = 0; i < XVECLEN (pat, 0); i++)
25484 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
25485 return true;
25486 }
25487
25488 return false;
25489 }
25490
25491 /* Determine if INSN loads from memory. */
25492
25493 static bool
25494 is_load_insn (rtx insn, rtx *load_mem)
25495 {
25496 if (!insn || !INSN_P (insn))
25497 return false;
25498
25499 if (CALL_P (insn))
25500 return false;
25501
25502 return is_load_insn1 (PATTERN (insn), load_mem);
25503 }
25504
25505 /* Determine if PAT is a PATTERN of a store insn. */
25506
25507 static bool
25508 is_store_insn1 (rtx pat, rtx *str_mem)
25509 {
25510 if (!pat)
25511 return false;
25512
25513 if (GET_CODE (pat) == SET)
25514 return find_mem_ref (SET_DEST (pat), str_mem);
25515
25516 if (GET_CODE (pat) == PARALLEL)
25517 {
25518 int i;
25519
25520 for (i = 0; i < XVECLEN (pat, 0); i++)
25521 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
25522 return true;
25523 }
25524
25525 return false;
25526 }
25527
25528 /* Determine if INSN stores to memory. */
25529
25530 static bool
25531 is_store_insn (rtx insn, rtx *str_mem)
25532 {
25533 if (!insn || !INSN_P (insn))
25534 return false;
25535
25536 return is_store_insn1 (PATTERN (insn), str_mem);
25537 }
25538
25539 /* Return whether TYPE is a Power9 pairable vector instruction type. */
25540
25541 static bool
25542 is_power9_pairable_vec_type (enum attr_type type)
25543 {
25544 switch (type)
25545 {
25546 case TYPE_VECSIMPLE:
25547 case TYPE_VECCOMPLEX:
25548 case TYPE_VECDIV:
25549 case TYPE_VECCMP:
25550 case TYPE_VECPERM:
25551 case TYPE_VECFLOAT:
25552 case TYPE_VECFDIV:
25553 case TYPE_VECDOUBLE:
25554 return true;
25555 default:
25556 break;
25557 }
25558 return false;
25559 }
25560
25561 /* Returns whether the dependence between INSN and NEXT is considered
25562 costly by the given target. */
25563
25564 static bool
25565 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
25566 {
25567 rtx insn;
25568 rtx next;
25569 rtx load_mem, str_mem;
25570
25571 /* If the flag is not enabled - no dependence is considered costly;
25572 allow all dependent insns in the same group.
25573 This is the most aggressive option. */
25574 if (rs6000_sched_costly_dep == no_dep_costly)
25575 return false;
25576
25577 /* If the flag is set to 1 - a dependence is always considered costly;
25578 do not allow dependent instructions in the same group.
25579 This is the most conservative option. */
25580 if (rs6000_sched_costly_dep == all_deps_costly)
25581 return true;
25582
25583 insn = DEP_PRO (dep);
25584 next = DEP_CON (dep);
25585
25586 if (rs6000_sched_costly_dep == store_to_load_dep_costly
25587 && is_load_insn (next, &load_mem)
25588 && is_store_insn (insn, &str_mem))
25589 /* Prevent load after store in the same group. */
25590 return true;
25591
25592 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
25593 && is_load_insn (next, &load_mem)
25594 && is_store_insn (insn, &str_mem)
25595 && DEP_TYPE (dep) == REG_DEP_TRUE
25596 && mem_locations_overlap(str_mem, load_mem))
25597 /* Prevent load after store in the same group if it is a true
25598 dependence. */
25599 return true;
25600
25601 /* The flag is set to X; dependences with latency >= X are considered costly,
25602 and will not be scheduled in the same group. */
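/* For example, with -msched-costly-dep=3, a dependence whose
   remaining latency (cost - distance) is 3 or more is kept out of
   the current dispatch group.  */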
25603 if (rs6000_sched_costly_dep <= max_dep_latency
25604 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
25605 return true;
25606
25607 return false;
25608 }
25609
25610 /* Return the next insn after INSN that is found before TAIL is reached,
25611 skipping any "non-active" insns - insns that will not actually occupy
25612 an issue slot. Return NULL_RTX if such an insn is not found. */
25613
25614 static rtx_insn *
25615 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
25616 {
25617 if (insn == NULL_RTX || insn == tail)
25618 return NULL;
25619
25620 while (1)
25621 {
25622 insn = NEXT_INSN (insn);
25623 if (insn == NULL_RTX || insn == tail)
25624 return NULL;
25625
25626 if (CALL_P (insn)
25627 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
25628 || (NONJUMP_INSN_P (insn)
25629 && GET_CODE (PATTERN (insn)) != USE
25630 && GET_CODE (PATTERN (insn)) != CLOBBER
25631 && INSN_CODE (insn) != CODE_FOR_stack_tie))
25632 break;
25633 }
25634 return insn;
25635 }
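
/* The "rotate READY[POS] to the end of the ready list" idiom recurs
   several times below.  A minimal sketch of the idiom, using a
   hypothetical helper name that is not part of this file:  */
#if 0
static void
move_to_ready_tail (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp = ready[pos];

  /* Slide everything after POS down one slot, then place the chosen
     insn at LASTPOS, where the scheduler picks first.  */
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
#endif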
25636
25637 /* Do Power9 specific sched_reorder2 reordering of ready list. */
25638
25639 static int
25640 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
25641 {
25642 int pos;
25643 int i;
25644 rtx_insn *tmp;
25645 enum attr_type type, type2;
25646
25647 type = get_attr_type (last_scheduled_insn);
25648
25649 /* Try to issue fixed point divides back-to-back in pairs so they will be
25650 routed to separate execution units and execute in parallel. */
25651 if (type == TYPE_DIV && divide_cnt == 0)
25652 {
25653 /* First divide has been scheduled. */
25654 divide_cnt = 1;
25655
25656 /* Scan the ready list looking for another divide, if found move it
25657 to the end of the list so it is chosen next. */
25658 pos = lastpos;
25659 while (pos >= 0)
25660 {
25661 if (recog_memoized (ready[pos]) >= 0
25662 && get_attr_type (ready[pos]) == TYPE_DIV)
25663 {
25664 tmp = ready[pos];
25665 for (i = pos; i < lastpos; i++)
25666 ready[i] = ready[i + 1];
25667 ready[lastpos] = tmp;
25668 break;
25669 }
25670 pos--;
25671 }
25672 }
25673 else
25674 {
25675 /* Last insn was the 2nd divide or not a divide, reset the counter. */
25676 divide_cnt = 0;
25677
25678 /* The best dispatch throughput for vector and vector load insns can be
25679 achieved by interleaving a vector and vector load such that they'll
25680 dispatch to the same superslice. If this pairing cannot be achieved
25681 then it is best to pair vector insns together and vector load insns
25682 together.
25683
25684 To aid in this pairing, vec_pairing maintains the current state with
25685 the following values:
25686
25687 0 : Initial state, no vecload/vector pairing has been started.
25688
25689 1 : A vecload or vector insn has been issued and a candidate for
25690 pairing has been found and moved to the end of the ready
25691 list. */
25692 if (type == TYPE_VECLOAD)
25693 {
25694 /* Issued a vecload. */
25695 if (vec_pairing == 0)
25696 {
25697 int vecload_pos = -1;
25698 /* We issued a single vecload, look for a vector insn to pair it
25699 with. If one isn't found, try to pair another vecload. */
25700 pos = lastpos;
25701 while (pos >= 0)
25702 {
25703 if (recog_memoized (ready[pos]) >= 0)
25704 {
25705 type2 = get_attr_type (ready[pos]);
25706 if (is_power9_pairable_vec_type (type2))
25707 {
25708 /* Found a vector insn to pair with, move it to the
25709 end of the ready list so it is scheduled next. */
25710 tmp = ready[pos];
25711 for (i = pos; i < lastpos; i++)
25712 ready[i] = ready[i + 1];
25713 ready[lastpos] = tmp;
25714 vec_pairing = 1;
25715 return cached_can_issue_more;
25716 }
25717 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
25718 /* Remember position of first vecload seen. */
25719 vecload_pos = pos;
25720 }
25721 pos--;
25722 }
25723 if (vecload_pos >= 0)
25724 {
25725 /* Didn't find a vector to pair with but did find a vecload,
25726 move it to the end of the ready list. */
25727 tmp = ready[vecload_pos];
25728 for (i = vecload_pos; i < lastpos; i++)
25729 ready[i] = ready[i + 1];
25730 ready[lastpos] = tmp;
25731 vec_pairing = 1;
25732 return cached_can_issue_more;
25733 }
25734 }
25735 }
25736 else if (is_power9_pairable_vec_type (type))
25737 {
25738 /* Issued a vector operation. */
25739 if (vec_pairing == 0)
25740 {
25741 int vec_pos = -1;
25742 /* We issued a single vector insn, look for a vecload to pair it
25743 with. If one isn't found, try to pair another vector. */
25744 pos = lastpos;
25745 while (pos >= 0)
25746 {
25747 if (recog_memoized (ready[pos]) >= 0)
25748 {
25749 type2 = get_attr_type (ready[pos]);
25750 if (type2 == TYPE_VECLOAD)
25751 {
25752 /* Found a vecload insn to pair with, move it to the
25753 end of the ready list so it is scheduled next. */
25754 tmp = ready[pos];
25755 for (i = pos; i < lastpos; i++)
25756 ready[i] = ready[i + 1];
25757 ready[lastpos] = tmp;
25758 vec_pairing = 1;
25759 return cached_can_issue_more;
25760 }
25761 else if (is_power9_pairable_vec_type (type2)
25762 && vec_pos == -1)
25763 /* Remember position of first vector insn seen. */
25764 vec_pos = pos;
25765 }
25766 pos--;
25767 }
25768 if (vec_pos >= 0)
25769 {
25770 /* Didn't find a vecload to pair with but did find a vector
25771 insn, move it to the end of the ready list. */
25772 tmp = ready[vec_pos];
25773 for (i = vec_pos; i < lastpos; i++)
25774 ready[i] = ready[i + 1];
25775 ready[lastpos] = tmp;
25776 vec_pairing = 1;
25777 return cached_can_issue_more;
25778 }
25779 }
25780 }
25781
25782 /* We've either finished a vec/vecload pair, couldn't find an insn to
25783 continue the current pair, or the last insn had nothing to do
25784 with pairing. In any case, reset the state. */
25785 vec_pairing = 0;
25786 }
25787
25788 return cached_can_issue_more;
25789 }
25790
25791 /* We are about to begin issuing insns for this clock cycle. */
25792
25793 static int
25794 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
25795 rtx_insn **ready ATTRIBUTE_UNUSED,
25796 int *pn_ready ATTRIBUTE_UNUSED,
25797 int clock_var ATTRIBUTE_UNUSED)
25798 {
25799 int n_ready = *pn_ready;
25800
25801 if (sched_verbose)
25802 fprintf (dump, "// rs6000_sched_reorder :\n");
25803
25804 /* Reorder the ready list, if the insn at the head of the ready
25805 list (the next one to issue) is a nonpipelined insn. */
25806 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
25807 {
25808 if (is_nonpipeline_insn (ready[n_ready - 1])
25809 && (recog_memoized (ready[n_ready - 2]) > 0))
25810 /* Simply swap first two insns. */
25811 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
25812 }
25813
25814 if (rs6000_tune == PROCESSOR_POWER6)
25815 load_store_pendulum = 0;
25816
25817 return rs6000_issue_rate ();
25818 }
25819
25820 /* Like rs6000_sched_reorder, but called after issuing each insn. */
25821
25822 static int
25823 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
25824 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
25825 {
25826 if (sched_verbose)
25827 fprintf (dump, "// rs6000_sched_reorder2 :\n");
25828
25829 /* For Power6, we need to handle some special cases to try and keep the
25830 store queue from overflowing and triggering expensive flushes.
25831
25832 This code monitors how load and store instructions are being issued
25833 and skews the ready list one way or the other to increase the likelihood
25834 that a desired instruction is issued at the proper time.
25835
25836 A couple of things are done. First, we maintain a "load_store_pendulum"
25837 to track the current state of load/store issue.
25838
25839 - If the pendulum is at zero, then no loads or stores have been
25840 issued in the current cycle so we do nothing.
25841
25842 - If the pendulum is 1, then a single load has been issued in this
25843 cycle and we attempt to locate another load in the ready list to
25844 issue with it.
25845
25846 - If the pendulum is -2, then two stores have already been
25847 issued in this cycle, so we increase the priority of the first load
25848 in the ready list to increase its likelihood of being chosen first
25849 in the next cycle.
25850
25851 - If the pendulum is -1, then a single store has been issued in this
25852 cycle and we attempt to locate another store in the ready list to
25853 issue with it, preferring a store to an adjacent memory location to
25854 facilitate store pairing in the store queue.
25855
25856 - If the pendulum is 2, then two loads have already been
25857 issued in this cycle, so we increase the priority of the first store
25858 in the ready list to increase its likelihood of being chosen first
25859 in the next cycle.
25860
25861 - If the pendulum < -2 or > 2, then do nothing.
25862
25863 Note: This code covers the most common scenarios. There exist
25864 non-load/store instructions which make use of the LSU and which
25865 would need to be accounted for to strictly model the behavior
25866 of the machine. Those instructions are currently unaccounted
25867 for to help minimize compile time overhead of this code.
25868 */
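/* Worked example: once two stores have issued in a cycle the pendulum
   sits at -2; the first load found on the ready list gets its
   priority bumped and the pendulum is pushed to -3, so no further
   loads are favored until the pendulum is reset.  */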
25869 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
25870 {
25871 int pos;
25872 int i;
25873 rtx_insn *tmp;
25874 rtx load_mem, str_mem;
25875
25876 if (is_store_insn (last_scheduled_insn, &str_mem))
25877 /* Issuing a store, swing the load_store_pendulum to the left */
25878 load_store_pendulum--;
25879 else if (is_load_insn (last_scheduled_insn, &load_mem))
25880 /* Issuing a load, swing the load_store_pendulum to the right */
25881 load_store_pendulum++;
25882 else
25883 return cached_can_issue_more;
25884
25885 /* If the pendulum is balanced, or there is only one instruction on
25886 the ready list, then all is well, so return. */
25887 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
25888 return cached_can_issue_more;
25889
25890 if (load_store_pendulum == 1)
25891 {
25892 /* A load has been issued in this cycle. Scan the ready list
25893 for another load to issue with it */
25894 pos = *pn_ready-1;
25895
25896 while (pos >= 0)
25897 {
25898 if (is_load_insn (ready[pos], &load_mem))
25899 {
25900 /* Found a load. Move it to the head of the ready list,
25901 and adjust its priority so that it is more likely to
25902 stay there */
25903 tmp = ready[pos];
25904 for (i = pos; i < *pn_ready - 1; i++)
25905 ready[i] = ready[i + 1];
25906 ready[*pn_ready-1] = tmp;
25907
25908 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25909 INSN_PRIORITY (tmp)++;
25910 break;
25911 }
25912 pos--;
25913 }
25914 }
25915 else if (load_store_pendulum == -2)
25916 {
25917 /* Two stores have been issued in this cycle. Increase the
25918 priority of the first load in the ready list to favor it for
25919 issuing in the next cycle. */
25920 pos = *pn_ready-1;
25921
25922 while (pos >= 0)
25923 {
25924 if (is_load_insn (ready[pos], &load_mem)
25925 && !sel_sched_p ()
25926 && INSN_PRIORITY_KNOWN (ready[pos]))
25927 {
25928 INSN_PRIORITY (ready[pos])++;
25929
25930 /* Adjust the pendulum to account for the fact that a load
25931 was found and increased in priority. This is to prevent
25932 increasing the priority of multiple loads. */
25933 load_store_pendulum--;
25934
25935 break;
25936 }
25937 pos--;
25938 }
25939 }
25940 else if (load_store_pendulum == -1)
25941 {
25942 /* A store has been issued in this cycle. Scan the ready list for
25943 another store to issue with it, preferring a store to an adjacent
25944 memory location. */
25945 int first_store_pos = -1;
25946
25947 pos = *pn_ready-1;
25948
25949 while (pos >= 0)
25950 {
25951 if (is_store_insn (ready[pos], &str_mem))
25952 {
25953 rtx str_mem2;
25954 /* Maintain the index of the first store found on the
25955 list. */
25956 if (first_store_pos == -1)
25957 first_store_pos = pos;
25958
25959 if (is_store_insn (last_scheduled_insn, &str_mem2)
25960 && adjacent_mem_locations (str_mem, str_mem2))
25961 {
25962 /* Found an adjacent store. Move it to the head of the
25963 ready list, and adjust its priority so that it is
25964 more likely to stay there. */
25965 tmp = ready[pos];
25966 for (i=pos; i<*pn_ready-1; i++)
25967 ready[i] = ready[i + 1];
25968 ready[*pn_ready-1] = tmp;
25969
25970 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25971 INSN_PRIORITY (tmp)++;
25972
25973 first_store_pos = -1;
25974
25975 break;
25976 }
25977 }
25978 pos--;
25979 }
25980
25981 if (first_store_pos >= 0)
25982 {
25983 /* An adjacent store wasn't found, but a non-adjacent store was,
25984 so move the non-adjacent store to the front of the ready
25985 list, and adjust its priority so that it is more likely to
25986 stay there. */
25987 tmp = ready[first_store_pos];
25988 for (i=first_store_pos; i<*pn_ready-1; i++)
25989 ready[i] = ready[i + 1];
25990 ready[*pn_ready-1] = tmp;
25991 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25992 INSN_PRIORITY (tmp)++;
25993 }
25994 }
25995 else if (load_store_pendulum == 2)
25996 {
25997 /* Two loads have been issued in this cycle. Increase the priority
25998 of the first store in the ready list to favor it for issuing in
25999 the next cycle. */
26000 pos = *pn_ready-1;
26001
26002 while (pos >= 0)
26003 {
26004 if (is_store_insn (ready[pos], &str_mem)
26005 && !sel_sched_p ()
26006 && INSN_PRIORITY_KNOWN (ready[pos]))
26007 {
26008 INSN_PRIORITY (ready[pos])++;
26009
26010 /* Adjust the pendulum to account for the fact that a store
26011 was found and increased in priority. This is to prevent
26012 increasing the priority of multiple stores. */
26013 load_store_pendulum++;
26014
26015 break;
26016 }
26017 pos--;
26018 }
26019 }
26020 }
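/* Illustrative sketch (hypothetical helper, not used above): the
   haifa scheduler issues from the end of the ready array, so "move it
   to the head of the ready list" in the cases above means rotating
   the chosen insn into ready[n_ready - 1], exactly as follows.  */
#if 0
static void
rotate_to_issue_slot (rtx_insn **ready, int n_ready, int pos)
{
  rtx_insn *tmp = ready[pos];
  for (int i = pos; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = tmp;
}
#endif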
26021
26022 /* Do Power9 dependent reordering if necessary. */
26023 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
26024 && recog_memoized (last_scheduled_insn) >= 0)
26025 return power9_sched_reorder2 (ready, *pn_ready - 1);
26026
26027 return cached_can_issue_more;
26028 }
26029
26030 /* Return whether the presence of INSN causes a dispatch group termination
26031 of group WHICH_GROUP.
26032
26033 If WHICH_GROUP == current_group, this function will return true if INSN
26034 causes the termination of the current group (i.e., the dispatch group to
26035 which INSN belongs). This means that INSN will be the last insn in the
26036 group it belongs to.
26037
26038 If WHICH_GROUP == previous_group, this function will return true if INSN
26039 causes the termination of the previous group (i.e., the dispatch group that
26040 precedes the group to which INSN belongs). This means that INSN will be
26041 the first insn in the group it belongs to. */
26042
26043 static bool
26044 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
26045 {
26046 bool first, last;
26047
26048 if (! insn)
26049 return false;
26050
26051 first = insn_must_be_first_in_group (insn);
26052 last = insn_must_be_last_in_group (insn);
26053
26054 if (first && last)
26055 return true;
26056
26057 if (which_group == current_group)
26058 return last;
26059 else if (which_group == previous_group)
26060 return first;
26061
26062 return false;
26063 }
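/* Hypothetical example of the semantics above: on POWER5 an mfcr
   (TYPE_MFCR) must be first in its dispatch group, so it terminates
   the previous group but not its own.  */
#if 0
insn_terminates_group_p (mfcr_insn, previous_group); /* -> true   */
insn_terminates_group_p (mfcr_insn, current_group);  /* -> false  */
#endif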
26064
26065
26066 static bool
26067 insn_must_be_first_in_group (rtx_insn *insn)
26068 {
26069 enum attr_type type;
26070
26071 if (!insn
26072 || NOTE_P (insn)
26073 || DEBUG_INSN_P (insn)
26074 || GET_CODE (PATTERN (insn)) == USE
26075 || GET_CODE (PATTERN (insn)) == CLOBBER)
26076 return false;
26077
26078 switch (rs6000_tune)
26079 {
26080 case PROCESSOR_POWER5:
26081 if (is_cracked_insn (insn))
26082 return true;
26083 /* FALLTHRU */
26084 case PROCESSOR_POWER4:
26085 if (is_microcoded_insn (insn))
26086 return true;
26087
26088 if (!rs6000_sched_groups)
26089 return false;
26090
26091 type = get_attr_type (insn);
26092
26093 switch (type)
26094 {
26095 case TYPE_MFCR:
26096 case TYPE_MFCRF:
26097 case TYPE_MTCR:
26098 case TYPE_CR_LOGICAL:
26099 case TYPE_MTJMPR:
26100 case TYPE_MFJMPR:
26101 case TYPE_DIV:
26102 case TYPE_LOAD_L:
26103 case TYPE_STORE_C:
26104 case TYPE_ISYNC:
26105 case TYPE_SYNC:
26106 return true;
26107 default:
26108 break;
26109 }
26110 break;
26111 case PROCESSOR_POWER6:
26112 type = get_attr_type (insn);
26113
26114 switch (type)
26115 {
26116 case TYPE_EXTS:
26117 case TYPE_CNTLZ:
26118 case TYPE_TRAP:
26119 case TYPE_MUL:
26120 case TYPE_INSERT:
26121 case TYPE_FPCOMPARE:
26122 case TYPE_MFCR:
26123 case TYPE_MTCR:
26124 case TYPE_MFJMPR:
26125 case TYPE_MTJMPR:
26126 case TYPE_ISYNC:
26127 case TYPE_SYNC:
26128 case TYPE_LOAD_L:
26129 case TYPE_STORE_C:
26130 return true;
26131 case TYPE_SHIFT:
26132 if (get_attr_dot (insn) == DOT_NO
26133 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26134 return true;
26135 else
26136 break;
26137 case TYPE_DIV:
26138 if (get_attr_size (insn) == SIZE_32)
26139 return true;
26140 else
26141 break;
26142 case TYPE_LOAD:
26143 case TYPE_STORE:
26144 case TYPE_FPLOAD:
26145 case TYPE_FPSTORE:
26146 if (get_attr_update (insn) == UPDATE_YES)
26147 return true;
26148 else
26149 break;
26150 default:
26151 break;
26152 }
26153 break;
26154 case PROCESSOR_POWER7:
26155 type = get_attr_type (insn);
26156
26157 switch (type)
26158 {
26159 case TYPE_CR_LOGICAL:
26160 case TYPE_MFCR:
26161 case TYPE_MFCRF:
26162 case TYPE_MTCR:
26163 case TYPE_DIV:
26164 case TYPE_ISYNC:
26165 case TYPE_LOAD_L:
26166 case TYPE_STORE_C:
26167 case TYPE_MFJMPR:
26168 case TYPE_MTJMPR:
26169 return true;
26170 case TYPE_MUL:
26171 case TYPE_SHIFT:
26172 case TYPE_EXTS:
26173 if (get_attr_dot (insn) == DOT_YES)
26174 return true;
26175 else
26176 break;
26177 case TYPE_LOAD:
26178 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26179 || get_attr_update (insn) == UPDATE_YES)
26180 return true;
26181 else
26182 break;
26183 case TYPE_STORE:
26184 case TYPE_FPLOAD:
26185 case TYPE_FPSTORE:
26186 if (get_attr_update (insn) == UPDATE_YES)
26187 return true;
26188 else
26189 break;
26190 default:
26191 break;
26192 }
26193 break;
26194 case PROCESSOR_POWER8:
26195 type = get_attr_type (insn);
26196
26197 switch (type)
26198 {
26199 case TYPE_CR_LOGICAL:
26200 case TYPE_MFCR:
26201 case TYPE_MFCRF:
26202 case TYPE_MTCR:
26203 case TYPE_SYNC:
26204 case TYPE_ISYNC:
26205 case TYPE_LOAD_L:
26206 case TYPE_STORE_C:
26207 case TYPE_VECSTORE:
26208 case TYPE_MFJMPR:
26209 case TYPE_MTJMPR:
26210 return true;
26211 case TYPE_SHIFT:
26212 case TYPE_EXTS:
26213 case TYPE_MUL:
26214 if (get_attr_dot (insn) == DOT_YES)
26215 return true;
26216 else
26217 break;
26218 case TYPE_LOAD:
26219 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26220 || get_attr_update (insn) == UPDATE_YES)
26221 return true;
26222 else
26223 break;
26224 case TYPE_STORE:
26225 if (get_attr_update (insn) == UPDATE_YES
26226 && get_attr_indexed (insn) == INDEXED_YES)
26227 return true;
26228 else
26229 break;
26230 default:
26231 break;
26232 }
26233 break;
26234 default:
26235 break;
26236 }
26237
26238 return false;
26239 }
26240
26241 static bool
26242 insn_must_be_last_in_group (rtx_insn *insn)
26243 {
26244 enum attr_type type;
26245
26246 if (!insn
26247 || NOTE_P (insn)
26248 || DEBUG_INSN_P (insn)
26249 || GET_CODE (PATTERN (insn)) == USE
26250 || GET_CODE (PATTERN (insn)) == CLOBBER)
26251 return false;
26252
26253 switch (rs6000_tune)
{
26254 case PROCESSOR_POWER4:
26255 case PROCESSOR_POWER5:
26256 if (is_microcoded_insn (insn))
26257 return true;
26258
26259 if (is_branch_slot_insn (insn))
26260 return true;
26261
26262 break;
26263 case PROCESSOR_POWER6:
26264 type = get_attr_type (insn);
26265
26266 switch (type)
26267 {
26268 case TYPE_EXTS:
26269 case TYPE_CNTLZ:
26270 case TYPE_TRAP:
26271 case TYPE_MUL:
26272 case TYPE_FPCOMPARE:
26273 case TYPE_MFCR:
26274 case TYPE_MTCR:
26275 case TYPE_MFJMPR:
26276 case TYPE_MTJMPR:
26277 case TYPE_ISYNC:
26278 case TYPE_SYNC:
26279 case TYPE_LOAD_L:
26280 case TYPE_STORE_C:
26281 return true;
26282 case TYPE_SHIFT:
26283 if (get_attr_dot (insn) == DOT_NO
26284 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
26285 return true;
26286 else
26287 break;
26288 case TYPE_DIV:
26289 if (get_attr_size (insn) == SIZE_32)
26290 return true;
26291 else
26292 break;
26293 default:
26294 break;
26295 }
26296 break;
26297 case PROCESSOR_POWER7:
26298 type = get_attr_type (insn);
26299
26300 switch (type)
26301 {
26302 case TYPE_ISYNC:
26303 case TYPE_SYNC:
26304 case TYPE_LOAD_L:
26305 case TYPE_STORE_C:
26306 return true;
26307 case TYPE_LOAD:
26308 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26309 && get_attr_update (insn) == UPDATE_YES)
26310 return true;
26311 else
26312 break;
26313 case TYPE_STORE:
26314 if (get_attr_update (insn) == UPDATE_YES
26315 && get_attr_indexed (insn) == INDEXED_YES)
26316 return true;
26317 else
26318 break;
26319 default:
26320 break;
26321 }
26322 break;
26323 case PROCESSOR_POWER8:
26324 type = get_attr_type (insn);
26325
26326 switch (type)
26327 {
26328 case TYPE_MFCR:
26329 case TYPE_MTCR:
26330 case TYPE_ISYNC:
26331 case TYPE_SYNC:
26332 case TYPE_LOAD_L:
26333 case TYPE_STORE_C:
26334 return true;
26335 case TYPE_LOAD:
26336 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26337 && get_attr_update (insn) == UPDATE_YES)
26338 return true;
26339 else
26340 break;
26341 case TYPE_STORE:
26342 if (get_attr_update (insn) == UPDATE_YES
26343 && get_attr_indexed (insn) == INDEXED_YES)
26344 return true;
26345 else
26346 break;
26347 default:
26348 break;
26349 }
26350 break;
26351 default:
26352 break;
26353 }
26354
26355 return false;
26356 }
26357
26358 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
26359 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
26360
26361 static bool
26362 is_costly_group (rtx *group_insns, rtx next_insn)
26363 {
26364 int i;
26365 int issue_rate = rs6000_issue_rate ();
26366
26367 for (i = 0; i < issue_rate; i++)
26368 {
26369 sd_iterator_def sd_it;
26370 dep_t dep;
26371 rtx insn = group_insns[i];
26372
26373 if (!insn)
26374 continue;
26375
26376 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
26377 {
26378 rtx next = DEP_CON (dep);
26379
26380 if (next == next_insn
26381 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
26382 return true;
26383 }
26384 }
26385
26386 return false;
26387 }
26388
26389 /* Utility function used by redefine_groups.
26390 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
26391 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
26392 to keep it "far" (in a separate group) from GROUP_INSNS, following
26393 one of the following schemes, depending on the value of the flag
26394 -minsert-sched-nops = X:
26395 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
26396 in order to force NEXT_INSN into a separate group.
26397 (2) X < sched_finish_regroup_exact: insert exactly X nops.
26398 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
26399 insertion (whether a group has just ended, how many vacant issue slots
26400 remain in the last group, and how many dispatch groups were seen so far). */
26401
26402 static int
26403 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
26404 rtx_insn *next_insn, bool *group_end, int can_issue_more,
26405 int *group_count)
26406 {
26407 rtx nop;
26408 bool force;
26409 int issue_rate = rs6000_issue_rate ();
26410 bool end = *group_end;
26411 int i;
26412
26413 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
26414 return can_issue_more;
26415
26416 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
26417 return can_issue_more;
26418
26419 force = is_costly_group (group_insns, next_insn);
26420 if (!force)
26421 return can_issue_more;
26422
26423 if (sched_verbose > 6)
26424 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
26425 *group_count ,can_issue_more);
26426
26427 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
26428 {
26429 if (*group_end)
26430 can_issue_more = 0;
26431
26432 /* Since only a branch can be issued in the last issue_slot, it is
26433 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
26434 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
26435 in this case the last nop will start a new group and the branch
26436 will be forced to the new group. */
26437 if (can_issue_more && !is_branch_slot_insn (next_insn))
26438 can_issue_more--;
26439
26440 /* Do we have a special group ending nop? */
26441 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
26442 || rs6000_tune == PROCESSOR_POWER8)
26443 {
26444 nop = gen_group_ending_nop ();
26445 emit_insn_before (nop, next_insn);
26446 can_issue_more = 0;
26447 }
26448 else
26449 while (can_issue_more > 0)
26450 {
26451 nop = gen_nop ();
26452 emit_insn_before (nop, next_insn);
26453 can_issue_more--;
26454 }
26455
26456 *group_end = true;
26457 return 0;
26458 }
26459
26460 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
26461 {
26462 int n_nops = rs6000_sched_insert_nops;
26463
26464 /* Nops can't be issued from the branch slot, so the effective
26465 issue_rate for nops is 'issue_rate - 1'. */
26466 if (can_issue_more == 0)
26467 can_issue_more = issue_rate;
26468 can_issue_more--;
26469 if (can_issue_more == 0)
26470 {
26471 can_issue_more = issue_rate - 1;
26472 (*group_count)++;
26473 end = true;
26474 for (i = 0; i < issue_rate; i++)
26475 {
26476 group_insns[i] = 0;
26477 }
26478 }
26479
26480 while (n_nops > 0)
26481 {
26482 nop = gen_nop ();
26483 emit_insn_before (nop, next_insn);
26484 if (can_issue_more == issue_rate - 1) /* new group begins */
26485 end = false;
26486 can_issue_more--;
26487 if (can_issue_more == 0)
26488 {
26489 can_issue_more = issue_rate - 1;
26490 (*group_count)++;
26491 end = true;
26492 for (i = 0; i < issue_rate; i++)
26493 {
26494 group_insns[i] = 0;
26495 }
26496 }
26497 n_nops--;
26498 }
26499
26500 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
26501 can_issue_more++;
26502
26503 /* Is next_insn going to start a new group? */
26504 *group_end
26505 = (end
26506 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26507 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26508 || (can_issue_more < issue_rate &&
26509 insn_terminates_group_p (next_insn, previous_group)));
26510 if (*group_end && end)
26511 (*group_count)--;
26512
26513 if (sched_verbose > 6)
26514 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
26515 *group_count, can_issue_more);
26516 return can_issue_more;
26517 }
26518
26519 return can_issue_more;
26520 }
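/* Worked example for the sched_finish_regroup_exact scheme above,
   with illustrative values: on a core with issue_rate == 5, three
   vacant slots and a non-branch NEXT_INSN need only two nops, since
   the last slot is reserved for branches and cannot hold NEXT_INSN
   anyway.  */
#if 0
int can_issue_more = 3;
if (can_issue_more && !is_branch_slot_insn (next_insn))
  can_issue_more--;   /* two nops are then emitted before NEXT_INSN.  */
#endif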
26521
26522 /* This function tries to synch the dispatch groups that the compiler "sees"
26523 with the dispatch groups that the processor dispatcher is expected to
26524 form in practice. It tries to achieve this synchronization by forcing the
26525 estimated processor grouping on the compiler (as opposed to the function
26526 'pad_groups' which tries to force the scheduler's grouping on the processor).
26527
26528 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
26529 examines the (estimated) dispatch groups that will be formed by the processor
26530 dispatcher. It marks these group boundaries to reflect the estimated
26531 processor grouping, overriding the grouping that the scheduler had marked.
26532 Depending on the value of the flag '-minsert-sched-nops' this function can
26533 force certain insns into separate groups or force a certain distance between
26534 them by inserting nops, for example, if there exists a "costly dependence"
26535 between the insns.
26536
26537 The function estimates the group boundaries that the processor will form as
26538 follows: It keeps track of how many vacant issue slots are available after
26539 each insn. A subsequent insn will start a new group if one of the following
26540 4 cases applies:
26541 - no more vacant issue slots remain in the current dispatch group.
26542 - only the last issue slot, which is the branch slot, is vacant, but the next
26543 insn is not a branch.
26544 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
26545 which means that a cracked insn (which occupies two issue slots) can't be
26546 issued in this group.
26547 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
26548 start a new group. */
26549
26550 static int
26551 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26552 rtx_insn *tail)
26553 {
26554 rtx_insn *insn, *next_insn;
26555 int issue_rate;
26556 int can_issue_more;
26557 int slot, i;
26558 bool group_end;
26559 int group_count = 0;
26560 rtx *group_insns;
26561
26562 /* Initialize. */
26563 issue_rate = rs6000_issue_rate ();
26564 group_insns = XALLOCAVEC (rtx, issue_rate);
26565 for (i = 0; i < issue_rate; i++)
26566 {
26567 group_insns[i] = 0;
26568 }
26569 can_issue_more = issue_rate;
26570 slot = 0;
26571 insn = get_next_active_insn (prev_head_insn, tail);
26572 group_end = false;
26573
26574 while (insn != NULL_RTX)
26575 {
26576 slot = (issue_rate - can_issue_more);
26577 group_insns[slot] = insn;
26578 can_issue_more =
26579 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26580 if (insn_terminates_group_p (insn, current_group))
26581 can_issue_more = 0;
26582
26583 next_insn = get_next_active_insn (insn, tail);
26584 if (next_insn == NULL_RTX)
26585 return group_count + 1;
26586
26587 /* Is next_insn going to start a new group? */
26588 group_end
26589 = (can_issue_more == 0
26590 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26591 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26592 || (can_issue_more < issue_rate &&
26593 insn_terminates_group_p (next_insn, previous_group)));
26594
26595 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
26596 next_insn, &group_end, can_issue_more,
26597 &group_count);
26598
26599 if (group_end)
26600 {
26601 group_count++;
26602 can_issue_more = 0;
26603 for (i = 0; i < issue_rate; i++)
26604 {
26605 group_insns[i] = 0;
26606 }
26607 }
26608
26609 if (GET_MODE (next_insn) == TImode && can_issue_more)
26610 PUT_MODE (next_insn, VOIDmode);
26611 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
26612 PUT_MODE (next_insn, TImode);
26613
26614 insn = next_insn;
26615 if (can_issue_more == 0)
26616 can_issue_more = issue_rate;
26617 } /* while */
26618
26619 return group_count;
26620 }
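/* The four conditions listed in the comment before redefine_groups,
   gathered into a single predicate (an illustrative refactoring only;
   the code above keeps the test inline):  */
#if 0
static bool
next_insn_starts_group_p (rtx_insn *next_insn, int can_issue_more,
			  int issue_rate)
{
  return (can_issue_more == 0				/* case 1 */
	  || (can_issue_more == 1
	      && !is_branch_slot_insn (next_insn))	/* case 2 */
	  || (can_issue_more <= 2
	      && is_cracked_insn (next_insn))		/* case 3 */
	  || (can_issue_more < issue_rate		/* case 4 */
	      && insn_terminates_group_p (next_insn, previous_group)));
}
#endif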
26621
26622 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
26623 dispatch group boundaries that the scheduler had marked. Pad with nops
26624 any dispatch groups which have vacant issue slots, in order to force the
26625 scheduler's grouping on the processor dispatcher. The function
26626 returns the number of dispatch groups found. */
26627
26628 static int
26629 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
26630 rtx_insn *tail)
26631 {
26632 rtx_insn *insn, *next_insn;
26633 rtx nop;
26634 int issue_rate;
26635 int can_issue_more;
26636 int group_end;
26637 int group_count = 0;
26638
26639 /* Initialize issue_rate. */
26640 issue_rate = rs6000_issue_rate ();
26641 can_issue_more = issue_rate;
26642
26643 insn = get_next_active_insn (prev_head_insn, tail);
26644 next_insn = get_next_active_insn (insn, tail);
26645
26646 while (insn != NULL_RTX)
26647 {
26648 can_issue_more =
26649 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26650
26651 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
26652
26653 if (next_insn == NULL_RTX)
26654 break;
26655
26656 if (group_end)
26657 {
26658 /* If the scheduler had marked group termination at this location
26659 (between insn and next_insn), and neither insn nor next_insn will
26660 force group termination, pad the group with nops to force group
26661 termination. */
26662 if (can_issue_more
26663 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
26664 && !insn_terminates_group_p (insn, current_group)
26665 && !insn_terminates_group_p (next_insn, previous_group))
26666 {
26667 if (!is_branch_slot_insn (next_insn))
26668 can_issue_more--;
26669
26670 while (can_issue_more)
26671 {
26672 nop = gen_nop ();
26673 emit_insn_before (nop, next_insn);
26674 can_issue_more--;
26675 }
26676 }
26677
26678 can_issue_more = issue_rate;
26679 group_count++;
26680 }
26681
26682 insn = next_insn;
26683 next_insn = get_next_active_insn (insn, tail);
26684 }
26685
26686 return group_count;
26687 }
26688
26689 /* We're beginning a new block. Initialize data structures as necessary. */
26690
26691 static void
26692 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
26693 int sched_verbose ATTRIBUTE_UNUSED,
26694 int max_ready ATTRIBUTE_UNUSED)
26695 {
26696 last_scheduled_insn = NULL;
26697 load_store_pendulum = 0;
26698 divide_cnt = 0;
26699 vec_pairing = 0;
26700 }
26701
26702 /* The following function is called at the end of scheduling BB.
26703 After reload, it inserts nops to enforce insn group bundling. */
26704
26705 static void
26706 rs6000_sched_finish (FILE *dump, int sched_verbose)
26707 {
26708 int n_groups;
26709
26710 if (sched_verbose)
26711 fprintf (dump, "=== Finishing schedule.\n");
26712
26713 if (reload_completed && rs6000_sched_groups)
26714 {
26715 /* Do not run the sched_finish hook when selective scheduling is enabled. */
26716 if (sel_sched_p ())
26717 return;
26718
26719 if (rs6000_sched_insert_nops == sched_finish_none)
26720 return;
26721
26722 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
26723 n_groups = pad_groups (dump, sched_verbose,
26724 current_sched_info->prev_head,
26725 current_sched_info->next_tail);
26726 else
26727 n_groups = redefine_groups (dump, sched_verbose,
26728 current_sched_info->prev_head,
26729 current_sched_info->next_tail);
26730
26731 if (sched_verbose >= 6)
26732 {
26733 fprintf (dump, "ngroups = %d\n", n_groups);
26734 print_rtl (dump, current_sched_info->prev_head);
26735 fprintf (dump, "Done finish_sched\n");
26736 }
26737 }
26738 }
26739
26740 struct rs6000_sched_context
26741 {
26742 short cached_can_issue_more;
26743 rtx_insn *last_scheduled_insn;
26744 int load_store_pendulum;
26745 int divide_cnt;
26746 int vec_pairing;
26747 };
26748
26749 typedef struct rs6000_sched_context rs6000_sched_context_def;
26750 typedef rs6000_sched_context_def *rs6000_sched_context_t;
26751
26752 /* Allocate storage for a new scheduling context. */
26753 static void *
26754 rs6000_alloc_sched_context (void)
26755 {
26756 return xmalloc (sizeof (rs6000_sched_context_def));
26757 }
26758
26759 /* If CLEAN_P is true, initialize _SC with clean data; otherwise
26760 initialize it from the global context. */
26761 static void
26762 rs6000_init_sched_context (void *_sc, bool clean_p)
26763 {
26764 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26765
26766 if (clean_p)
26767 {
26768 sc->cached_can_issue_more = 0;
26769 sc->last_scheduled_insn = NULL;
26770 sc->load_store_pendulum = 0;
26771 sc->divide_cnt = 0;
26772 sc->vec_pairing = 0;
26773 }
26774 else
26775 {
26776 sc->cached_can_issue_more = cached_can_issue_more;
26777 sc->last_scheduled_insn = last_scheduled_insn;
26778 sc->load_store_pendulum = load_store_pendulum;
26779 sc->divide_cnt = divide_cnt;
26780 sc->vec_pairing = vec_pairing;
26781 }
26782 }
26783
26784 /* Sets the global scheduling context to the one pointed to by _SC. */
26785 static void
26786 rs6000_set_sched_context (void *_sc)
26787 {
26788 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26789
26790 gcc_assert (sc != NULL);
26791
26792 cached_can_issue_more = sc->cached_can_issue_more;
26793 last_scheduled_insn = sc->last_scheduled_insn;
26794 load_store_pendulum = sc->load_store_pendulum;
26795 divide_cnt = sc->divide_cnt;
26796 vec_pairing = sc->vec_pairing;
26797 }
26798
26799 /* Free _SC. */
26800 static void
26801 rs6000_free_sched_context (void *_sc)
26802 {
26803 gcc_assert (_sc != NULL);
26804
26805 free (_sc);
26806 }
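/* Hypothetical lifetime of a scheduling context, as the selective
   scheduler drives it through the TARGET_SCHED_*_SCHED_CONTEXT hooks
   defined above:  */
#if 0
void *sc = rs6000_alloc_sched_context ();
rs6000_init_sched_context (sc, true);  /* start from a clean state  */
rs6000_set_sched_context (sc);         /* make it the live state    */
rs6000_free_sched_context (sc);
#endif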
26807
26808 static bool
26809 rs6000_sched_can_speculate_insn (rtx_insn *insn)
26810 {
26811 switch (get_attr_type (insn))
26812 {
26813 case TYPE_DIV:
26814 case TYPE_SDIV:
26815 case TYPE_DDIV:
26816 case TYPE_VECDIV:
26817 case TYPE_SSQRT:
26818 case TYPE_DSQRT:
26819 return false;
26820
26821 default:
26822 return true;
26823 }
26824 }
26825 \f
26826 /* Length in units of the trampoline for entering a nested function. */
26827
26828 int
26829 rs6000_trampoline_size (void)
26830 {
26831 int ret = 0;
26832
26833 switch (DEFAULT_ABI)
26834 {
26835 default:
26836 gcc_unreachable ();
26837
26838 case ABI_AIX:
26839 ret = (TARGET_32BIT) ? 12 : 24;
26840 break;
26841
26842 case ABI_ELFv2:
26843 gcc_assert (!TARGET_32BIT);
26844 ret = 32;
26845 break;
26846
26847 case ABI_DARWIN:
26848 case ABI_V4:
26849 ret = (TARGET_32BIT) ? 40 : 48;
26850 break;
26851 }
26852
26853 return ret;
26854 }
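/* E.g., for 64-bit ABI_AIX the 24 bytes are the three doublewords of
   a function descriptor (entry address, TOC pointer, static chain)
   that rs6000_trampoline_init builds below.  */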
26855
26856 /* Emit RTL insns to initialize the variable parts of a trampoline.
26857 FNADDR is an RTX for the address of the function's pure code.
26858 CXT is an RTX for the static chain value for the function. */
26859
26860 static void
26861 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
26862 {
26863 int regsize = (TARGET_32BIT) ? 4 : 8;
26864 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
26865 rtx ctx_reg = force_reg (Pmode, cxt);
26866 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
26867
26868 switch (DEFAULT_ABI)
26869 {
26870 default:
26871 gcc_unreachable ();
26872
26873 /* Under AIX, just build the 3-word function descriptor. */
26874 case ABI_AIX:
26875 {
26876 rtx fnmem, fn_reg, toc_reg;
26877
26878 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
26879 error ("you cannot take the address of a nested function if you use "
26880 "the %qs option", "-mno-pointers-to-nested-functions");
26881
26882 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
26883 fn_reg = gen_reg_rtx (Pmode);
26884 toc_reg = gen_reg_rtx (Pmode);
26885
26886 /* Macro to shorten the code expansions below. */
26887 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
26888
26889 m_tramp = replace_equiv_address (m_tramp, addr);
26890
26891 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
26892 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
26893 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
26894 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
26895 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
26896
26897 # undef MEM_PLUS
26898 }
26899 break;
26900
26901 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
26902 case ABI_ELFv2:
26903 case ABI_DARWIN:
26904 case ABI_V4:
26905 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
26906 LCT_NORMAL, VOIDmode,
26907 addr, Pmode,
26908 GEN_INT (rs6000_trampoline_size ()), SImode,
26909 fnaddr, Pmode,
26910 ctx_reg, Pmode);
26911 break;
26912 }
26913 }
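/* Illustrative view of the 3-word AIX trampoline built above
   (hypothetical struct; the real code uses raw moves, and regsize
   is 4 or 8):  */
#if 0
struct aix_trampoline
{
  void *entry;        /* copied from FNADDR's descriptor  */
  void *toc;          /* copied from FNADDR's descriptor  */
  void *static_chain; /* the CXT value                    */
};
#endif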
26914
26915 \f
26916 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
26917 identifier as an argument, so the front end shouldn't look it up. */
26918
26919 static bool
26920 rs6000_attribute_takes_identifier_p (const_tree attr_id)
26921 {
26922 return is_attribute_p ("altivec", attr_id);
26923 }
26924
26925 /* Handle the "altivec" attribute. The attribute may have
26926 arguments as follows:
26927
26928 __attribute__((altivec(vector__)))
26929 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
26930 __attribute__((altivec(bool__))) (always followed by 'unsigned')
26931
26932 and may appear more than once (e.g., 'vector bool char') in a
26933 given declaration. */
26934
26935 static tree
26936 rs6000_handle_altivec_attribute (tree *node,
26937 tree name ATTRIBUTE_UNUSED,
26938 tree args,
26939 int flags ATTRIBUTE_UNUSED,
26940 bool *no_add_attrs)
26941 {
26942 tree type = *node, result = NULL_TREE;
26943 machine_mode mode;
26944 int unsigned_p;
26945 char altivec_type
26946 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
26947 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
26948 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
26949 : '?');
26950
26951 while (POINTER_TYPE_P (type)
26952 || TREE_CODE (type) == FUNCTION_TYPE
26953 || TREE_CODE (type) == METHOD_TYPE
26954 || TREE_CODE (type) == ARRAY_TYPE)
26955 type = TREE_TYPE (type);
26956
26957 mode = TYPE_MODE (type);
26958
26959 /* Check for invalid AltiVec type qualifiers. */
26960 if (type == long_double_type_node)
26961 error ("use of %<long double%> in AltiVec types is invalid");
26962 else if (type == boolean_type_node)
26963 error ("use of boolean types in AltiVec types is invalid");
26964 else if (TREE_CODE (type) == COMPLEX_TYPE)
26965 error ("use of %<complex%> in AltiVec types is invalid");
26966 else if (DECIMAL_FLOAT_MODE_P (mode))
26967 error ("use of decimal floating point types in AltiVec types is invalid");
26968 else if (!TARGET_VSX)
26969 {
26970 if (type == long_unsigned_type_node || type == long_integer_type_node)
26971 {
26972 if (TARGET_64BIT)
26973 error ("use of %<long%> in AltiVec types is invalid for "
26974 "64-bit code without %qs", "-mvsx");
26975 else if (rs6000_warn_altivec_long)
26976 warning (0, "use of %<long%> in AltiVec types is deprecated; "
26977 "use %<int%>");
26978 }
26979 else if (type == long_long_unsigned_type_node
26980 || type == long_long_integer_type_node)
26981 error ("use of %<long long%> in AltiVec types is invalid without %qs",
26982 "-mvsx");
26983 else if (type == double_type_node)
26984 error ("use of %<double%> in AltiVec types is invalid without %qs",
26985 "-mvsx");
26986 }
26987
26988 switch (altivec_type)
26989 {
26990 case 'v':
26991 unsigned_p = TYPE_UNSIGNED (type);
26992 switch (mode)
26993 {
26994 case E_TImode:
26995 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
26996 break;
26997 case E_DImode:
26998 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
26999 break;
27000 case E_SImode:
27001 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
27002 break;
27003 case E_HImode:
27004 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
27005 break;
27006 case E_QImode:
27007 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
27008 break;
27009 case E_SFmode: result = V4SF_type_node; break;
27010 case E_DFmode: result = V2DF_type_node; break;
27011 /* If the user says 'vector int bool', we may be handed the 'bool'
27012 attribute _before_ the 'vector' attribute, and so select the
27013 proper type in the 'b' case below. */
27014 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
27015 case E_V2DImode: case E_V2DFmode:
27016 result = type;
27017 default: break;
27018 }
27019 break;
27020 case 'b':
27021 switch (mode)
27022 {
27023 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
27024 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
27025 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
27026 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
27027 default: break;
27028 }
27029 break;
27030 case 'p':
27031 switch (mode)
27032 {
27033 case E_V8HImode: result = pixel_V8HI_type_node;
27034 default: break;
27035 }
27036 default: break;
27037 }
27038
27039 /* Propagate qualifiers attached to the element type
27040 onto the vector type. */
27041 if (result && result != type && TYPE_QUALS (type))
27042 result = build_qualified_type (result, TYPE_QUALS (type));
27043
27044 *no_add_attrs = true; /* No need to hang on to the attribute. */
27045
27046 if (result)
27047 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
27048
27049 return NULL_TREE;
27050 }
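/* Hypothetical source-level forms of the attribute handled above; the
   front ends lower the vector, pixel and bool keywords to these:  */
#if 0
typedef int vsi __attribute__ ((altivec (vector__)));          /* vector int       */
typedef unsigned short vp __attribute__ ((altivec (pixel__))); /* vector pixel     */
typedef unsigned int vbi __attribute__ ((altivec (bool__)));   /* vector bool int  */
#endif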
27051
27052 /* AltiVec defines five built-in scalar types that serve as vector
27053 elements; we must teach the compiler how to mangle them. The 128-bit
27054 floating point mangling is target-specific as well. */
27055
27056 static const char *
27057 rs6000_mangle_type (const_tree type)
27058 {
27059 type = TYPE_MAIN_VARIANT (type);
27060
27061 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27062 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27063 return NULL;
27064
27065 if (type == bool_char_type_node) return "U6__boolc";
27066 if (type == bool_short_type_node) return "U6__bools";
27067 if (type == pixel_type_node) return "u7__pixel";
27068 if (type == bool_int_type_node) return "U6__booli";
27069 if (type == bool_long_long_type_node) return "U6__boolx";
27070
27071 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
27072 return "g";
27073 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
27074 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
27075
27076 /* For all other types, use the default mangling. */
27077 return NULL;
27078 }
27079
27080 /* Handle a "longcall" or "shortcall" attribute; arguments as in
27081 struct attribute_spec.handler. */
27082
27083 static tree
27084 rs6000_handle_longcall_attribute (tree *node, tree name,
27085 tree args ATTRIBUTE_UNUSED,
27086 int flags ATTRIBUTE_UNUSED,
27087 bool *no_add_attrs)
27088 {
27089 if (TREE_CODE (*node) != FUNCTION_TYPE
27090 && TREE_CODE (*node) != FIELD_DECL
27091 && TREE_CODE (*node) != TYPE_DECL)
27092 {
27093 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27094 name);
27095 *no_add_attrs = true;
27096 }
27097
27098 return NULL_TREE;
27099 }
27100
27101 /* Set longcall attributes on all functions declared when
27102 rs6000_default_long_calls is true. */
27103 static void
27104 rs6000_set_default_type_attributes (tree type)
27105 {
27106 if (rs6000_default_long_calls
27107 && (TREE_CODE (type) == FUNCTION_TYPE
27108 || TREE_CODE (type) == METHOD_TYPE))
27109 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
27110 NULL_TREE,
27111 TYPE_ATTRIBUTES (type));
27112
27113 #if TARGET_MACHO
27114 darwin_set_default_type_attributes (type);
27115 #endif
27116 }
27117
27118 /* Return a reference suitable for calling a function with the
27119 longcall attribute. */
27120
27121 static rtx
27122 rs6000_longcall_ref (rtx call_ref, rtx arg)
27123 {
27124 /* System V adds '.' to the internal name, so skip any leading dots. */
27125 const char *call_name = XSTR (call_ref, 0);
27126 if (*call_name == '.')
27127 {
27128 while (*call_name == '.')
27129 call_name++;
27130
27131 tree node = get_identifier (call_name);
27132 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
27133 }
27134
27135 if (TARGET_PLTSEQ)
27136 {
27137 rtx base = const0_rtx;
27138 int regno = 12;
27139 if (rs6000_pcrel_p (cfun))
27140 {
27141 rtx reg = gen_rtx_REG (Pmode, regno);
27142 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27143 UNSPEC_PLT_PCREL);
27144 emit_insn (gen_rtx_SET (reg, u));
27145 return reg;
27146 }
27147
27148 if (DEFAULT_ABI == ABI_ELFv2)
27149 base = gen_rtx_REG (Pmode, TOC_REGISTER);
27150 else
27151 {
27152 if (flag_pic)
27153 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27154 regno = 11;
27155 }
27156 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
27157 may be used by a function global entry point. For SysV4, r11
27158 is used by __glink_PLTresolve lazy resolver entry. */
27159 rtx reg = gen_rtx_REG (Pmode, regno);
27160 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
27161 UNSPEC_PLT16_HA);
27162 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
27163 UNSPEC_PLT16_LO);
27164 emit_insn (gen_rtx_SET (reg, hi));
27165 emit_insn (gen_rtx_SET (reg, lo));
27166 return reg;
27167 }
27168
27169 return force_reg (Pmode, call_ref);
27170 }
27171 \f
27172 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
27173 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
27174 #endif
27175
27176 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27177 struct attribute_spec.handler. */
27178 static tree
27179 rs6000_handle_struct_attribute (tree *node, tree name,
27180 tree args ATTRIBUTE_UNUSED,
27181 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27182 {
27183 tree *type = NULL;
27184 if (DECL_P (*node))
27185 {
27186 if (TREE_CODE (*node) == TYPE_DECL)
27187 type = &TREE_TYPE (*node);
27188 }
27189 else
27190 type = node;
27191
27192 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27193 || TREE_CODE (*type) == UNION_TYPE)))
27194 {
27195 warning (OPT_Wattributes, "%qE attribute ignored", name);
27196 *no_add_attrs = true;
27197 }
27198
27199 else if ((is_attribute_p ("ms_struct", name)
27200 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27201 || ((is_attribute_p ("gcc_struct", name)
27202 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27203 {
27204 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27205 name);
27206 *no_add_attrs = true;
27207 }
27208
27209 return NULL_TREE;
27210 }
27211
27212 static bool
27213 rs6000_ms_bitfield_layout_p (const_tree record_type)
27214 {
27215 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
27216 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27217 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27218 }
27219 \f
27220 #ifdef USING_ELFOS_H
27221
27222 /* A get_unnamed_section callback, used for switching to toc_section. */
27223
27224 static void
27225 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
27226 {
27227 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27228 && TARGET_MINIMAL_TOC)
27229 {
27230 if (!toc_initialized)
27231 {
27232 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27233 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27234 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
27235 fprintf (asm_out_file, "\t.tc ");
27236 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
27237 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27238 fprintf (asm_out_file, "\n");
27239
27240 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27241 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27242 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27243 fprintf (asm_out_file, " = .+32768\n");
27244 toc_initialized = 1;
27245 }
27246 else
27247 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27248 }
27249 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27250 {
27251 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
27252 if (!toc_initialized)
27253 {
27254 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27255 toc_initialized = 1;
27256 }
27257 }
27258 else
27259 {
27260 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27261 if (!toc_initialized)
27262 {
27263 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
27264 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
27265 fprintf (asm_out_file, " = .+32768\n");
27266 toc_initialized = 1;
27267 }
27268 }
27269 }
27270
27271 /* Implement TARGET_ASM_INIT_SECTIONS. */
27272
27273 static void
27274 rs6000_elf_asm_init_sections (void)
27275 {
27276 toc_section
27277 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
27278
27279 sdata2_section
27280 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
27281 SDATA2_SECTION_ASM_OP);
27282 }
27283
27284 /* Implement TARGET_SELECT_RTX_SECTION. */
27285
27286 static section *
27287 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
27288 unsigned HOST_WIDE_INT align)
27289 {
27290 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
27291 return toc_section;
27292 else
27293 return default_elf_select_rtx_section (mode, x, align);
27294 }
27295 \f
27296 /* For a SYMBOL_REF, set generic flags and then perform some
27297 target-specific processing.
27298
27299 When the AIX ABI is requested on a non-AIX system, replace the
27300 function name with the real name (with a leading .) rather than the
27301 function descriptor name. This saves a lot of overriding code to
27302 read the prefixes. */
27303
27304 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
27305 static void
27306 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
27307 {
27308 default_encode_section_info (decl, rtl, first);
27309
27310 if (first
27311 && TREE_CODE (decl) == FUNCTION_DECL
27312 && !TARGET_AIX
27313 && DEFAULT_ABI == ABI_AIX)
27314 {
27315 rtx sym_ref = XEXP (rtl, 0);
27316 size_t len = strlen (XSTR (sym_ref, 0));
27317 char *str = XALLOCAVEC (char, len + 2);
27318 str[0] = '.';
27319 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
27320 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
27321 }
27322 }
27323
27324 static inline bool
27325 compare_section_name (const char *section, const char *templ)
27326 {
27327 int len;
27328
27329 len = strlen (templ);
27330 return (strncmp (section, templ, len) == 0
27331 && (section[len] == 0 || section[len] == '.'));
27332 }
27333
27334 bool
27335 rs6000_elf_in_small_data_p (const_tree decl)
27336 {
27337 if (rs6000_sdata == SDATA_NONE)
27338 return false;
27339
27340 /* We want to merge strings, so we never consider them small data. */
27341 if (TREE_CODE (decl) == STRING_CST)
27342 return false;
27343
27344 /* Functions are never in the small data area. */
27345 if (TREE_CODE (decl) == FUNCTION_DECL)
27346 return false;
27347
27348 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
27349 {
27350 const char *section = DECL_SECTION_NAME (decl);
27351 if (compare_section_name (section, ".sdata")
27352 || compare_section_name (section, ".sdata2")
27353 || compare_section_name (section, ".gnu.linkonce.s")
27354 || compare_section_name (section, ".sbss")
27355 || compare_section_name (section, ".sbss2")
27356 || compare_section_name (section, ".gnu.linkonce.sb")
27357 || strcmp (section, ".PPC.EMB.sdata0") == 0
27358 || strcmp (section, ".PPC.EMB.sbss0") == 0)
27359 return true;
27360 }
27361 else
27362 {
27363 /* If we are told not to put readonly data in sdata, then don't. */
27364 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
27365 && !rs6000_readonly_in_sdata)
27366 return false;
27367
27368 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
27369
27370 if (size > 0
27371 && size <= g_switch_value
27372 /* If it's not public, and we're not going to reference it there
27373 anyway, there's no need to put it in the small data section. */
27374 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
27375 return true;
27376 }
27377
27378 return false;
27379 }
27380
27381 #endif /* USING_ELFOS_H */
27382 \f
27383 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
27384
27385 static bool
27386 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
27387 {
27388 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
27389 }
27390
27391 /* Do not place thread-local symbols refs in the object blocks. */
27392
27393 static bool
27394 rs6000_use_blocks_for_decl_p (const_tree decl)
27395 {
27396 return !DECL_THREAD_LOCAL_P (decl);
27397 }
27398 \f
27399 /* Return a REG that occurs in ADDR with coefficient 1.
27400 ADDR can be effectively incremented by incrementing REG.
27401
27402 r0 is special and we must not select it as an address
27403 register by this routine since our caller will try to
27404 increment the returned register via an "la" instruction. */
27405
27406 rtx
27407 find_addr_reg (rtx addr)
27408 {
27409 while (GET_CODE (addr) == PLUS)
27410 {
27411 if (REG_P (XEXP (addr, 0))
27412 && REGNO (XEXP (addr, 0)) != 0)
27413 addr = XEXP (addr, 0);
27414 else if (REG_P (XEXP (addr, 1))
27415 && REGNO (XEXP (addr, 1)) != 0)
27416 addr = XEXP (addr, 1);
27417 else if (CONSTANT_P (XEXP (addr, 0)))
27418 addr = XEXP (addr, 1);
27419 else if (CONSTANT_P (XEXP (addr, 1)))
27420 addr = XEXP (addr, 0);
27421 else
27422 gcc_unreachable ();
27423 }
27424 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
27425 return addr;
27426 }
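/* Illustrative use (hypothetical RTL): for (plus (reg 9) (const_int 16))
   this returns (reg 9).  (reg 0) is never chosen because "la" would
   read it as the constant 0 rather than a base register.  */
#if 0
rtx addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 9), GEN_INT (16));
rtx base = find_addr_reg (addr);  /* -> (reg 9)  */
#endif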
27427
27428 void
27429 rs6000_fatal_bad_address (rtx op)
27430 {
27431 fatal_insn ("bad address", op);
27432 }
27433
27434 #if TARGET_MACHO
27435
27436 vec<branch_island, va_gc> *branch_islands;
27437
27438 /* Remember to generate a branch island for far calls to the given
27439 function. */
27440
27441 static void
27442 add_compiler_branch_island (tree label_name, tree function_name,
27443 int line_number)
27444 {
27445 branch_island bi = {function_name, label_name, line_number};
27446 vec_safe_push (branch_islands, bi);
27447 }
27448
27449 /* NO_PREVIOUS_DEF checks whether the function name is already in
27450 the branch island list. */
27451
27452 static int
27453 no_previous_def (tree function_name)
27454 {
27455 branch_island *bi;
27456 unsigned ix;
27457
27458 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27459 if (function_name == bi->function_name)
27460 return 0;
27461 return 1;
27462 }
27463
27464 /* GET_PREV_LABEL gets the label name from the previous definition of
27465 the function. */
27466
27467 static tree
27468 get_prev_label (tree function_name)
27469 {
27470 branch_island *bi;
27471 unsigned ix;
27472
27473 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27474 if (function_name == bi->function_name)
27475 return bi->label_name;
27476 return NULL_TREE;
27477 }
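/* Hypothetical use of the branch island bookkeeping above
   (illustrative names):  */
#if 0
tree fn = get_identifier ("foo");
tree lab = get_identifier ("Lfoo$island");
add_compiler_branch_island (lab, fn, 0);
gcc_assert (!no_previous_def (fn));
gcc_assert (get_prev_label (fn) == lab);
#endif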
27478
27479 /* Generate PIC and indirect symbol stubs. */
27480
27481 void
27482 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27483 {
27484 unsigned int length;
27485 char *symbol_name, *lazy_ptr_name;
27486 char *local_label_0;
27487 static unsigned label = 0;
27488
27489 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27490 symb = (*targetm.strip_name_encoding) (symb);
27491
27493 length = strlen (symb);
27494 symbol_name = XALLOCAVEC (char, length + 32);
27495 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27496
27497 lazy_ptr_name = XALLOCAVEC (char, length + 32);
27498 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
27499
27500 if (flag_pic == 2)
27501 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
27502 else
27503 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
27504
27505 if (flag_pic == 2)
27506 {
27507 fprintf (file, "\t.align 5\n");
27508
27509 fprintf (file, "%s:\n", stub);
27510 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27511
27512 label++;
27513 local_label_0 = XALLOCAVEC (char, 16);
27514 sprintf (local_label_0, "L%u$spb", label);
27515
27516 fprintf (file, "\tmflr r0\n");
27517 if (TARGET_LINK_STACK)
27518 {
27519 char name[32];
27520 get_ppc476_thunk_name (name);
27521 fprintf (file, "\tbl %s\n", name);
27522 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27523 }
27524 else
27525 {
27526 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
27527 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27528 }
27529 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
27530 lazy_ptr_name, local_label_0);
27531 fprintf (file, "\tmtlr r0\n");
27532 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
27533 (TARGET_64BIT ? "ldu" : "lwzu"),
27534 lazy_ptr_name, local_label_0);
27535 fprintf (file, "\tmtctr r12\n");
27536 fprintf (file, "\tbctr\n");
27537 }
27538 else
27539 {
27540 fprintf (file, "\t.align 4\n");
27541
27542 fprintf (file, "%s:\n", stub);
27543 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27544
27545 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
27546 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
27547 (TARGET_64BIT ? "ldu" : "lwzu"),
27548 lazy_ptr_name);
27549 fprintf (file, "\tmtctr r12\n");
27550 fprintf (file, "\tbctr\n");
27551 }
27552
27553 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27554 fprintf (file, "%s:\n", lazy_ptr_name);
27555 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27556 fprintf (file, "%sdyld_stub_binding_helper\n",
27557 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
27558 }
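/* For reference, the non-PIC (flag_pic != 2) 32-bit stub emitted
   above looks roughly like this for a symbol "_foo" (names are
   illustrative; section directives omitted):

	.align 4
   L_foo$stub:
	.indirect_symbol _foo
	lis r11,ha16(L_foo$lazy_ptr)
	lwzu r12,lo16(L_foo$lazy_ptr)(r11)
	mtctr r12
	bctr
   L_foo$lazy_ptr:
	.indirect_symbol _foo
	.long dyld_stub_binding_helper  */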
27559
27560 /* Legitimize PIC addresses. If the address is already
27561 position-independent, we return ORIG. Newly generated
27562 position-independent addresses go into a reg. This is REG if
27563 nonzero, otherwise we allocate register(s) as necessary. */
27564
27565 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
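/* I.e., X fits in a signed 16-bit immediate: the bias by 0x8000 maps
   INTVAL (X) in [-0x8000, 0x7fff] exactly onto [0, 0xffff].  */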
27566
27567 rtx
27568 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
27569 rtx reg)
27570 {
27571 rtx base, offset;
27572
27573 if (reg == NULL && !reload_completed)
27574 reg = gen_reg_rtx (Pmode);
27575
27576 if (GET_CODE (orig) == CONST)
27577 {
27578 rtx reg_temp;
27579
27580 if (GET_CODE (XEXP (orig, 0)) == PLUS
27581 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
27582 return orig;
27583
27584 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
27585
27586 /* Use a different reg for the intermediate value, as
27587 it will be marked UNCHANGING. */
27588 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
27589 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
27590 Pmode, reg_temp);
27591 offset =
27592 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
27593 Pmode, reg);
27594
27595 if (CONST_INT_P (offset))
27596 {
27597 if (SMALL_INT (offset))
27598 return plus_constant (Pmode, base, INTVAL (offset));
27599 else if (!reload_completed)
27600 offset = force_reg (Pmode, offset);
27601 else
27602 {
27603 rtx mem = force_const_mem (Pmode, orig);
27604 return machopic_legitimize_pic_address (mem, Pmode, reg);
27605 }
27606 }
27607 return gen_rtx_PLUS (Pmode, base, offset);
27608 }
27609
27610 /* Fall back on generic machopic code. */
27611 return machopic_legitimize_pic_address (orig, mode, reg);
27612 }
27613
27614 /* Output a .machine directive for the Darwin assembler, and call
27615 the generic start_file routine. */
27616
27617 static void
27618 rs6000_darwin_file_start (void)
27619 {
27620 static const struct
27621 {
27622 const char *arg;
27623 const char *name;
27624 HOST_WIDE_INT if_set;
27625 } mapping[] = {
27626 { "ppc64", "ppc64", MASK_64BIT },
27627 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
27628 { "power4", "ppc970", 0 },
27629 { "G5", "ppc970", 0 },
27630 { "7450", "ppc7450", 0 },
27631 { "7400", "ppc7400", MASK_ALTIVEC },
27632 { "G4", "ppc7400", 0 },
27633 { "750", "ppc750", 0 },
27634 { "740", "ppc750", 0 },
27635 { "G3", "ppc750", 0 },
27636 { "604e", "ppc604e", 0 },
27637 { "604", "ppc604", 0 },
27638 { "603e", "ppc603", 0 },
27639 { "603", "ppc603", 0 },
27640 { "601", "ppc601", 0 },
27641 { NULL, "ppc", 0 } };
27642 const char *cpu_id = "";
27643 size_t i;
27644
27645 rs6000_file_start ();
27646 darwin_file_start ();
27647
27648 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
27649
27650 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
27651 cpu_id = rs6000_default_cpu;
27652
27653 if (global_options_set.x_rs6000_cpu_index)
27654 cpu_id = processor_target_table[rs6000_cpu_index].name;
27655
27656 /* Look through the mapping array. Pick the first name that either
27657 matches the argument, has a bit set in IF_SET that is also set
27658 in the target flags, or has a NULL name. */
27659
27660 i = 0;
27661 while (mapping[i].arg != NULL
27662 && strcmp (mapping[i].arg, cpu_id) != 0
27663 && (mapping[i].if_set & rs6000_isa_flags) == 0)
27664 i++;
27665
27666 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
27667 }
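/* E.g., -mcpu=G4 matches the "G4" entry and emits ".machine ppc7400",
   while a 64-bit target with no explicit -mcpu matches the MASK_64BIT
   entry and emits ".machine ppc64".  */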
27668
27669 #endif /* TARGET_MACHO */
27670
27671 #if TARGET_ELF
27672 static int
27673 rs6000_elf_reloc_rw_mask (void)
27674 {
27675 if (flag_pic)
27676 return 3;
27677 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27678 return 2;
27679 else
27680 return 0;
27681 }
27682
27683 /* Record an element in the table of global constructors. SYMBOL is
27684 a SYMBOL_REF of the function to be called; PRIORITY is a number
27685 between 0 and MAX_INIT_PRIORITY.
27686
27687 This differs from default_named_section_asm_out_constructor in
27688 that we have special handling for -mrelocatable. */
27689
27690 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
27691 static void
27692 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
27693 {
27694 const char *section = ".ctors";
27695 char buf[18];
27696
27697 if (priority != DEFAULT_INIT_PRIORITY)
27698 {
27699 sprintf (buf, ".ctors.%.5u",
27700 /* Invert the numbering so the linker puts us in the proper
27701 order; constructors are run from right to left, and the
27702 linker sorts in increasing order. */
27703 MAX_INIT_PRIORITY - priority);
27704 section = buf;
27705 }
27706
27707 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27708 assemble_align (POINTER_SIZE);
27709
27710 if (DEFAULT_ABI == ABI_V4
27711 && (TARGET_RELOCATABLE || flag_pic > 1))
27712 {
27713 fputs ("\t.long (", asm_out_file);
27714 output_addr_const (asm_out_file, symbol);
27715 fputs (")@fixup\n", asm_out_file);
27716 }
27717 else
27718 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27719 }
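/* Worked example: with MAX_INIT_PRIORITY == 65535, a constructor of
   priority 65400 is placed in section ".ctors.00135" (65535 - 65400,
   printed with %.5u).  */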
27720
27721 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
27722 static void
27723 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
27724 {
27725 const char *section = ".dtors";
27726 char buf[18];
27727
27728 if (priority != DEFAULT_INIT_PRIORITY)
27729 {
27730 sprintf (buf, ".dtors.%.5u",
27731 /* Invert the numbering so the linker puts us in the proper
27732 order; constructors are run from right to left, and the
27733 linker sorts in increasing order. */
27734 MAX_INIT_PRIORITY - priority);
27735 section = buf;
27736 }
27737
27738 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27739 assemble_align (POINTER_SIZE);
27740
27741 if (DEFAULT_ABI == ABI_V4
27742 && (TARGET_RELOCATABLE || flag_pic > 1))
27743 {
27744 fputs ("\t.long (", asm_out_file);
27745 output_addr_const (asm_out_file, symbol);
27746 fputs (")@fixup\n", asm_out_file);
27747 }
27748 else
27749 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27750 }
27751
27752 void
27753 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
27754 {
27755 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
27756 {
27757 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
27758 ASM_OUTPUT_LABEL (file, name);
27759 fputs (DOUBLE_INT_ASM_OP, file);
27760 rs6000_output_function_entry (file, name);
27761 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
27762 if (DOT_SYMBOLS)
27763 {
27764 fputs ("\t.size\t", file);
27765 assemble_name (file, name);
27766 fputs (",24\n\t.type\t.", file);
27767 assemble_name (file, name);
27768 fputs (",@function\n", file);
27769 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
27770 {
27771 fputs ("\t.globl\t.", file);
27772 assemble_name (file, name);
27773 putc ('\n', file);
27774 }
27775 }
27776 else
27777 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27778 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27779 rs6000_output_function_entry (file, name);
27780 fputs (":\n", file);
27781 return;
27782 }
27783
27784 int uses_toc;
27785 if (DEFAULT_ABI == ABI_V4
27786 && (TARGET_RELOCATABLE || flag_pic > 1)
27787 && !TARGET_SECURE_PLT
27788 && (!constant_pool_empty_p () || crtl->profile)
27789 && (uses_toc = uses_TOC ()))
27790 {
27791 char buf[256];
27792
27793 if (uses_toc == 2)
27794 switch_to_other_text_partition ();
27795 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27796
27797 fprintf (file, "\t.long ");
27798 assemble_name (file, toc_label_name);
27799 need_toc_init = 1;
27800 putc ('-', file);
27801 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27802 assemble_name (file, buf);
27803 putc ('\n', file);
27804 if (uses_toc == 2)
27805 switch_to_other_text_partition ();
27806 }
27807
27808 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27809 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27810
27811 if (TARGET_CMODEL == CMODEL_LARGE
27812 && rs6000_global_entry_point_prologue_needed_p ())
27813 {
27814 char buf[256];
27815
27816 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27817
27818 fprintf (file, "\t.quad .TOC.-");
27819 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27820 assemble_name (file, buf);
27821 putc ('\n', file);
27822 }
27823
27824 if (DEFAULT_ABI == ABI_AIX)
27825 {
27826 const char *desc_name, *orig_name;
27827
27828 orig_name = (*targetm.strip_name_encoding) (name);
27829 desc_name = orig_name;
27830 while (*desc_name == '.')
27831 desc_name++;
27832
27833 if (TREE_PUBLIC (decl))
27834 fprintf (file, "\t.globl %s\n", desc_name);
27835
27836 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27837 fprintf (file, "%s:\n", desc_name);
27838 fprintf (file, "\t.long %s\n", orig_name);
27839 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
27840 fputs ("\t.long 0\n", file);
27841 fprintf (file, "\t.previous\n");
27842 }
27843 ASM_OUTPUT_LABEL (file, name);
27844 }
27845
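/* Illustrative example, not part of the original source: for a public
   64-bit ELFv1 function "foo" compiled without dot symbols, the block
   above emits approximately the following, where ".L.foo" stands for
   the entry label produced by rs6000_output_function_entry:

	.section	".opd","aw"
	.align 3
   foo:
	.quad	.L.foo,.TOC.@tocbase,0
	.previous
	.type	foo, @function
   .L.foo:  */
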
27846 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
27847 static void
27848 rs6000_elf_file_end (void)
27849 {
27850 #ifdef HAVE_AS_GNU_ATTRIBUTE
27851 /* ??? The value emitted depends on options active at file end.
27852 Assume anyone using #pragma or attributes that might change
27853 options knows what they are doing. */
27854 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
27855 && rs6000_passes_float)
27856 {
27857 int fp;
27858
27859 if (TARGET_HARD_FLOAT)
27860 fp = 1;
27861 else
27862 fp = 2;
27863 if (rs6000_passes_long_double)
27864 {
27865 if (!TARGET_LONG_DOUBLE_128)
27866 fp |= 2 * 4;
27867 else if (TARGET_IEEEQUAD)
27868 fp |= 3 * 4;
27869 else
27870 fp |= 1 * 4;
27871 }
27872 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
27873 }
27874 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
27875 {
27876 if (rs6000_passes_vector)
27877 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
27878 (TARGET_ALTIVEC_ABI ? 2 : 1));
27879 if (rs6000_returns_struct)
27880 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
27881 aix_struct_return ? 2 : 1);
27882 }
27883 #endif
27884 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27885 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
27886 file_end_indicate_exec_stack ();
27887 #endif
27888
27889 if (flag_split_stack)
27890 file_end_indicate_split_stack ();
27891
27892 if (cpu_builtin_p)
27893 {
27894 /* We have expanded a CPU builtin, so we need to emit a reference to
27895 the special symbol that LIBC uses to declare that it supports the
27896 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
27897 switch_to_section (data_section);
27898 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
27899 fprintf (asm_out_file, "\t%s %s\n",
27900 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
27901 }
27902 }
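
/* Illustrative sketch, not part of the original source: the FP value
   computed above packs two fields -- bits 0-1 describe scalar float
   support (1 = hard, 2 = soft) and bits 2-3 describe long double
   (1 = 128-bit IBM, 2 = 64-bit, 3 = 128-bit IEEE).  */
#if 0
static int
power_abi_fp_value (int hard_float, int long_double_128, int ieeequad)
{
  int fp = hard_float ? 1 : 2;
  if (!long_double_128)
    fp |= 2 * 4;	/* 64-bit long double */
  else if (ieeequad)
    fp |= 3 * 4;	/* 128-bit IEEE long double */
  else
    fp |= 1 * 4;	/* 128-bit IBM long double */
  return fp;		/* e.g. hard float + IBM ld128 yields 5 */
}
#endif
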
27903 #endif
27904
27905 #if TARGET_XCOFF
27906
27907 #ifndef HAVE_XCOFF_DWARF_EXTRAS
27908 #define HAVE_XCOFF_DWARF_EXTRAS 0
27909 #endif
27910
27911 static enum unwind_info_type
27912 rs6000_xcoff_debug_unwind_info (void)
27913 {
27914 return UI_NONE;
27915 }
27916
27917 static void
27918 rs6000_xcoff_asm_output_anchor (rtx symbol)
27919 {
27920 char buffer[100];
27921
27922 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
27923 SYMBOL_REF_BLOCK_OFFSET (symbol));
27924 fprintf (asm_out_file, "%s", SET_ASM_OP);
27925 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
27926 fprintf (asm_out_file, ",");
27927 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
27928 fprintf (asm_out_file, "\n");
27929 }
27930
27931 static void
27932 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
27933 {
27934 fputs (GLOBAL_ASM_OP, stream);
27935 RS6000_OUTPUT_BASENAME (stream, name);
27936 putc ('\n', stream);
27937 }
27938
27939 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
27940 points to the section string variable. */
27941
27942 static void
27943 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
27944 {
27945 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
27946 *(const char *const *) directive,
27947 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
27948 }
27949
27950 /* Likewise for read-write sections. */
27951
27952 static void
27953 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
27954 {
27955 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
27956 *(const char *const *) directive,
27957 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
27958 }
27959
27960 static void
27961 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
27962 {
27963 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
27964 *(const char *const *) directive,
27965 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
27966 }
27967
27968 /* A get_unnamed_section callback, used for switching to toc_section. */
27969
27970 static void
27971 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
27972 {
27973 if (TARGET_MINIMAL_TOC)
27974 {
27975 /* toc_section is always selected at least once from
27976 rs6000_xcoff_file_start, so this is guaranteed to
27977 always be defined once and only once in each file. */
27978 if (!toc_initialized)
27979 {
27980 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
27981 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
27982 toc_initialized = 1;
27983 }
27984 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
27985 (TARGET_32BIT ? "" : ",3"));
27986 }
27987 else
27988 fputs ("\t.toc\n", asm_out_file);
27989 }
27990
27991 /* Implement TARGET_ASM_INIT_SECTIONS. */
27992
27993 static void
27994 rs6000_xcoff_asm_init_sections (void)
27995 {
27996 read_only_data_section
27997 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
27998 &xcoff_read_only_section_name);
27999
28000 private_data_section
28001 = get_unnamed_section (SECTION_WRITE,
28002 rs6000_xcoff_output_readwrite_section_asm_op,
28003 &xcoff_private_data_section_name);
28004
28005 read_only_private_data_section
28006 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
28007 &xcoff_private_rodata_section_name);
28008
28009 tls_data_section
28010 = get_unnamed_section (SECTION_TLS,
28011 rs6000_xcoff_output_tls_section_asm_op,
28012 &xcoff_tls_data_section_name);
28013
28014 tls_private_data_section
28015 = get_unnamed_section (SECTION_TLS,
28016 rs6000_xcoff_output_tls_section_asm_op,
28017 &xcoff_private_data_section_name);
28018
28019 toc_section
28020 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
28021
28022 readonly_data_section = read_only_data_section;
28023 }
28024
28025 static int
28026 rs6000_xcoff_reloc_rw_mask (void)
28027 {
28028 return 3;
28029 }
28030
28031 static void
28032 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
28033 tree decl ATTRIBUTE_UNUSED)
28034 {
28035 int smclass;
28036 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
28037
28038 if (flags & SECTION_EXCLUDE)
28039 smclass = 4;
28040 else if (flags & SECTION_DEBUG)
28041 {
28042 fprintf (asm_out_file, "\t.dwsect %s\n", name);
28043 return;
28044 }
28045 else if (flags & SECTION_CODE)
28046 smclass = 0;
28047 else if (flags & SECTION_TLS)
28048 smclass = 3;
28049 else if (flags & SECTION_WRITE)
28050 smclass = 2;
28051 else
28052 smclass = 1;
28053
28054 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
28055 (flags & SECTION_CODE) ? "." : "",
28056 name, suffix[smclass], flags & SECTION_ENTSIZE);
28057 }
28058
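/* Illustrative example, not part of the original source: a writable
   named section "foo" whose SECTION_ENTSIZE bits hold an alignment
   log2 of 3 is emitted by the function above as

	.csect foo[RW],3

   while a code section additionally gets a leading dot and the PR
   mapping class, e.g. ".csect .bar[PR],2".  */
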
28059 #define IN_NAMED_SECTION(DECL) \
28060 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
28061 && DECL_SECTION_NAME (DECL) != NULL)
28062
28063 static section *
28064 rs6000_xcoff_select_section (tree decl, int reloc,
28065 unsigned HOST_WIDE_INT align)
28066 {
28067 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
28068 a named section. */
28069 if (align > BIGGEST_ALIGNMENT)
28070 {
28071 resolve_unique_section (decl, reloc, true);
28072 if (IN_NAMED_SECTION (decl))
28073 return get_named_section (decl, NULL, reloc);
28074 }
28075
28076 if (decl_readonly_section (decl, reloc))
28077 {
28078 if (TREE_PUBLIC (decl))
28079 return read_only_data_section;
28080 else
28081 return read_only_private_data_section;
28082 }
28083 else
28084 {
28085 #if HAVE_AS_TLS
28086 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28087 {
28088 if (TREE_PUBLIC (decl))
28089 return tls_data_section;
28090 else if (bss_initializer_p (decl))
28091 {
28092 /* Convert to COMMON to emit in BSS. */
28093 DECL_COMMON (decl) = 1;
28094 return tls_comm_section;
28095 }
28096 else
28097 return tls_private_data_section;
28098 }
28099 else
28100 #endif
28101 if (TREE_PUBLIC (decl))
28102 return data_section;
28103 else
28104 return private_data_section;
28105 }
28106 }
28107
28108 static void
28109 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
28110 {
28111 const char *name;
28112
28113 /* Use select_section for private data and uninitialized data with
28114 alignment <= BIGGEST_ALIGNMENT. */
28115 if (!TREE_PUBLIC (decl)
28116 || DECL_COMMON (decl)
28117 || (DECL_INITIAL (decl) == NULL_TREE
28118 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
28119 || DECL_INITIAL (decl) == error_mark_node
28120 || (flag_zero_initialized_in_bss
28121 && initializer_zerop (DECL_INITIAL (decl))))
28122 return;
28123
28124 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
28125 name = (*targetm.strip_name_encoding) (name);
28126 set_decl_section_name (decl, name);
28127 }
28128
28129 /* Select section for constant in constant pool.
28130
28131 On RS/6000, all constants are in the private read-only data area.
28132 However, if this is being placed in the TOC it must be output as a
28133 toc entry. */
28134
28135 static section *
28136 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
28137 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
28138 {
28139 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28140 return toc_section;
28141 else
28142 return read_only_private_data_section;
28143 }
28144
28145 /* Remove any trailing [DS] or the like from the symbol name. */
28146
28147 static const char *
28148 rs6000_xcoff_strip_name_encoding (const char *name)
28149 {
28150 size_t len;
28151 if (*name == '*')
28152 name++;
28153 len = strlen (name);
28154 if (name[len - 1] == ']')
28155 return ggc_alloc_string (name, len - 4);
28156 else
28157 return name;
28158 }
28159
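/* Illustrative examples, not part of the original source: the function
   above maps "foo[DS]" to "foo" and "*bar[RW]" to "bar"; note that it
   assumes any trailing bracketed suffix is exactly four characters.  */
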
28160 /* Section attributes. AIX is always PIC. */
28161
28162 static unsigned int
28163 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
28164 {
28165 unsigned int align;
28166 unsigned int flags = default_section_type_flags (decl, name, reloc);
28167
28168 /* Align to at least UNIT size. */
28169 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
28170 align = MIN_UNITS_PER_WORD;
28171 else
28172 /* Increase alignment of large objects if not already stricter. */
28173 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
28174 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
28175 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
28176
28177 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
28178 }
28179
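/* Illustrative note, not part of the original source: the alignment is
   folded into the SECTION_ENTSIZE bits as a log2 value, so an 8-byte
   alignment is stored as exact_log2 (8) == 3 and later printed by
   rs6000_xcoff_asm_named_section as the trailing ",3".  */
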
28180 /* Output at beginning of assembler file.
28181
28182 Initialize the section names for the RS/6000 at this point.
28183
28184 Specify filename, including full path, to assembler.
28185
28186 We want to go into the TOC section so at least one .toc will be emitted.
28187 Also, in order to output proper .bs/.es pairs, we need at least one static
28188 [RW] section emitted.
28189
28190 Finally, declare mcount when profiling to make the assembler happy. */
28191
28192 static void
28193 rs6000_xcoff_file_start (void)
28194 {
28195 rs6000_gen_section_name (&xcoff_bss_section_name,
28196 main_input_filename, ".bss_");
28197 rs6000_gen_section_name (&xcoff_private_data_section_name,
28198 main_input_filename, ".rw_");
28199 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
28200 main_input_filename, ".rop_");
28201 rs6000_gen_section_name (&xcoff_read_only_section_name,
28202 main_input_filename, ".ro_");
28203 rs6000_gen_section_name (&xcoff_tls_data_section_name,
28204 main_input_filename, ".tls_");
28205 rs6000_gen_section_name (&xcoff_tbss_section_name,
28206 main_input_filename, ".tbss_[UL]");
28207
28208 fputs ("\t.file\t", asm_out_file);
28209 output_quoted_string (asm_out_file, main_input_filename);
28210 fputc ('\n', asm_out_file);
28211 if (write_symbols != NO_DEBUG)
28212 switch_to_section (private_data_section);
28213 switch_to_section (toc_section);
28214 switch_to_section (text_section);
28215 if (profile_flag)
28216 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
28217 rs6000_file_start ();
28218 }
28219
28220 /* Output at end of assembler file.
28221 On the RS/6000, referencing data should automatically pull in text. */
28222
28223 static void
28224 rs6000_xcoff_file_end (void)
28225 {
28226 switch_to_section (text_section);
28227 fputs ("_section_.text:\n", asm_out_file);
28228 switch_to_section (data_section);
28229 fputs (TARGET_32BIT
28230 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
28231 asm_out_file);
28232 }
28233
28234 struct declare_alias_data
28235 {
28236 FILE *file;
28237 bool function_descriptor;
28238 };
28239
28240 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
28241
28242 static bool
28243 rs6000_declare_alias (struct symtab_node *n, void *d)
28244 {
28245 struct declare_alias_data *data = (struct declare_alias_data *)d;
28246 /* Main symbol is output specially, because varasm machinery does part of
28247 the job for us - we do not need to declare .globl/lglobs and such. */
28248 if (!n->alias || n->weakref)
28249 return false;
28250
28251 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
28252 return false;
28253
28254 /* Prevent assemble_alias from trying to use .set pseudo operation
28255 that does not behave as expected by the middle-end. */
28256 TREE_ASM_WRITTEN (n->decl) = true;
28257
28258 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
28259 char *buffer = (char *) alloca (strlen (name) + 2);
28260 char *p;
28261 int dollar_inside = 0;
28262
28263 strcpy (buffer, name);
28264 p = strchr (buffer, '$');
28265 while (p) {
28266 *p = '_';
28267 dollar_inside++;
28268 p = strchr (p + 1, '$');
28269 }
28270 if (TREE_PUBLIC (n->decl))
28271 {
28272 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
28273 {
28274 if (dollar_inside) {
28275 if (data->function_descriptor)
28276 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28277 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28278 }
28279 if (data->function_descriptor)
28280 {
28281 fputs ("\t.globl .", data->file);
28282 RS6000_OUTPUT_BASENAME (data->file, buffer);
28283 putc ('\n', data->file);
28284 }
28285 fputs ("\t.globl ", data->file);
28286 RS6000_OUTPUT_BASENAME (data->file, buffer);
28287 putc ('\n', data->file);
28288 }
28289 #ifdef ASM_WEAKEN_DECL
28290 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
28291 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
28292 #endif
28293 }
28294 else
28295 {
28296 if (dollar_inside)
28297 {
28298 if (data->function_descriptor)
28299 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
28300 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
28301 }
28302 if (data->function_descriptor)
28303 {
28304 fputs ("\t.lglobl .", data->file);
28305 RS6000_OUTPUT_BASENAME (data->file, buffer);
28306 putc ('\n', data->file);
28307 }
28308 fputs ("\t.lglobl ", data->file);
28309 RS6000_OUTPUT_BASENAME (data->file, buffer);
28310 putc ('\n', data->file);
28311 }
28312 if (data->function_descriptor)
28313 fputs (".", data->file);
28314 RS6000_OUTPUT_BASENAME (data->file, buffer);
28315 fputs (":\n", data->file);
28316 return false;
28317 }
28318
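/* Illustrative example, not part of the original source: a public data
   alias named "foo$bar" is rewritten above to the assembler-safe
   "foo_bar" and paired with a rename directive that preserves the
   original name in the object file, producing roughly

	.rename foo_bar,"foo$bar"
	.globl foo_bar
   foo_bar:  */
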
28319
28320 #ifdef HAVE_GAS_HIDDEN
28321 /* Helper function to calculate visibility of a DECL
28322 and return the value as a const string. */
28323
28324 static const char *
28325 rs6000_xcoff_visibility (tree decl)
28326 {
28327 static const char * const visibility_types[] = {
28328 "", ",protected", ",hidden", ",internal"
28329 };
28330
28331 enum symbol_visibility vis = DECL_VISIBILITY (decl);
28332 return visibility_types[vis];
28333 }
28334 #endif
28335
28336
28337 /* This macro produces the initial definition of a function name.
28338 On the RS/6000, we need to place an extra '.' in the function name and
28339 output the function descriptor.
28340 Dollar signs are converted to underscores.
28341
28342 The csect for the function will have already been created when
28343 text_section was selected. We do have to go back to that csect, however.
28344
28345 The third and fourth parameters to the .function pseudo-op (16 and 044)
28346 are placeholders which no longer have any use.
28347
28348 Because AIX assembler's .set command has unexpected semantics, we output
28349 all aliases as alternative labels in front of the definition. */
28350
28351 void
28352 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
28353 {
28354 char *buffer = (char *) alloca (strlen (name) + 1);
28355 char *p;
28356 int dollar_inside = 0;
28357 struct declare_alias_data data = {file, false};
28358
28359 strcpy (buffer, name);
28360 p = strchr (buffer, '$');
28361 while (p) {
28362 *p = '_';
28363 dollar_inside++;
28364 p = strchr (p + 1, '$');
28365 }
28366 if (TREE_PUBLIC (decl))
28367 {
28368 if (!RS6000_WEAK || !DECL_WEAK (decl))
28369 {
28370 if (dollar_inside) {
28371 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28372 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28373 }
28374 fputs ("\t.globl .", file);
28375 RS6000_OUTPUT_BASENAME (file, buffer);
28376 #ifdef HAVE_GAS_HIDDEN
28377 fputs (rs6000_xcoff_visibility (decl), file);
28378 #endif
28379 putc ('\n', file);
28380 }
28381 }
28382 else
28383 {
28384 if (dollar_inside) {
28385 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
28386 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
28387 }
28388 fputs ("\t.lglobl .", file);
28389 RS6000_OUTPUT_BASENAME (file, buffer);
28390 putc ('\n', file);
28391 }
28392 fputs ("\t.csect ", file);
28393 RS6000_OUTPUT_BASENAME (file, buffer);
28394 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
28395 RS6000_OUTPUT_BASENAME (file, buffer);
28396 fputs (":\n", file);
28397 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28398 &data, true);
28399 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
28400 RS6000_OUTPUT_BASENAME (file, buffer);
28401 fputs (", TOC[tc0], 0\n", file);
28402 in_section = NULL;
28403 switch_to_section (function_section (decl));
28404 putc ('.', file);
28405 RS6000_OUTPUT_BASENAME (file, buffer);
28406 fputs (":\n", file);
28407 data.function_descriptor = true;
28408 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28409 &data, true);
28410 if (!DECL_IGNORED_P (decl))
28411 {
28412 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28413 xcoffout_declare_function (file, decl, buffer);
28414 else if (write_symbols == DWARF2_DEBUG)
28415 {
28416 name = (*targetm.strip_name_encoding) (name);
28417 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
28418 }
28419 }
28420 return;
28421 }
28422
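/* Illustrative example, not part of the original source: for a public
   32-bit function "foo" the code above emits roughly

	.globl .foo
	.csect foo[DS]
   foo:
	.long .foo, TOC[tc0], 0
	<switch to the function's text csect>
   .foo:

   i.e. a descriptor csect holding the entry address and the TOC
   anchor, followed by the actual code label in the text csect.  */
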
28423
28424 /* Output assembly language to globalize a symbol from a DECL,
28425 possibly with visibility. */
28426
28427 void
28428 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
28429 {
28430 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
28431 fputs (GLOBAL_ASM_OP, stream);
28432 RS6000_OUTPUT_BASENAME (stream, name);
28433 #ifdef HAVE_GAS_HIDDEN
28434 fputs (rs6000_xcoff_visibility (decl), stream);
28435 #endif
28436 putc ('\n', stream);
28437 }
28438
28439 /* Output assembly language to define a symbol as COMMON from a DECL,
28440 possibly with visibility. */
28441
28442 void
28443 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
28444 tree decl ATTRIBUTE_UNUSED,
28445 const char *name,
28446 unsigned HOST_WIDE_INT size,
28447 unsigned HOST_WIDE_INT align)
28448 {
28449 unsigned HOST_WIDE_INT align2 = 2;
28450
28451 if (align > 32)
28452 align2 = floor_log2 (align / BITS_PER_UNIT);
28453 else if (size > 4)
28454 align2 = 3;
28455
28456 fputs (COMMON_ASM_OP, stream);
28457 RS6000_OUTPUT_BASENAME (stream, name);
28458
28459 fprintf (stream,
28460 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
28461 size, align2);
28462
28463 #ifdef HAVE_GAS_HIDDEN
28464 if (decl != NULL)
28465 fputs (rs6000_xcoff_visibility (decl), stream);
28466 #endif
28467 putc ('\n', stream);
28468 }
28469
28470 /* This macro produces the initial definition of an object (variable) name.
28471 Because AIX assembler's .set command has unexpected semantics, we output
28472 all aliases as alternative labels in front of the definition. */
28473
28474 void
28475 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
28476 {
28477 struct declare_alias_data data = {file, false};
28478 RS6000_OUTPUT_BASENAME (file, name);
28479 fputs (":\n", file);
28480 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
28481 &data, true);
28482 }
28483
28484 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
28485
28486 void
28487 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
28488 {
28489 fputs (integer_asm_op (size, FALSE), file);
28490 assemble_name (file, label);
28491 fputs ("-$", file);
28492 }
28493
28494 /* Output a symbol offset relative to the dbase for the current object.
28495 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
28496 signed offsets.
28497
28498 __gcc_unwind_dbase is embedded in all executables/libraries through
28499 libgcc/config/rs6000/crtdbase.S. */
28500
28501 void
28502 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
28503 {
28504 fputs (integer_asm_op (size, FALSE), file);
28505 assemble_name (file, label);
28506 fputs("-__gcc_unwind_dbase", file);
28507 }
28508
28509 #ifdef HAVE_AS_TLS
28510 static void
28511 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
28512 {
28513 rtx symbol;
28514 int flags;
28515 const char *symname;
28516
28517 default_encode_section_info (decl, rtl, first);
28518
28519 /* Careful not to prod global register variables. */
28520 if (!MEM_P (rtl))
28521 return;
28522 symbol = XEXP (rtl, 0);
28523 if (!SYMBOL_REF_P (symbol))
28524 return;
28525
28526 flags = SYMBOL_REF_FLAGS (symbol);
28527
28528 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
28529 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
28530
28531 SYMBOL_REF_FLAGS (symbol) = flags;
28532
28533 /* Append mapping class to extern decls. */
28534 symname = XSTR (symbol, 0);
28535 if (decl /* sync condition with assemble_external () */
28536 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
28537 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
28538 || TREE_CODE (decl) == FUNCTION_DECL)
28539 && symname[strlen (symname) - 1] != ']')
28540 {
28541 char *newname = (char *) alloca (strlen (symname) + 5);
28542 strcpy (newname, symname);
28543 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
28544 ? "[DS]" : "[UA]"));
28545 XSTR (symbol, 0) = ggc_strdup (newname);
28546 }
28547 }
28548 #endif /* HAVE_AS_TLS */
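
/* Illustrative examples, not part of the original source: after the
   encoding above, an external function "fn" is referenced as "fn[DS]"
   (its descriptor csect) and an external non-TLS variable "var" as
   "var[UA]", so the assembler sees an explicit mapping class on every
   extern reference.  */
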
28549 #endif /* TARGET_XCOFF */
28550
28551 void
28552 rs6000_asm_weaken_decl (FILE *stream, tree decl,
28553 const char *name, const char *val)
28554 {
28555 fputs ("\t.weak\t", stream);
28556 RS6000_OUTPUT_BASENAME (stream, name);
28557 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28558 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28559 {
28560 if (TARGET_XCOFF)
28561 fputs ("[DS]", stream);
28562 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28563 if (TARGET_XCOFF)
28564 fputs (rs6000_xcoff_visibility (decl), stream);
28565 #endif
28566 fputs ("\n\t.weak\t.", stream);
28567 RS6000_OUTPUT_BASENAME (stream, name);
28568 }
28569 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
28570 if (TARGET_XCOFF)
28571 fputs (rs6000_xcoff_visibility (decl), stream);
28572 #endif
28573 fputc ('\n', stream);
28574 if (val)
28575 {
28576 #ifdef ASM_OUTPUT_DEF
28577 ASM_OUTPUT_DEF (stream, name, val);
28578 #endif
28579 if (decl && TREE_CODE (decl) == FUNCTION_DECL
28580 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
28581 {
28582 fputs ("\t.set\t.", stream);
28583 RS6000_OUTPUT_BASENAME (stream, name);
28584 fputs (",.", stream);
28585 RS6000_OUTPUT_BASENAME (stream, val);
28586 fputc ('\n', stream);
28587 }
28588 }
28589 }
28590
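/* Illustrative example, not part of the original source: on AIX with
   dot symbols, weakening function "foo" with definition "bar" emits
   roughly

	.weak	foo[DS]
	.weak	.foo
	<ASM_OUTPUT_DEF of foo and bar>
	.set	.foo,.bar

   covering both the descriptor symbol and the "." entry symbol.  */
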
28591
28592 /* Return true if INSN should not be copied. */
28593
28594 static bool
28595 rs6000_cannot_copy_insn_p (rtx_insn *insn)
28596 {
28597 return recog_memoized (insn) >= 0
28598 && get_attr_cannot_copy (insn);
28599 }
28600
28601 /* Compute a (partial) cost for rtx X. Return true if the complete
28602 cost has been computed, and false if subexpressions should be
28603 scanned. In either case, *TOTAL contains the cost result. */
28604
28605 static bool
28606 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
28607 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
28608 {
28609 int code = GET_CODE (x);
28610
28611 switch (code)
28612 {
28613 /* On the RS/6000, if it is valid in the insn, it is free. */
28614 case CONST_INT:
28615 if (((outer_code == SET
28616 || outer_code == PLUS
28617 || outer_code == MINUS)
28618 && (satisfies_constraint_I (x)
28619 || satisfies_constraint_L (x)))
28620 || (outer_code == AND
28621 && (satisfies_constraint_K (x)
28622 || (mode == SImode
28623 ? satisfies_constraint_L (x)
28624 : satisfies_constraint_J (x))))
28625 || ((outer_code == IOR || outer_code == XOR)
28626 && (satisfies_constraint_K (x)
28627 || (mode == SImode
28628 ? satisfies_constraint_L (x)
28629 : satisfies_constraint_J (x))))
28630 || outer_code == ASHIFT
28631 || outer_code == ASHIFTRT
28632 || outer_code == LSHIFTRT
28633 || outer_code == ROTATE
28634 || outer_code == ROTATERT
28635 || outer_code == ZERO_EXTRACT
28636 || (outer_code == MULT
28637 && satisfies_constraint_I (x))
28638 || ((outer_code == DIV || outer_code == UDIV
28639 || outer_code == MOD || outer_code == UMOD)
28640 && exact_log2 (INTVAL (x)) >= 0)
28641 || (outer_code == COMPARE
28642 && (satisfies_constraint_I (x)
28643 || satisfies_constraint_K (x)))
28644 || ((outer_code == EQ || outer_code == NE)
28645 && (satisfies_constraint_I (x)
28646 || satisfies_constraint_K (x)
28647 || (mode == SImode
28648 ? satisfies_constraint_L (x)
28649 : satisfies_constraint_J (x))))
28650 || (outer_code == GTU
28651 && satisfies_constraint_I (x))
28652 || (outer_code == LTU
28653 && satisfies_constraint_P (x)))
28654 {
28655 *total = 0;
28656 return true;
28657 }
28658 else if ((outer_code == PLUS
28659 && reg_or_add_cint_operand (x, VOIDmode))
28660 || (outer_code == MINUS
28661 && reg_or_sub_cint_operand (x, VOIDmode))
28662 || ((outer_code == SET
28663 || outer_code == IOR
28664 || outer_code == XOR)
28665 && (INTVAL (x)
28666 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
28667 {
28668 *total = COSTS_N_INSNS (1);
28669 return true;
28670 }
28671 /* FALLTHRU */
28672
28673 case CONST_DOUBLE:
28674 case CONST_WIDE_INT:
28675 case CONST:
28676 case HIGH:
28677 case SYMBOL_REF:
28678 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28679 return true;
28680
28681 case MEM:
28682 /* When optimizing for size, MEM should be slightly more expensive
28683 than generating the address, e.g., (plus (reg) (const)).
28684 L1 cache latency is about two instructions. */
28685 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28686 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
28687 *total += COSTS_N_INSNS (100);
28688 return true;
28689
28690 case LABEL_REF:
28691 *total = 0;
28692 return true;
28693
28694 case PLUS:
28695 case MINUS:
28696 if (FLOAT_MODE_P (mode))
28697 *total = rs6000_cost->fp;
28698 else
28699 *total = COSTS_N_INSNS (1);
28700 return false;
28701
28702 case MULT:
28703 if (CONST_INT_P (XEXP (x, 1))
28704 && satisfies_constraint_I (XEXP (x, 1)))
28705 {
28706 if (INTVAL (XEXP (x, 1)) >= -256
28707 && INTVAL (XEXP (x, 1)) <= 255)
28708 *total = rs6000_cost->mulsi_const9;
28709 else
28710 *total = rs6000_cost->mulsi_const;
28711 }
28712 else if (mode == SFmode)
28713 *total = rs6000_cost->fp;
28714 else if (FLOAT_MODE_P (mode))
28715 *total = rs6000_cost->dmul;
28716 else if (mode == DImode)
28717 *total = rs6000_cost->muldi;
28718 else
28719 *total = rs6000_cost->mulsi;
28720 return false;
28721
28722 case FMA:
28723 if (mode == SFmode)
28724 *total = rs6000_cost->fp;
28725 else
28726 *total = rs6000_cost->dmul;
28727 break;
28728
28729 case DIV:
28730 case MOD:
28731 if (FLOAT_MODE_P (mode))
28732 {
28733 *total = mode == DFmode ? rs6000_cost->ddiv
28734 : rs6000_cost->sdiv;
28735 return false;
28736 }
28737 /* FALLTHRU */
28738
28739 case UDIV:
28740 case UMOD:
28741 if (CONST_INT_P (XEXP (x, 1))
28742 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
28743 {
28744 if (code == DIV || code == MOD)
28745 /* Shift, addze */
28746 *total = COSTS_N_INSNS (2);
28747 else
28748 /* Shift */
28749 *total = COSTS_N_INSNS (1);
28750 }
28751 else
28752 {
28753 if (GET_MODE (XEXP (x, 1)) == DImode)
28754 *total = rs6000_cost->divdi;
28755 else
28756 *total = rs6000_cost->divsi;
28757 }
28758 /* Add in shift and subtract for MOD unless we have a mod instruction. */
28759 if (!TARGET_MODULO && (code == MOD || code == UMOD))
28760 *total += COSTS_N_INSNS (2);
28761 return false;
28762
28763 case CTZ:
28764 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
28765 return false;
28766
28767 case FFS:
28768 *total = COSTS_N_INSNS (4);
28769 return false;
28770
28771 case POPCOUNT:
28772 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
28773 return false;
28774
28775 case PARITY:
28776 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
28777 return false;
28778
28779 case NOT:
28780 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
28781 *total = 0;
28782 else
28783 *total = COSTS_N_INSNS (1);
28784 return false;
28785
28786 case AND:
28787 if (CONST_INT_P (XEXP (x, 1)))
28788 {
28789 rtx left = XEXP (x, 0);
28790 rtx_code left_code = GET_CODE (left);
28791
28792 /* rotate-and-mask: 1 insn. */
28793 if ((left_code == ROTATE
28794 || left_code == ASHIFT
28795 || left_code == LSHIFTRT)
28796 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
28797 {
28798 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
28799 if (!CONST_INT_P (XEXP (left, 1)))
28800 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
28801 *total += COSTS_N_INSNS (1);
28802 return true;
28803 }
28804
28805 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
28806 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
28807 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
28808 || (val & 0xffff) == val
28809 || (val & 0xffff0000) == val
28810 || ((val & 0xffff) == 0 && mode == SImode))
28811 {
28812 *total = rtx_cost (left, mode, AND, 0, speed);
28813 *total += COSTS_N_INSNS (1);
28814 return true;
28815 }
28816
28817 /* 2 insns. */
28818 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
28819 {
28820 *total = rtx_cost (left, mode, AND, 0, speed);
28821 *total += COSTS_N_INSNS (2);
28822 return true;
28823 }
28824 }
28825
28826 *total = COSTS_N_INSNS (1);
28827 return false;
28828
28829 case IOR:
28830 /* FIXME */
28831 *total = COSTS_N_INSNS (1);
28832 return true;
28833
28834 case CLZ:
28835 case XOR:
28836 case ZERO_EXTRACT:
28837 *total = COSTS_N_INSNS (1);
28838 return false;
28839
28840 case ASHIFT:
28841 /* The EXTSWSLI instruction is a combined instruction. Don't count
28842 the sign extension and the shift separately within the insn. */
28843 if (TARGET_EXTSWSLI && mode == DImode
28844 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
28845 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
28846 {
28847 *total = 0;
28848 return false;
28849 }
28850 /* fall through */
28851
28852 case ASHIFTRT:
28853 case LSHIFTRT:
28854 case ROTATE:
28855 case ROTATERT:
28856 /* Handle mul_highpart. */
28857 if (outer_code == TRUNCATE
28858 && GET_CODE (XEXP (x, 0)) == MULT)
28859 {
28860 if (mode == DImode)
28861 *total = rs6000_cost->muldi;
28862 else
28863 *total = rs6000_cost->mulsi;
28864 return true;
28865 }
28866 else if (outer_code == AND)
28867 *total = 0;
28868 else
28869 *total = COSTS_N_INSNS (1);
28870 return false;
28871
28872 case SIGN_EXTEND:
28873 case ZERO_EXTEND:
28874 if (MEM_P (XEXP (x, 0)))
28875 *total = 0;
28876 else
28877 *total = COSTS_N_INSNS (1);
28878 return false;
28879
28880 case COMPARE:
28881 case NEG:
28882 case ABS:
28883 if (!FLOAT_MODE_P (mode))
28884 {
28885 *total = COSTS_N_INSNS (1);
28886 return false;
28887 }
28888 /* FALLTHRU */
28889
28890 case FLOAT:
28891 case UNSIGNED_FLOAT:
28892 case FIX:
28893 case UNSIGNED_FIX:
28894 case FLOAT_TRUNCATE:
28895 *total = rs6000_cost->fp;
28896 return false;
28897
28898 case FLOAT_EXTEND:
28899 if (mode == DFmode)
28900 *total = rs6000_cost->sfdf_convert;
28901 else
28902 *total = rs6000_cost->fp;
28903 return false;
28904
28905 case UNSPEC:
28906 switch (XINT (x, 1))
28907 {
28908 case UNSPEC_FRSP:
28909 *total = rs6000_cost->fp;
28910 return true;
28911
28912 default:
28913 break;
28914 }
28915 break;
28916
28917 case CALL:
28918 case IF_THEN_ELSE:
28919 if (!speed)
28920 {
28921 *total = COSTS_N_INSNS (1);
28922 return true;
28923 }
28924 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
28925 {
28926 *total = rs6000_cost->fp;
28927 return false;
28928 }
28929 break;
28930
28931 case NE:
28932 case EQ:
28933 case GTU:
28934 case LTU:
28935 /* Carry bit requires mode == Pmode.
28936 NEG or PLUS already counted so only add one. */
28937 if (mode == Pmode
28938 && (outer_code == NEG || outer_code == PLUS))
28939 {
28940 *total = COSTS_N_INSNS (1);
28941 return true;
28942 }
28943 /* FALLTHRU */
28944
28945 case GT:
28946 case LT:
28947 case UNORDERED:
28948 if (outer_code == SET)
28949 {
28950 if (XEXP (x, 1) == const0_rtx)
28951 {
28952 *total = COSTS_N_INSNS (2);
28953 return true;
28954 }
28955 else
28956 {
28957 *total = COSTS_N_INSNS (3);
28958 return false;
28959 }
28960 }
28961 /* CC COMPARE. */
28962 if (outer_code == COMPARE)
28963 {
28964 *total = 0;
28965 return true;
28966 }
28967 break;
28968
28969 default:
28970 break;
28971 }
28972
28973 return false;
28974 }
28975
28976 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
28977
28978 static bool
28979 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
28980 int opno, int *total, bool speed)
28981 {
28982 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
28983
28984 fprintf (stderr,
28985 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
28986 "opno = %d, total = %d, speed = %s, x:\n",
28987 ret ? "complete" : "scan inner",
28988 GET_MODE_NAME (mode),
28989 GET_RTX_NAME (outer_code),
28990 opno,
28991 *total,
28992 speed ? "true" : "false");
28993
28994 debug_rtx (x);
28995
28996 return ret;
28997 }
28998
28999 static int
29000 rs6000_insn_cost (rtx_insn *insn, bool speed)
29001 {
29002 if (recog_memoized (insn) < 0)
29003 return 0;
29004
29005 if (!speed)
29006 return get_attr_length (insn);
29007
29008 int cost = get_attr_cost (insn);
29009 if (cost > 0)
29010 return cost;
29011
29012 int n = get_attr_length (insn) / 4;
29013 enum attr_type type = get_attr_type (insn);
29014
29015 switch (type)
29016 {
29017 case TYPE_LOAD:
29018 case TYPE_FPLOAD:
29019 case TYPE_VECLOAD:
29020 cost = COSTS_N_INSNS (n + 1);
29021 break;
29022
29023 case TYPE_MUL:
29024 switch (get_attr_size (insn))
29025 {
29026 case SIZE_8:
29027 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
29028 break;
29029 case SIZE_16:
29030 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
29031 break;
29032 case SIZE_32:
29033 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
29034 break;
29035 case SIZE_64:
29036 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
29037 break;
29038 default:
29039 gcc_unreachable ();
29040 }
29041 break;
29042 case TYPE_DIV:
29043 switch (get_attr_size (insn))
29044 {
29045 case SIZE_32:
29046 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
29047 break;
29048 case SIZE_64:
29049 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
29050 break;
29051 default:
29052 gcc_unreachable ();
29053 }
29054 break;
29055
29056 case TYPE_FP:
29057 cost = n * rs6000_cost->fp;
29058 break;
29059 case TYPE_DMUL:
29060 cost = n * rs6000_cost->dmul;
29061 break;
29062 case TYPE_SDIV:
29063 cost = n * rs6000_cost->sdiv;
29064 break;
29065 case TYPE_DDIV:
29066 cost = n * rs6000_cost->ddiv;
29067 break;
29068
29069 case TYPE_SYNC:
29070 case TYPE_LOAD_L:
29071 case TYPE_MFCR:
29072 case TYPE_MFCRF:
29073 cost = COSTS_N_INSNS (n + 2);
29074 break;
29075
29076 default:
29077 cost = COSTS_N_INSNS (n);
29078 }
29079
29080 return cost;
29081 }
29082
29083 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
29084
29085 static int
29086 rs6000_debug_address_cost (rtx x, machine_mode mode,
29087 addr_space_t as, bool speed)
29088 {
29089 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
29090
29091 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
29092 ret, speed ? "true" : "false");
29093 debug_rtx (x);
29094
29095 return ret;
29096 }
29097
29098
29099 /* A C expression returning the cost of moving data from a register of class
29100 CLASS1 to one of CLASS2. */
29101
29102 static int
29103 rs6000_register_move_cost (machine_mode mode,
29104 reg_class_t from, reg_class_t to)
29105 {
29106 int ret;
29107 reg_class_t rclass;
29108
29109 if (TARGET_DEBUG_COST)
29110 dbg_cost_ctrl++;
29111
29112 /* If we have VSX, we can easily move between FPR or Altivec registers,
29113 otherwise we can only easily move within classes.
29114 Do this first so we give best-case answers for union classes
29115 containing both gprs and vsx regs. */
29116 HARD_REG_SET to_vsx, from_vsx;
29117 COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
29118 AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
29119 COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
29120 AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
29121 if (!hard_reg_set_empty_p (to_vsx)
29122 && !hard_reg_set_empty_p (from_vsx)
29123 && (TARGET_VSX
29124 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
29125 {
29126 int reg = FIRST_FPR_REGNO;
29127 if (TARGET_VSX
29128 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
29129 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
29130 reg = FIRST_ALTIVEC_REGNO;
29131 ret = 2 * hard_regno_nregs (reg, mode);
29132 }
29133
29134 /* Moves from/to GENERAL_REGS. */
29135 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
29136 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
29137 {
29138 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
29139 {
29140 if (TARGET_DIRECT_MOVE)
29141 {
29142 /* Keep the cost for direct moves above that for within
29143 a register class even if the actual processor cost is
29144 comparable. We do this because a direct move insn
29145 can't be a nop, whereas with ideal register
29146 allocation a move within the same class might turn
29147 out to be a nop. */
29148 if (rs6000_tune == PROCESSOR_POWER9
29149 || rs6000_tune == PROCESSOR_FUTURE)
29150 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29151 else
29152 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29153 /* SFmode requires a conversion when moving between gprs
29154 and vsx. */
29155 if (mode == SFmode)
29156 ret += 2;
29157 }
29158 else
29159 ret = (rs6000_memory_move_cost (mode, rclass, false)
29160 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
29161 }
29162
29163 /* It's more expensive to move CR_REGS than CR0_REGS because of the
29164 shift. */
29165 else if (rclass == CR_REGS)
29166 ret = 4;
29167
29168 /* For those processors that have slow LR/CTR moves, make them more
29169 expensive than memory in order to bias spills to memory. */
29170 else if ((rs6000_tune == PROCESSOR_POWER6
29171 || rs6000_tune == PROCESSOR_POWER7
29172 || rs6000_tune == PROCESSOR_POWER8
29173 || rs6000_tune == PROCESSOR_POWER9)
29174 && reg_class_subset_p (rclass, SPECIAL_REGS))
29175 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29176
29177 else
29178 /* A move will cost one instruction per GPR moved. */
29179 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
29180 }
29181
29182 /* Everything else has to go through GENERAL_REGS. */
29183 else
29184 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
29185 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
29186
29187 if (TARGET_DEBUG_COST)
29188 {
29189 if (dbg_cost_ctrl == 1)
29190 fprintf (stderr,
29191 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
29192 ret, GET_MODE_NAME (mode), reg_class_names[from],
29193 reg_class_names[to]);
29194 dbg_cost_ctrl--;
29195 }
29196
29197 return ret;
29198 }
29199
29200 /* A C expression returning the cost of moving data of MODE from a register to
29201 or from memory. */
29202
29203 static int
29204 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
29205 bool in ATTRIBUTE_UNUSED)
29206 {
29207 int ret;
29208
29209 if (TARGET_DEBUG_COST)
29210 dbg_cost_ctrl++;
29211
29212 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
29213 ret = 4 * hard_regno_nregs (0, mode);
29214 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
29215 || reg_classes_intersect_p (rclass, VSX_REGS)))
29216 ret = 4 * hard_regno_nregs (32, mode);
29217 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
29218 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
29219 else
29220 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
29221
29222 if (TARGET_DEBUG_COST)
29223 {
29224 if (dbg_cost_ctrl == 1)
29225 fprintf (stderr,
29226 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
29227 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
29228 dbg_cost_ctrl--;
29229 }
29230
29231 return ret;
29232 }
29233
29234 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
29235
29236 The register allocator chooses GEN_OR_VSX_REGS for the allocno
29237 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
29238 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
29239 move cost between GENERAL_REGS and VSX_REGS low.
29240
29241 It might seem reasonable to use a union class. After all, if usage
29242 of vsr is low and gpr high, it might make sense to spill gpr to vsr
29243 rather than memory. However, in cases where register pressure of
29244 both is high, like the cactus_adm spec test, allowing
29245 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
29246 the first scheduling pass. This is partly due to an allocno of
29247 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
29248 class, which gives too high a pressure for GENERAL_REGS and too low
29249 for VSX_REGS. So, force a choice of the subclass here.
29250
29251 The best class is also the union if GENERAL_REGS and VSX_REGS have
29252 the same cost. In that case we do use GEN_OR_VSX_REGS as the
29253 allocno class, since trying to narrow down the class by regno mode
29254 is prone to error. For example, SImode is allowed in VSX regs and
29255 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
29256 it would be wrong to choose an allocno of GENERAL_REGS based on
29257 SImode. */
29258
29259 static reg_class_t
29260 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
29261 reg_class_t allocno_class,
29262 reg_class_t best_class)
29263 {
29264 switch (allocno_class)
29265 {
29266 case GEN_OR_VSX_REGS:
29267 /* best_class must be a subset of allocno_class. */
29268 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
29269 || best_class == GEN_OR_FLOAT_REGS
29270 || best_class == VSX_REGS
29271 || best_class == ALTIVEC_REGS
29272 || best_class == FLOAT_REGS
29273 || best_class == GENERAL_REGS
29274 || best_class == BASE_REGS);
29275 /* Use best_class but choose wider classes when copying from the
29276 wider class to best_class is cheap. This mimics IRA choice
29277 of allocno class. */
29278 if (best_class == BASE_REGS)
29279 return GENERAL_REGS;
29280 if (TARGET_VSX
29281 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
29282 return VSX_REGS;
29283 return best_class;
29284
29285 default:
29286 break;
29287 }
29288
29289 return allocno_class;
29290 }
29291
29292 /* Returns a code for a target-specific builtin that implements
29293 the reciprocal of the function, or NULL_TREE if not available. */
29294
29295 static tree
29296 rs6000_builtin_reciprocal (tree fndecl)
29297 {
29298 switch (DECL_FUNCTION_CODE (fndecl))
29299 {
29300 case VSX_BUILTIN_XVSQRTDP:
29301 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
29302 return NULL_TREE;
29303
29304 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
29305
29306 case VSX_BUILTIN_XVSQRTSP:
29307 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
29308 return NULL_TREE;
29309
29310 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
29311
29312 default:
29313 return NULL_TREE;
29314 }
29315 }
29316
29317 /* Load up a constant. If the mode is a vector mode, splat the value across
29318 all of the vector elements. */
29319
29320 static rtx
29321 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
29322 {
29323 rtx reg;
29324
29325 if (mode == SFmode || mode == DFmode)
29326 {
29327 rtx d = const_double_from_real_value (dconst, mode);
29328 reg = force_reg (mode, d);
29329 }
29330 else if (mode == V4SFmode)
29331 {
29332 rtx d = const_double_from_real_value (dconst, SFmode);
29333 rtvec v = gen_rtvec (4, d, d, d, d);
29334 reg = gen_reg_rtx (mode);
29335 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29336 }
29337 else if (mode == V2DFmode)
29338 {
29339 rtx d = const_double_from_real_value (dconst, DFmode);
29340 rtvec v = gen_rtvec (2, d, d);
29341 reg = gen_reg_rtx (mode);
29342 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
29343 }
29344 else
29345 gcc_unreachable ();
29346
29347 return reg;
29348 }
29349
29350 /* Generate an FMA instruction. */
29351
29352 static void
29353 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
29354 {
29355 machine_mode mode = GET_MODE (target);
29356 rtx dst;
29357
29358 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
29359 gcc_assert (dst != NULL);
29360
29361 if (dst != target)
29362 emit_move_insn (target, dst);
29363 }
29364
29365 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
29366
29367 static void
29368 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
29369 {
29370 machine_mode mode = GET_MODE (dst);
29371 rtx r;
29372
29373 /* This is a tad more complicated, since the fnma_optab is for
29374 a different expression: fma(-m1, m2, a), which is the same
29375 thing except in the case of signed zeros.
29376
29377 Fortunately we know that if FMA is supported that FNMSUB is
29378 also supported in the ISA. Just expand it directly. */
29379
29380 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
29381
29382 r = gen_rtx_NEG (mode, a);
29383 r = gen_rtx_FMA (mode, m1, m2, r);
29384 r = gen_rtx_NEG (mode, r);
29385 emit_insn (gen_rtx_SET (dst, r));
29386 }
29387
29388 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
29389 add a reg_note saying that this was a division. Support both scalar and
29390 vector divide. Assumes no trapping math and finite arguments. */
29391
29392 void
29393 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
29394 {
29395 machine_mode mode = GET_MODE (dst);
29396 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
29397 int i;
29398
29399 /* Low precision estimates guarantee 5 bits of accuracy. High
29400 precision estimates guarantee 14 bits of accuracy. SFmode
29401 requires 23 bits of accuracy. DFmode requires 52 bits of
29402 accuracy. Each pass at least doubles the accuracy, leading
29403 to the following. */
29404 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
29405 if (mode == DFmode || mode == V2DFmode)
29406 passes++;
29407
29408 enum insn_code code = optab_handler (smul_optab, mode);
29409 insn_gen_fn gen_mul = GEN_FCN (code);
29410
29411 gcc_assert (code != CODE_FOR_nothing);
29412
29413 one = rs6000_load_constant_and_splat (mode, dconst1);
29414
29415 /* x0 = 1./d estimate */
29416 x0 = gen_reg_rtx (mode);
29417 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
29418 UNSPEC_FRES)));
29419
29420 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
29421 if (passes > 1) {
29422
29423 /* e0 = 1. - d * x0 */
29424 e0 = gen_reg_rtx (mode);
29425 rs6000_emit_nmsub (e0, d, x0, one);
29426
29427 /* x1 = x0 + e0 * x0 */
29428 x1 = gen_reg_rtx (mode);
29429 rs6000_emit_madd (x1, e0, x0, x0);
29430
29431 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
29432 ++i, xprev = xnext, eprev = enext) {
29433
29434 /* enext = eprev * eprev */
29435 enext = gen_reg_rtx (mode);
29436 emit_insn (gen_mul (enext, eprev, eprev));
29437
29438 /* xnext = xprev + enext * xprev */
29439 xnext = gen_reg_rtx (mode);
29440 rs6000_emit_madd (xnext, enext, xprev, xprev);
29441 }
29442
29443 } else
29444 xprev = x0;
29445
29446 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
29447
29448 /* u = n * xprev */
29449 u = gen_reg_rtx (mode);
29450 emit_insn (gen_mul (u, n, xprev));
29451
29452 /* v = n - (d * u) */
29453 v = gen_reg_rtx (mode);
29454 rs6000_emit_nmsub (v, d, u, n);
29455
29456 /* dst = (v * xprev) + u */
29457 rs6000_emit_madd (dst, v, xprev, u);
29458
29459 if (note_p)
29460 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
29461 }
29462
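/* Illustrative sketch, not part of the original source: the same
   Newton-Raphson scheme in scalar C, with a crude constant seed
   standing in for the hardware fres/fre estimate.  Each refinement
   computes x' = x + x*(1 - d*x), roughly doubling the number of
   correct bits (the code above squares the error term instead, to the
   same effect), and the final quotient is u + x*(n - d*u) with
   u = n*x.  */
#if 0
#include <stdio.h>

static double
nr_divide (double n, double d, int passes)
{
  double x = 0.3;			/* crude seed for d near 3 */
  for (int i = 0; i < passes - 1; i++)
    x = x + x * (1.0 - d * x);		/* nmsub + madd refinement */
  double u = n * x;			/* u = n * xprev */
  double v = n - d * u;			/* v = n - (d * u) */
  return v * x + u;			/* dst = (v * xprev) + u */
}

int
main (void)
{
  printf ("%.17g\n", nr_divide (1.0, 3.0, 4));	/* ~0.33333333333333331 */
  return 0;
}
#endif
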
29463 /* Goldschmidt's Algorithm for single/double-precision floating point
29464 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
29465
29466 void
29467 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
29468 {
29469 machine_mode mode = GET_MODE (src);
29470 rtx e = gen_reg_rtx (mode);
29471 rtx g = gen_reg_rtx (mode);
29472 rtx h = gen_reg_rtx (mode);
29473
29474 /* Low precision estimates guarantee 5 bits of accuracy. High
29475 precision estimates guarantee 14 bits of accuracy. SFmode
29476 requires 23 bits of accuracy. DFmode requires 52 bits of
29477 accuracy. Each pass at least doubles the accuracy, leading
29478 to the following. */
29479 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
29480 if (mode == DFmode || mode == V2DFmode)
29481 passes++;
29482
29483 int i;
29484 rtx mhalf;
29485 enum insn_code code = optab_handler (smul_optab, mode);
29486 insn_gen_fn gen_mul = GEN_FCN (code);
29487
29488 gcc_assert (code != CODE_FOR_nothing);
29489
29490 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
29491
29492 /* e = rsqrt estimate */
29493 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
29494 UNSPEC_RSQRT)));
29495
29496 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
29497 if (!recip)
29498 {
29499 rtx zero = force_reg (mode, CONST0_RTX (mode));
29500
29501 if (mode == SFmode)
29502 {
29503 rtx target = emit_conditional_move (e, GT, src, zero, mode,
29504 e, zero, mode, 0);
29505 if (target != e)
29506 emit_move_insn (e, target);
29507 }
29508 else
29509 {
29510 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
29511 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
29512 }
29513 }
29514
29515 /* g = sqrt estimate. */
29516 emit_insn (gen_mul (g, e, src));
29517 /* h = 1/(2*sqrt) estimate. */
29518 emit_insn (gen_mul (h, e, mhalf));
29519
29520 if (recip)
29521 {
29522 if (passes == 1)
29523 {
29524 rtx t = gen_reg_rtx (mode);
29525 rs6000_emit_nmsub (t, g, h, mhalf);
29526 /* Apply correction directly to 1/rsqrt estimate. */
29527 rs6000_emit_madd (dst, e, t, e);
29528 }
29529 else
29530 {
29531 for (i = 0; i < passes; i++)
29532 {
29533 rtx t1 = gen_reg_rtx (mode);
29534 rtx g1 = gen_reg_rtx (mode);
29535 rtx h1 = gen_reg_rtx (mode);
29536
29537 rs6000_emit_nmsub (t1, g, h, mhalf);
29538 rs6000_emit_madd (g1, g, t1, g);
29539 rs6000_emit_madd (h1, h, t1, h);
29540
29541 g = g1;
29542 h = h1;
29543 }
29544 /* Multiply by 2 for 1/rsqrt. */
29545 emit_insn (gen_add3_insn (dst, h, h));
29546 }
29547 }
29548 else
29549 {
29550 rtx t = gen_reg_rtx (mode);
29551 rs6000_emit_nmsub (t, g, h, mhalf);
29552 rs6000_emit_madd (dst, g, t, g);
29553 }
29554
29555 return;
29556 }
29557
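/* Illustrative note, not part of the original source: each Goldschmidt
   step above computes t = 1/2 - g*h, then g' = g + g*t and
   h' = h + h*t.  Starting from g ~= sqrt(src) and h ~= 1/(2*sqrt(src)),
   the error in both estimates roughly squares on every step; the final
   result is g for sqrt and 2*h (formed by the add) for rsqrt.  */
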
29558 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
29559 (Power7) targets. DST is the target, and SRC is the argument operand. */
29560
29561 void
29562 rs6000_emit_popcount (rtx dst, rtx src)
29563 {
29564 machine_mode mode = GET_MODE (dst);
29565 rtx tmp1, tmp2;
29566
29567 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
29568 if (TARGET_POPCNTD)
29569 {
29570 if (mode == SImode)
29571 emit_insn (gen_popcntdsi2 (dst, src));
29572 else
29573 emit_insn (gen_popcntddi2 (dst, src));
29574 return;
29575 }
29576
29577 tmp1 = gen_reg_rtx (mode);
29578
29579 if (mode == SImode)
29580 {
29581 emit_insn (gen_popcntbsi2 (tmp1, src));
29582 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
29583 NULL_RTX, 0);
29584 tmp2 = force_reg (SImode, tmp2);
29585 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
29586 }
29587 else
29588 {
29589 emit_insn (gen_popcntbdi2 (tmp1, src));
29590 tmp2 = expand_mult (DImode, tmp1,
29591 GEN_INT ((HOST_WIDE_INT)
29592 0x01010101 << 32 | 0x01010101),
29593 NULL_RTX, 0);
29594 tmp2 = force_reg (DImode, tmp2);
29595 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
29596 }
29597 }
29598
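/* Illustrative sketch, not part of the original source: why the
   multiply above works.  popcntb leaves a population count in each
   byte, each at most 8, so no carries occur between bytes; multiplying
   by 0x01010101 accumulates all byte counts into the top byte, which
   the final shift extracts.  */
#if 0
#include <stdint.h>

static unsigned int
popcount32_from_byte_counts (uint32_t byte_counts)
{
  /* byte_counts plays the role of the popcntbsi2 result.  */
  return (uint32_t) (byte_counts * 0x01010101u) >> 24;
}
#endif
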
29599
29600 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
29601 target, and SRC is the argument operand. */
29602
29603 void
29604 rs6000_emit_parity (rtx dst, rtx src)
29605 {
29606 machine_mode mode = GET_MODE (dst);
29607 rtx tmp;
29608
29609 tmp = gen_reg_rtx (mode);
29610
29611 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
29612 if (TARGET_CMPB)
29613 {
29614 if (mode == SImode)
29615 {
29616 emit_insn (gen_popcntbsi2 (tmp, src));
29617 emit_insn (gen_paritysi2_cmpb (dst, tmp));
29618 }
29619 else
29620 {
29621 emit_insn (gen_popcntbdi2 (tmp, src));
29622 emit_insn (gen_paritydi2_cmpb (dst, tmp));
29623 }
29624 return;
29625 }
29626
29627 if (mode == SImode)
29628 {
29629 /* Is mult+shift >= shift+xor+shift+xor? */
29630 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
29631 {
29632 rtx tmp1, tmp2, tmp3, tmp4;
29633
29634 tmp1 = gen_reg_rtx (SImode);
29635 emit_insn (gen_popcntbsi2 (tmp1, src));
29636
29637 tmp2 = gen_reg_rtx (SImode);
29638 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
29639 tmp3 = gen_reg_rtx (SImode);
29640 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
29641
29642 tmp4 = gen_reg_rtx (SImode);
29643 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
29644 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
29645 }
29646 else
29647 rs6000_emit_popcount (tmp, src);
29648 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
29649 }
29650 else
29651 {
29652 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
29653 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
29654 {
29655 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
29656
29657 tmp1 = gen_reg_rtx (DImode);
29658 emit_insn (gen_popcntbdi2 (tmp1, src));
29659
29660 tmp2 = gen_reg_rtx (DImode);
29661 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
29662 tmp3 = gen_reg_rtx (DImode);
29663 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
29664
29665 tmp4 = gen_reg_rtx (DImode);
29666 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
29667 tmp5 = gen_reg_rtx (DImode);
29668 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
29669
29670 tmp6 = gen_reg_rtx (DImode);
29671 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
29672 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
29673 }
29674 else
29675 rs6000_emit_popcount (tmp, src);
29676 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
29677 }
29678 }
29679
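/* Illustrative sketch, not part of the original source: the shift/xor
   fallback above in scalar C.  Xor preserves parity, so folding the
   popcntb byte counts down to a single byte and taking its low bit
   yields the parity of the whole word.  */
#if 0
#include <stdint.h>

static unsigned int
parity32_from_byte_counts (uint32_t byte_counts)
{
  uint32_t t = byte_counts ^ (byte_counts >> 16);
  t ^= t >> 8;
  return t & 1;
}
#endif
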
29680 /* Expand an Altivec constant permutation for little endian mode.
29681 OP0 and OP1 are the input vectors and TARGET is the output vector.
29682 SEL specifies the constant permutation vector.
29683
29684 There are two issues: First, the two input operands must be
29685 swapped so that together they form a double-wide array in LE
29686 order. Second, the vperm instruction has surprising behavior
29687 in LE mode: it interprets the elements of the source vectors
29688 in BE mode ("left to right") and interprets the elements of
29689 the destination vector in LE mode ("right to left"). To
29690 correct for this, we must subtract each element of the permute
29691 control vector from 31.
29692
29693 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
29694 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
29695 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
29696 serve as the permute control vector. Then, in BE mode,
29697
29698 vperm 9,10,11,12
29699
29700 places the desired result in vr9. However, in LE mode the
29701 vector contents will be
29702
29703 vr10 = 00000003 00000002 00000001 00000000
29704 vr11 = 00000007 00000006 00000005 00000004
29705
29706 The result of the vperm using the same permute control vector is
29707
29708 vr9 = 05000000 07000000 01000000 03000000
29709
29710 That is, the leftmost 4 bytes of vr10 are interpreted as the
29711 source for the rightmost 4 bytes of vr9, and so on.
29712
29713 If we change the permute control vector to
29714
29715 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
29716
29717 and issue
29718
29719 vperm 9,11,10,12
29720
29721 we get the desired
29722
29723 vr9 = 00000006 00000004 00000002 00000000. */
29724
29725 static void
29726 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
29727 const vec_perm_indices &sel)
29728 {
29729 unsigned int i;
29730 rtx perm[16];
29731 rtx constv, unspec;
29732
29733 /* Unpack and adjust the constant selector. */
29734 for (i = 0; i < 16; ++i)
29735 {
29736 unsigned int elt = 31 - (sel[i] & 31);
29737 perm[i] = GEN_INT (elt);
29738 }
29739
29740 /* Expand to a permute, swapping the inputs and using the
29741 adjusted selector. */
29742 if (!REG_P (op0))
29743 op0 = force_reg (V16QImode, op0);
29744 if (!REG_P (op1))
29745 op1 = force_reg (V16QImode, op1);
29746
29747 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
29748 constv = force_reg (V16QImode, constv);
29749 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
29750 UNSPEC_VPERM);
29751 if (!REG_P (target))
29752 {
29753 rtx tmp = gen_reg_rtx (V16QImode);
29754 emit_move_insn (tmp, unspec);
29755 unspec = tmp;
29756 }
29757
29758 emit_move_insn (target, unspec);
29759 }
29760
29761 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
29762 permute control vector. But here it's not a constant, so we must
29763 generate a vector NAND or NOR to do the adjustment: since vperm uses
only the low five bits of each selector byte, subtracting an element
from 31 is the same as complementing it. */
29764
29765 void
29766 altivec_expand_vec_perm_le (rtx operands[4])
29767 {
29768 rtx notx, iorx, unspec;
29769 rtx target = operands[0];
29770 rtx op0 = operands[1];
29771 rtx op1 = operands[2];
29772 rtx sel = operands[3];
29773 rtx tmp = target;
29774 rtx norreg = gen_reg_rtx (V16QImode);
29775 machine_mode mode = GET_MODE (target);
29776
29777 /* Get everything in regs so the pattern matches. */
29778 if (!REG_P (op0))
29779 op0 = force_reg (mode, op0);
29780 if (!REG_P (op1))
29781 op1 = force_reg (mode, op1);
29782 if (!REG_P (sel))
29783 sel = force_reg (V16QImode, sel);
29784 if (!REG_P (target))
29785 tmp = gen_reg_rtx (mode);
29786
29787 if (TARGET_P9_VECTOR)
29788 {
29789 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
29790 UNSPEC_VPERMR);
29791 }
29792 else
29793 {
29794 /* Invert the selector with a VNAND if available, else a VNOR.
29795 The VNAND is preferred for future fusion opportunities. */
29796 notx = gen_rtx_NOT (V16QImode, sel);
29797 iorx = (TARGET_P8_VECTOR
29798 ? gen_rtx_IOR (V16QImode, notx, notx)
29799 : gen_rtx_AND (V16QImode, notx, notx));
29800 emit_insn (gen_rtx_SET (norreg, iorx));
29801
29802 /* Permute with operands reversed and adjusted selector. */
29803 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
29804 UNSPEC_VPERM);
29805 }
29806
29807 /* Copy into target, possibly by way of a register. */
29808 if (!REG_P (target))
29809 {
29810 emit_move_insn (tmp, unspec);
29811 unspec = tmp;
29812 }
29813
29814 emit_move_insn (target, unspec);
29815 }
29816
29817 /* Expand an Altivec constant permutation. Return true if we match
29818 an efficient implementation; false to fall back to VPERM.
29819
29820 OP0 and OP1 are the input vectors and TARGET is the output vector.
29821 SEL specifies the constant permutation vector. */
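/* E.g., the selector { 0, 16, 1, 17, ..., 7, 23 } matches the vmrghb
   entry in the table below, so the permutation is emitted as a single
   merge instruction instead of a selector load plus a vperm. */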
29822
29823 static bool
29824 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
29825 const vec_perm_indices &sel)
29826 {
29827 struct altivec_perm_insn {
29828 HOST_WIDE_INT mask;
29829 enum insn_code impl;
29830 unsigned char perm[16];
29831 };
29832 static const struct altivec_perm_insn patterns[] = {
29833 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
29834 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
29835 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
29836 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
29837 { OPTION_MASK_ALTIVEC,
29838 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
29839 : CODE_FOR_altivec_vmrglb_direct),
29840 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
29841 { OPTION_MASK_ALTIVEC,
29842 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
29843 : CODE_FOR_altivec_vmrglh_direct),
29844 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
29845 { OPTION_MASK_ALTIVEC,
29846 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
29847 : CODE_FOR_altivec_vmrglw_direct),
29848 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
29849 { OPTION_MASK_ALTIVEC,
29850 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
29851 : CODE_FOR_altivec_vmrghb_direct),
29852 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
29853 { OPTION_MASK_ALTIVEC,
29854 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
29855 : CODE_FOR_altivec_vmrghh_direct),
29856 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
29857 { OPTION_MASK_ALTIVEC,
29858 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
29859 : CODE_FOR_altivec_vmrghw_direct),
29860 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
29861 { OPTION_MASK_P8_VECTOR,
29862 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
29863 : CODE_FOR_p8_vmrgow_v4sf_direct),
29864 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
29865 { OPTION_MASK_P8_VECTOR,
29866 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
29867 : CODE_FOR_p8_vmrgew_v4sf_direct),
29868 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
29869 };
29870
29871 unsigned int i, j, elt, which;
29872 unsigned char perm[16];
29873 rtx x;
29874 bool one_vec;
29875
29876 /* Unpack the constant selector. */
29877 for (i = which = 0; i < 16; ++i)
29878 {
29879 elt = sel[i] & 31;
29880 which |= (elt < 16 ? 1 : 2);
29881 perm[i] = elt;
29882 }
29883
29884 /* Simplify the constant selector based on operands. */
29885 switch (which)
29886 {
29887 default:
29888 gcc_unreachable ();
29889
29890 case 3:
29891 one_vec = false;
29892 if (!rtx_equal_p (op0, op1))
29893 break;
29894 /* FALLTHRU */
29895
29896 case 2:
29897 for (i = 0; i < 16; ++i)
29898 perm[i] &= 15;
29899 op0 = op1;
29900 one_vec = true;
29901 break;
29902
29903 case 1:
29904 op1 = op0;
29905 one_vec = true;
29906 break;
29907 }
29908
29909 /* Look for splat patterns. */
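  /* E.g., a selector of sixteen 5s is a byte splat: vspltb with element
     5 for big endian, or 15 - 5 = 10 for little endian, where element
     numbering within the register is reversed. */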
29910 if (one_vec)
29911 {
29912 elt = perm[0];
29913
29914 for (i = 0; i < 16; ++i)
29915 if (perm[i] != elt)
29916 break;
29917 if (i == 16)
29918 {
29919 if (!BYTES_BIG_ENDIAN)
29920 elt = 15 - elt;
29921 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
29922 return true;
29923 }
29924
29925 if (elt % 2 == 0)
29926 {
29927 for (i = 0; i < 16; i += 2)
29928 if (perm[i] != elt || perm[i + 1] != elt + 1)
29929 break;
29930 if (i == 16)
29931 {
29932 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
29933 x = gen_reg_rtx (V8HImode);
29934 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
29935 GEN_INT (field)));
29936 emit_move_insn (target, gen_lowpart (V16QImode, x));
29937 return true;
29938 }
29939 }
29940
29941 if (elt % 4 == 0)
29942 {
29943 for (i = 0; i < 16; i += 4)
29944 if (perm[i] != elt
29945 || perm[i + 1] != elt + 1
29946 || perm[i + 2] != elt + 2
29947 || perm[i + 3] != elt + 3)
29948 break;
29949 if (i == 16)
29950 {
29951 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
29952 x = gen_reg_rtx (V4SImode);
29953 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
29954 GEN_INT (field)));
29955 emit_move_insn (target, gen_lowpart (V16QImode, x));
29956 return true;
29957 }
29958 }
29959 }
29960
29961 /* Look for merge and pack patterns. */
29962 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
29963 {
29964 bool swapped;
29965
29966 if ((patterns[j].mask & rs6000_isa_flags) == 0)
29967 continue;
29968
29969 elt = patterns[j].perm[0];
29970 if (perm[0] == elt)
29971 swapped = false;
29972 else if (perm[0] == elt + 16)
29973 swapped = true;
29974 else
29975 continue;
29976 for (i = 1; i < 16; ++i)
29977 {
29978 elt = patterns[j].perm[i];
29979 if (swapped)
29980 elt = (elt >= 16 ? elt - 16 : elt + 16);
29981 else if (one_vec && elt >= 16)
29982 elt -= 16;
29983 if (perm[i] != elt)
29984 break;
29985 }
29986 if (i == 16)
29987 {
29988 enum insn_code icode = patterns[j].impl;
29989 machine_mode omode = insn_data[icode].operand[0].mode;
29990 machine_mode imode = insn_data[icode].operand[1].mode;
29991
29992 /* For little-endian, don't use vpkuwum and vpkuhum if the
29993 underlying vector type is not V4SI and V8HI, respectively.
29994 For example, using vpkuwum with a V8HI picks up the even
29995 halfwords (BE numbering) when the even halfwords (LE
29996 numbering) are what we need. */
29997 if (!BYTES_BIG_ENDIAN
29998 && icode == CODE_FOR_altivec_vpkuwum_direct
29999 && ((REG_P (op0)
30000 && GET_MODE (op0) != V4SImode)
30001 || (SUBREG_P (op0)
30002 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
30003 continue;
30004 if (!BYTES_BIG_ENDIAN
30005 && icode == CODE_FOR_altivec_vpkuhum_direct
30006 && ((REG_P (op0)
30007 && GET_MODE (op0) != V8HImode)
30008 || (SUBREG_P (op0)
30009 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
30010 continue;
30011
30012 /* For little-endian, the two input operands must be swapped
30013 (or swapped back) to ensure proper right-to-left numbering
30014 from 0 to 2N-1. */
30015 if (swapped ^ !BYTES_BIG_ENDIAN)
30016 std::swap (op0, op1);
30017 if (imode != V16QImode)
30018 {
30019 op0 = gen_lowpart (imode, op0);
30020 op1 = gen_lowpart (imode, op1);
30021 }
30022 if (omode == V16QImode)
30023 x = target;
30024 else
30025 x = gen_reg_rtx (omode);
30026 emit_insn (GEN_FCN (icode) (x, op0, op1));
30027 if (omode != V16QImode)
30028 emit_move_insn (target, gen_lowpart (V16QImode, x));
30029 return true;
30030 }
30031 }
30032
30033 if (!BYTES_BIG_ENDIAN)
30034 {
30035 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
30036 return true;
30037 }
30038
30039 return false;
30040 }
30041
30042 /* Expand a VSX Permute Doubleword constant permutation.
30043 Return true if we match an efficient implementation. */
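/* PERM0 and PERM1 each select one of the four doublewords in the
   concatenation of OP0 and OP1: values 0-1 name a half of OP0 and 2-3
   a half of OP1. E.g. perm0 = 1, perm1 = 2 selects { op0[1], op1[0] },
   which the canonicalization below maps onto a single xxpermdi. */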
30044
30045 static bool
30046 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
30047 unsigned char perm0, unsigned char perm1)
30048 {
30049 rtx x;
30050
30051 /* If both selectors come from the same operand, fold to single op. */
30052 if ((perm0 & 2) == (perm1 & 2))
30053 {
30054 if (perm0 & 2)
30055 op0 = op1;
30056 else
30057 op1 = op0;
30058 }
30059 /* If both operands are equal, fold to simpler permutation. */
30060 if (rtx_equal_p (op0, op1))
30061 {
30062 perm0 = perm0 & 1;
30063 perm1 = (perm1 & 1) + 2;
30064 }
30065 /* If the first selector comes from the second operand, swap. */
30066 else if (perm0 & 2)
30067 {
30068 if (perm1 & 2)
30069 return false;
30070 perm0 -= 2;
30071 perm1 += 2;
30072 std::swap (op0, op1);
30073 }
30074 /* If the second selector does not come from the second operand, fail. */
30075 else if ((perm1 & 2) == 0)
30076 return false;
30077
30078 /* Success! */
30079 if (target != NULL)
30080 {
30081 machine_mode vmode, dmode;
30082 rtvec v;
30083
30084 vmode = GET_MODE (target);
30085 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
30086 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
30087 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
30088 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
30089 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
30090 emit_insn (gen_rtx_SET (target, x));
30091 }
30092 return true;
30093 }
30094
30095 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
30096
30097 static bool
30098 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
30099 rtx op1, const vec_perm_indices &sel)
30100 {
30101 bool testing_p = !target;
30102
30103 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
30104 if (TARGET_ALTIVEC && testing_p)
30105 return true;
30106
30107 /* Check for ps_merge* or xxpermdi insns. */
30108 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
30109 {
30110 if (testing_p)
30111 {
30112 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
30113 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
30114 }
30115 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
30116 return true;
30117 }
30118
30119 if (TARGET_ALTIVEC)
30120 {
30121 /* Force the target-independent code to lower to V16QImode. */
30122 if (vmode != V16QImode)
30123 return false;
30124 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
30125 return true;
30126 }
30127
30128 return false;
30129 }
30130
30131 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
30132 OP0 and OP1 are the input vectors and TARGET is the output vector.
30133 PERM specifies the constant permutation vector. */
30134
30135 static void
30136 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
30137 machine_mode vmode, const vec_perm_builder &perm)
30138 {
30139 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
30140 if (x != target)
30141 emit_move_insn (target, x);
30142 }
30143
30144 /* Expand an extract even operation. */
30145
30146 void
30147 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
30148 {
30149 machine_mode vmode = GET_MODE (target);
30150 unsigned i, nelt = GET_MODE_NUNITS (vmode);
30151 vec_perm_builder perm (nelt, nelt, 1);
30152
30153 for (i = 0; i < nelt; i++)
30154 perm.quick_push (i * 2);
30155
30156 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
30157 }
30158
30159 /* Expand a vector interleave operation. */
30160
30161 void
30162 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
30163 {
30164 machine_mode vmode = GET_MODE (target);
30165 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
30166 vec_perm_builder perm (nelt, nelt, 1);
30167
30168 high = (highp ? 0 : nelt / 2);
30169 for (i = 0; i < nelt / 2; i++)
30170 {
30171 perm.quick_push (i + high);
30172 perm.quick_push (i + nelt + high);
30173 }
30174
30175 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
30176 }
30177
30178 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
30179 void
30180 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
30181 {
30182 HOST_WIDE_INT hwi_scale (scale);
30183 REAL_VALUE_TYPE r_pow;
30184 rtvec v = rtvec_alloc (2);
30185 rtx elt;
30186 rtx scale_vec = gen_reg_rtx (V2DFmode);
30187 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
30188 elt = const_double_from_real_value (r_pow, DFmode);
30189 RTVEC_ELT (v, 0) = elt;
30190 RTVEC_ELT (v, 1) = elt;
30191 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
30192 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
30193 }
30194
30195 /* Return an RTX representing where to find the function value of a
30196 function returning MODE. */
30197 static rtx
30198 rs6000_complex_function_value (machine_mode mode)
30199 {
30200 unsigned int regno;
30201 rtx r1, r2;
30202 machine_mode inner = GET_MODE_INNER (mode);
30203 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
30204
30205 if (TARGET_FLOAT128_TYPE
30206 && (mode == KCmode
30207 || (mode == TCmode && TARGET_IEEEQUAD)))
30208 regno = ALTIVEC_ARG_RETURN;
30209
30210 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30211 regno = FP_ARG_RETURN;
30212
30213 else
30214 {
30215 regno = GP_ARG_RETURN;
30216
30217 /* 32-bit is OK since it'll go in r3/r4. */
30218 if (TARGET_32BIT && inner_bytes >= 4)
30219 return gen_rtx_REG (mode, regno);
30220 }
30221
30222 if (inner_bytes >= 8)
30223 return gen_rtx_REG (mode, regno);
30224
30225 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
30226 const0_rtx);
30227 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
30228 GEN_INT (inner_bytes));
30229 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
30230 }
30231
30232 /* Return an rtx describing a return value of MODE as a PARALLEL
30233 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
30234 stride REG_STRIDE. */
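/* E.g., rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1)
   describes a 64-bit value returned in r3/r4 under -m32 -mpowerpc64:

	(parallel [(expr_list (reg:SI 3) (const_int 0))
		   (expr_list (reg:SI 4) (const_int 4))])  */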
30235
30236 static rtx
30237 rs6000_parallel_return (machine_mode mode,
30238 int n_elts, machine_mode elt_mode,
30239 unsigned int regno, unsigned int reg_stride)
30240 {
30241 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
30242
30243 int i;
30244 for (i = 0; i < n_elts; i++)
30245 {
30246 rtx r = gen_rtx_REG (elt_mode, regno);
30247 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
30248 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
30249 regno += reg_stride;
30250 }
30251
30252 return par;
30253 }
30254
30255 /* Target hook for TARGET_FUNCTION_VALUE.
30256
30257 An integer value is in r3 and a floating-point value is in fp1,
30258 unless -msoft-float. */
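/* E.g., under the ELFv2 ABI a homogeneous aggregate of two doubles is
   returned in fp1 and fp2 through rs6000_parallel_return below, while
   a plain int comes back in r3 and a lone double in fp1. */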
30259
30260 static rtx
30261 rs6000_function_value (const_tree valtype,
30262 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
30263 bool outgoing ATTRIBUTE_UNUSED)
30264 {
30265 machine_mode mode;
30266 unsigned int regno;
30267 machine_mode elt_mode;
30268 int n_elts;
30269
30270 /* Special handling for structs in darwin64. */
30271 if (TARGET_MACHO
30272 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
30273 {
30274 CUMULATIVE_ARGS valcum;
30275 rtx valret;
30276
30277 valcum.words = 0;
30278 valcum.fregno = FP_ARG_MIN_REG;
30279 valcum.vregno = ALTIVEC_ARG_MIN_REG;
30280 /* Do a trial code generation as if this were going to be passed as
30281 an argument; if any part goes in memory, we return NULL. */
30282 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
30283 if (valret)
30284 return valret;
30285 /* Otherwise fall through to standard ABI rules. */
30286 }
30287
30288 mode = TYPE_MODE (valtype);
30289
30290 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
30291 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
30292 {
30293 int first_reg, n_regs;
30294
30295 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
30296 {
30297 /* _Decimal128 must use even/odd register pairs. */
30298 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30299 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
30300 }
30301 else
30302 {
30303 first_reg = ALTIVEC_ARG_RETURN;
30304 n_regs = 1;
30305 }
30306
30307 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
30308 }
30309
30310 /* Some return value types need to be split under -mpowerpc64 with the 32-bit ABI. */
30311 if (TARGET_32BIT && TARGET_POWERPC64)
30312 switch (mode)
30313 {
30314 default:
30315 break;
30316 case E_DImode:
30317 case E_SCmode:
30318 case E_DCmode:
30319 case E_TCmode:
30320 int count = GET_MODE_SIZE (mode) / 4;
30321 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
30322 }
30323
30324 if ((INTEGRAL_TYPE_P (valtype)
30325 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
30326 || POINTER_TYPE_P (valtype))
30327 mode = TARGET_32BIT ? SImode : DImode;
30328
30329 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30330 /* _Decimal128 must use an even/odd register pair. */
30331 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30332 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
30333 && !FLOAT128_VECTOR_P (mode))
30334 regno = FP_ARG_RETURN;
30335 else if (TREE_CODE (valtype) == COMPLEX_TYPE
30336 && targetm.calls.split_complex_arg)
30337 return rs6000_complex_function_value (mode);
30338 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
30339 return register is used in both cases, and we won't see V2DImode/V2DFmode
30340 for pure altivec, combine the two cases. */
30341 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
30342 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
30343 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
30344 regno = ALTIVEC_ARG_RETURN;
30345 else
30346 regno = GP_ARG_RETURN;
30347
30348 return gen_rtx_REG (mode, regno);
30349 }
30350
30351 /* Define how to find the value returned by a library function
30352 assuming the value has mode MODE. */
30353 rtx
30354 rs6000_libcall_value (machine_mode mode)
30355 {
30356 unsigned int regno;
30357
30358 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
30359 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
30360 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
30361
30362 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
30363 /* _Decimal128 must use an even/odd register pair. */
30364 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
30365 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
30366 regno = FP_ARG_RETURN;
30367 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
30368 return register is used in both cases, and we won't see V2DImode/V2DFmode
30369 for pure altivec, combine the two cases. */
30370 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
30371 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
30372 regno = ALTIVEC_ARG_RETURN;
30373 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
30374 return rs6000_complex_function_value (mode);
30375 else
30376 regno = GP_ARG_RETURN;
30377
30378 return gen_rtx_REG (mode, regno);
30379 }
30380
30381 /* Compute register pressure classes. We implement the target hook to avoid
30382 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
30383 lead to incorrect estimates of the number of available registers and
30384 therefore to increased register pressure and spills. */
30385 static int
30386 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
30387 {
30388 int n;
30389
30390 n = 0;
30391 pressure_classes[n++] = GENERAL_REGS;
30392 if (TARGET_VSX)
30393 pressure_classes[n++] = VSX_REGS;
30394 else
30395 {
30396 if (TARGET_ALTIVEC)
30397 pressure_classes[n++] = ALTIVEC_REGS;
30398 if (TARGET_HARD_FLOAT)
30399 pressure_classes[n++] = FLOAT_REGS;
30400 }
30401 pressure_classes[n++] = CR_REGS;
30402 pressure_classes[n++] = SPECIAL_REGS;
30403
30404 return n;
30405 }
30406
30407 /* Given FROM and TO register numbers, say whether this elimination is allowed.
30408 Frame pointer elimination is automatically handled.
30409
30410 For the RS/6000, if frame pointer elimination is being done, we would like
30411 to convert ap into fp, not sp.
30412
30413 We need r30 if -mminimal-toc was specified and there are constant pool
30414 references. */
30415
30416 static bool
30417 rs6000_can_eliminate (const int from, const int to)
30418 {
30419 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
30420 ? ! frame_pointer_needed
30421 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
30422 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
30423 || constant_pool_empty_p ()
30424 : true);
30425 }
30426
30427 /* Define the offset between two registers, FROM to be eliminated and its
30428 replacement TO, at the start of a routine. */
30429 HOST_WIDE_INT
30430 rs6000_initial_elimination_offset (int from, int to)
30431 {
30432 rs6000_stack_t *info = rs6000_stack_info ();
30433 HOST_WIDE_INT offset;
30434
30435 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30436 offset = info->push_p ? 0 : -info->total_size;
30437 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30438 {
30439 offset = info->push_p ? 0 : -info->total_size;
30440 if (FRAME_GROWS_DOWNWARD)
30441 offset += info->fixed_size + info->vars_size + info->parm_size;
30442 }
30443 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30444 offset = FRAME_GROWS_DOWNWARD
30445 ? info->fixed_size + info->vars_size + info->parm_size
30446 : 0;
30447 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
30448 offset = info->total_size;
30449 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
30450 offset = info->push_p ? info->total_size : 0;
30451 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
30452 offset = 0;
30453 else
30454 gcc_unreachable ();
30455
30456 return offset;
30457 }
30458
30459 /* Fill in sizes of registers used by unwinder. */
30460
30461 static void
30462 rs6000_init_dwarf_reg_sizes_extra (tree address)
30463 {
30464 if (TARGET_MACHO && ! TARGET_ALTIVEC)
30465 {
30466 int i;
30467 machine_mode mode = TYPE_MODE (char_type_node);
30468 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
30469 rtx mem = gen_rtx_MEM (BLKmode, addr);
30470 rtx value = gen_int_mode (16, mode);
30471
30472 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
30473 The unwinder still needs to know the size of Altivec registers. */
30474
30475 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
30476 {
30477 int column = DWARF_REG_TO_UNWIND_COLUMN
30478 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
30479 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
30480
30481 emit_move_insn (adjust_address (mem, mode, offset), value);
30482 }
30483 }
30484 }
30485
30486 /* Map internal gcc register numbers to debug format register numbers.
30487 FORMAT specifies the type of debug register number to use:
30488 0 -- debug information, except for frame-related sections
30489 1 -- DWARF .debug_frame section
30490 2 -- DWARF .eh_frame section */
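/* E.g., the link register is emitted as column 108 for .debug_frame
   (the DWARF numbering selected by RS6000_USE_DWARF_NUMBERING) but as
   65, its historical GCC number, for .eh_frame. */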
30491
30492 unsigned int
30493 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
30494 {
30495 /* On some platforms, we use the standard DWARF register
30496 numbering for .debug_info and .debug_frame. */
30497 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
30498 {
30499 #ifdef RS6000_USE_DWARF_NUMBERING
30500 if (regno <= 31)
30501 return regno;
30502 if (FP_REGNO_P (regno))
30503 return regno - FIRST_FPR_REGNO + 32;
30504 if (ALTIVEC_REGNO_P (regno))
30505 return regno - FIRST_ALTIVEC_REGNO + 1124;
30506 if (regno == LR_REGNO)
30507 return 108;
30508 if (regno == CTR_REGNO)
30509 return 109;
30510 if (regno == CA_REGNO)
30511 return 101; /* XER */
30512 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
30513 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
30514 The actual code emitted saves the whole of CR, so we map CR2_REGNO
30515 to the DWARF reg for CR. */
30516 if (format == 1 && regno == CR2_REGNO)
30517 return 64;
30518 if (CR_REGNO_P (regno))
30519 return regno - CR0_REGNO + 86;
30520 if (regno == VRSAVE_REGNO)
30521 return 356;
30522 if (regno == VSCR_REGNO)
30523 return 67;
30524
30525 /* These do not make much sense. */
30526 if (regno == FRAME_POINTER_REGNUM)
30527 return 111;
30528 if (regno == ARG_POINTER_REGNUM)
30529 return 67;
30530 if (regno == 64)
30531 return 100;
30532
30533 gcc_unreachable ();
30534 #endif
30535 }
30536
30537 /* We use the GCC 7 (and before) internal register numbering for
30538 non-DWARF debug information and also for .eh_frame, so translate
30539 the regnos to their numbers in GCC 7 (and before). */
30540 if (regno <= 31)
30541 return regno;
30542 if (FP_REGNO_P (regno))
30543 return regno - FIRST_FPR_REGNO + 32;
30544 if (ALTIVEC_REGNO_P (regno))
30545 return regno - FIRST_ALTIVEC_REGNO + 77;
30546 if (regno == LR_REGNO)
30547 return 65;
30548 if (regno == CTR_REGNO)
30549 return 66;
30550 if (regno == CA_REGNO)
30551 return 76; /* XER */
30552 if (CR_REGNO_P (regno))
30553 return regno - CR0_REGNO + 68;
30554 if (regno == VRSAVE_REGNO)
30555 return 109;
30556 if (regno == VSCR_REGNO)
30557 return 110;
30558
30559 if (regno == FRAME_POINTER_REGNUM)
30560 return 111;
30561 if (regno == ARG_POINTER_REGNUM)
30562 return 67;
30563 if (regno == 64)
30564 return 64;
30565
30566 gcc_unreachable ();
30567 }
30568
30569 /* target hook eh_return_filter_mode */
30570 static scalar_int_mode
30571 rs6000_eh_return_filter_mode (void)
30572 {
30573 return TARGET_32BIT ? SImode : word_mode;
30574 }
30575
30576 /* Target hook for translate_mode_attribute. */
30577 static machine_mode
30578 rs6000_translate_mode_attribute (machine_mode mode)
30579 {
30580 if ((FLOAT128_IEEE_P (mode)
30581 && ieee128_float_type_node == long_double_type_node)
30582 || (FLOAT128_IBM_P (mode)
30583 && ibm128_float_type_node == long_double_type_node))
30584 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
30585 return mode;
30586 }
30587
30588 /* Target hook for scalar_mode_supported_p. */
30589 static bool
30590 rs6000_scalar_mode_supported_p (scalar_mode mode)
30591 {
30592 /* -m32 does not support TImode. This is the default, from
30593 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
30594 same ABI as for -m32. But default_scalar_mode_supported_p allows
30595 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
30596 for -mpowerpc64. */
30597 if (TARGET_32BIT && mode == TImode)
30598 return false;
30599
30600 if (DECIMAL_FLOAT_MODE_P (mode))
30601 return default_decimal_float_supported_p ();
30602 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
30603 return true;
30604 else
30605 return default_scalar_mode_supported_p (mode);
30606 }
30607
30608 /* Target hook for vector_mode_supported_p. */
30609 static bool
30610 rs6000_vector_mode_supported_p (machine_mode mode)
30611 {
30612 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
30613 128-bit, the compiler might try to widen IEEE 128-bit to IBM
30614 double-double. */
30615 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
30616 return true;
30617
30618 else
30619 return false;
30620 }
30621
30622 /* Target hook for floatn_mode. */
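/* Note that _Float64x is the IEEE 128-bit type when
   TARGET_FLOAT128_TYPE (PowerPC has no 80-bit format), and _Float128x
   is never supported, as the switch below encodes. */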
30623 static opt_scalar_float_mode
30624 rs6000_floatn_mode (int n, bool extended)
30625 {
30626 if (extended)
30627 {
30628 switch (n)
30629 {
30630 case 32:
30631 return DFmode;
30632
30633 case 64:
30634 if (TARGET_FLOAT128_TYPE)
30635 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30636 else
30637 return opt_scalar_float_mode ();
30638
30639 case 128:
30640 return opt_scalar_float_mode ();
30641
30642 default:
30643 /* Those are the only valid _FloatNx types. */
30644 gcc_unreachable ();
30645 }
30646 }
30647 else
30648 {
30649 switch (n)
30650 {
30651 case 32:
30652 return SFmode;
30653
30654 case 64:
30655 return DFmode;
30656
30657 case 128:
30658 if (TARGET_FLOAT128_TYPE)
30659 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30660 else
30661 return opt_scalar_float_mode ();
30662
30663 default:
30664 return opt_scalar_float_mode ();
30665 }
30666 }
30667
30668 }
30669
30670 /* Target hook for c_mode_for_suffix. */
30671 static machine_mode
30672 rs6000_c_mode_for_suffix (char suffix)
30673 {
30674 if (TARGET_FLOAT128_TYPE)
30675 {
30676 if (suffix == 'q' || suffix == 'Q')
30677 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
30678
30679 /* At the moment, we are not defining a suffix for IBM extended double.
30680 If/when the default for -mabi=ieeelongdouble is changed, and we want
30681 to support __ibm128 constants in legacy library code, we may need to
30682 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
30683 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
30684 __float80 constants. */
30685 }
30686
30687 return VOIDmode;
30688 }
30689
30690 /* Target hook for invalid_arg_for_unprototyped_fn. */
30691 static const char *
30692 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
30693 {
30694 return (!rs6000_darwin64_abi
30695 && typelist == 0
30696 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
30697 && (funcdecl == NULL_TREE
30698 || (TREE_CODE (funcdecl) == FUNCTION_DECL
30699 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
30700 ? N_("AltiVec argument passed to unprototyped function")
30701 : NULL;
30702 }
30703
30704 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
30705 setup by using __stack_chk_fail_local hidden function instead of
30706 calling __stack_chk_fail directly. Otherwise it is better to call
30707 __stack_chk_fail directly. */
30708
30709 static tree ATTRIBUTE_UNUSED
30710 rs6000_stack_protect_fail (void)
30711 {
30712 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
30713 ? default_hidden_stack_protect_fail ()
30714 : default_external_stack_protect_fail ();
30715 }
30716
30717 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. The returned offset
is added to (addr >> 3) to form shadow-memory addresses. */
30718
30719 #if TARGET_ELF
30720 static unsigned HOST_WIDE_INT
30721 rs6000_asan_shadow_offset (void)
30722 {
30723 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
30724 }
30725 #endif
30726 \f
30727 /* Mask options that we want to support inside of attribute((target)) and
30728 #pragma GCC target operations. Note, we do not include things like
30729 64/32-bit, endianness, hard/soft floating point, etc. that would have
30730 different calling sequences. */
30731
30732 struct rs6000_opt_mask {
30733 const char *name; /* option name */
30734 HOST_WIDE_INT mask; /* mask to set */
30735 bool invert; /* invert sense of mask */
30736 bool valid_target; /* option is a target option */
30737 };
30738
30739 static struct rs6000_opt_mask const rs6000_opt_masks[] =
30740 {
30741 { "altivec", OPTION_MASK_ALTIVEC, false, true },
30742 { "cmpb", OPTION_MASK_CMPB, false, true },
30743 { "crypto", OPTION_MASK_CRYPTO, false, true },
30744 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
30745 { "dlmzb", OPTION_MASK_DLMZB, false, true },
30746 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
30747 false, true },
30748 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
30749 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
30750 { "fprnd", OPTION_MASK_FPRND, false, true },
30751 { "future", OPTION_MASK_FUTURE, false, true },
30752 { "hard-dfp", OPTION_MASK_DFP, false, true },
30753 { "htm", OPTION_MASK_HTM, false, true },
30754 { "isel", OPTION_MASK_ISEL, false, true },
30755 { "mfcrf", OPTION_MASK_MFCRF, false, true },
30756 { "mfpgpr", 0, false, true },
30757 { "modulo", OPTION_MASK_MODULO, false, true },
30758 { "mulhw", OPTION_MASK_MULHW, false, true },
30759 { "multiple", OPTION_MASK_MULTIPLE, false, true },
30760 { "pcrel", OPTION_MASK_PCREL, false, true },
30761 { "popcntb", OPTION_MASK_POPCNTB, false, true },
30762 { "popcntd", OPTION_MASK_POPCNTD, false, true },
30763 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
30764 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
30765 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
30766 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
30767 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
30768 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
30769 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
30770 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
30771 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
30772 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
30773 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
30774 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
30775 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
30776 { "string", 0, false, true },
30777 { "update", OPTION_MASK_NO_UPDATE, true , true },
30778 { "vsx", OPTION_MASK_VSX, false, true },
30779 #ifdef OPTION_MASK_64BIT
30780 #if TARGET_AIX_OS
30781 { "aix64", OPTION_MASK_64BIT, false, false },
30782 { "aix32", OPTION_MASK_64BIT, true, false },
30783 #else
30784 { "64", OPTION_MASK_64BIT, false, false },
30785 { "32", OPTION_MASK_64BIT, true, false },
30786 #endif
30787 #endif
30788 #ifdef OPTION_MASK_EABI
30789 { "eabi", OPTION_MASK_EABI, false, false },
30790 #endif
30791 #ifdef OPTION_MASK_LITTLE_ENDIAN
30792 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
30793 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
30794 #endif
30795 #ifdef OPTION_MASK_RELOCATABLE
30796 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
30797 #endif
30798 #ifdef OPTION_MASK_STRICT_ALIGN
30799 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
30800 #endif
30801 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
30802 { "string", 0, false, false },
30803 };
30804
30805 /* Builtin mask mapping for printing the flags. */
30806 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
30807 {
30808 { "altivec", RS6000_BTM_ALTIVEC, false, false },
30809 { "vsx", RS6000_BTM_VSX, false, false },
30810 { "fre", RS6000_BTM_FRE, false, false },
30811 { "fres", RS6000_BTM_FRES, false, false },
30812 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
30813 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
30814 { "popcntd", RS6000_BTM_POPCNTD, false, false },
30815 { "cell", RS6000_BTM_CELL, false, false },
30816 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
30817 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
30818 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
30819 { "crypto", RS6000_BTM_CRYPTO, false, false },
30820 { "htm", RS6000_BTM_HTM, false, false },
30821 { "hard-dfp", RS6000_BTM_DFP, false, false },
30822 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
30823 { "long-double-128", RS6000_BTM_LDBL128, false, false },
30824 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
30825 { "float128", RS6000_BTM_FLOAT128, false, false },
30826 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
30827 };
30828
30829 /* Option variables that we want to support inside attribute((target)) and
30830 #pragma GCC target operations. */
30831
30832 struct rs6000_opt_var {
30833 const char *name; /* option name */
30834 size_t global_offset; /* offset of the option in global_options. */
30835 size_t target_offset; /* offset of the option in target options. */
30836 };
30837
30838 static struct rs6000_opt_var const rs6000_opt_vars[] =
30839 {
30840 { "friz",
30841 offsetof (struct gcc_options, x_TARGET_FRIZ),
30842 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
30843 { "avoid-indexed-addresses",
30844 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
30845 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
30846 { "longcall",
30847 offsetof (struct gcc_options, x_rs6000_default_long_calls),
30848 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
30849 { "optimize-swaps",
30850 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
30851 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
30852 { "allow-movmisalign",
30853 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
30854 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
30855 { "sched-groups",
30856 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
30857 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
30858 { "always-hint",
30859 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
30860 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
30861 { "align-branch-targets",
30862 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
30863 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
30864 { "tls-markers",
30865 offsetof (struct gcc_options, x_tls_markers),
30866 offsetof (struct cl_target_option, x_tls_markers), },
30867 { "sched-prolog",
30868 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30869 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30870 { "sched-epilog",
30871 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
30872 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
30873 { "speculate-indirect-jumps",
30874 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
30875 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
30876 };
30877
30878 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
30879 parsing. Return true if there were no errors. */
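/* E.g., __attribute__((__target__("cpu=power9,no-vsx"))) arrives as
   the string "cpu=power9,no-vsx"; the loop below records the cpu
   index and clears OPTION_MASK_VSX via the "no-" prefix. */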
30880
30881 static bool
30882 rs6000_inner_target_options (tree args, bool attr_p)
30883 {
30884 bool ret = true;
30885
30886 if (args == NULL_TREE)
30887 ;
30888
30889 else if (TREE_CODE (args) == STRING_CST)
30890 {
30891 char *p = ASTRDUP (TREE_STRING_POINTER (args));
30892 char *q;
30893
30894 while ((q = strtok (p, ",")) != NULL)
30895 {
30896 bool error_p = false;
30897 bool not_valid_p = false;
30898 const char *cpu_opt = NULL;
30899
30900 p = NULL;
30901 if (strncmp (q, "cpu=", 4) == 0)
30902 {
30903 int cpu_index = rs6000_cpu_name_lookup (q+4);
30904 if (cpu_index >= 0)
30905 rs6000_cpu_index = cpu_index;
30906 else
30907 {
30908 error_p = true;
30909 cpu_opt = q+4;
30910 }
30911 }
30912 else if (strncmp (q, "tune=", 5) == 0)
30913 {
30914 int tune_index = rs6000_cpu_name_lookup (q+5);
30915 if (tune_index >= 0)
30916 rs6000_tune_index = tune_index;
30917 else
30918 {
30919 error_p = true;
30920 cpu_opt = q+5;
30921 }
30922 }
30923 else
30924 {
30925 size_t i;
30926 bool invert = false;
30927 char *r = q;
30928
30929 error_p = true;
30930 if (strncmp (r, "no-", 3) == 0)
30931 {
30932 invert = true;
30933 r += 3;
30934 }
30935
30936 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
30937 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
30938 {
30939 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
30940
30941 if (!rs6000_opt_masks[i].valid_target)
30942 not_valid_p = true;
30943 else
30944 {
30945 error_p = false;
30946 rs6000_isa_flags_explicit |= mask;
30947
30948 /* VSX needs altivec, so -mvsx automagically sets
30949 altivec and disables -mavoid-indexed-addresses. */
30950 if (!invert)
30951 {
30952 if (mask == OPTION_MASK_VSX)
30953 {
30954 mask |= OPTION_MASK_ALTIVEC;
30955 TARGET_AVOID_XFORM = 0;
30956 }
30957 }
30958
30959 if (rs6000_opt_masks[i].invert)
30960 invert = !invert;
30961
30962 if (invert)
30963 rs6000_isa_flags &= ~mask;
30964 else
30965 rs6000_isa_flags |= mask;
30966 }
30967 break;
30968 }
30969
30970 if (error_p && !not_valid_p)
30971 {
30972 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
30973 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
30974 {
30975 size_t j = rs6000_opt_vars[i].global_offset;
30976 *((int *) ((char *)&global_options + j)) = !invert;
30977 error_p = false;
30978 not_valid_p = false;
30979 break;
30980 }
30981 }
30982 }
30983
30984 if (error_p)
30985 {
30986 const char *eprefix, *esuffix;
30987
30988 ret = false;
30989 if (attr_p)
30990 {
30991 eprefix = "__attribute__((__target__(";
30992 esuffix = ")))";
30993 }
30994 else
30995 {
30996 eprefix = "#pragma GCC target ";
30997 esuffix = "";
30998 }
30999
31000 if (cpu_opt)
31001 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
31002 q, esuffix);
31003 else if (not_valid_p)
31004 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
31005 else
31006 error ("%s%qs%s is invalid", eprefix, q, esuffix);
31007 }
31008 }
31009 }
31010
31011 else if (TREE_CODE (args) == TREE_LIST)
31012 {
31013 do
31014 {
31015 tree value = TREE_VALUE (args);
31016 if (value)
31017 {
31018 bool ret2 = rs6000_inner_target_options (value, attr_p);
31019 if (!ret2)
31020 ret = false;
31021 }
31022 args = TREE_CHAIN (args);
31023 }
31024 while (args != NULL_TREE);
31025 }
31026
31027 else
31028 {
31029 error ("attribute %<target%> argument not a string");
31030 return false;
31031 }
31032
31033 return ret;
31034 }
31035
31036 /* Print out the target options as a list for -mdebug=target. */
31037
31038 static void
31039 rs6000_debug_target_options (tree args, const char *prefix)
31040 {
31041 if (args == NULL_TREE)
31042 fprintf (stderr, "%s<NULL>", prefix);
31043
31044 else if (TREE_CODE (args) == STRING_CST)
31045 {
31046 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31047 char *q;
31048
31049 while ((q = strtok (p, ",")) != NULL)
31050 {
31051 p = NULL;
31052 fprintf (stderr, "%s\"%s\"", prefix, q);
31053 prefix = ", ";
31054 }
31055 }
31056
31057 else if (TREE_CODE (args) == TREE_LIST)
31058 {
31059 do
31060 {
31061 tree value = TREE_VALUE (args);
31062 if (value)
31063 {
31064 rs6000_debug_target_options (value, prefix);
31065 prefix = ", ";
31066 }
31067 args = TREE_CHAIN (args);
31068 }
31069 while (args != NULL_TREE);
31070 }
31071
31072 else
31073 gcc_unreachable ();
31074
31075 return;
31076 }
31077
31078 \f
31079 /* Hook to validate attribute((target("..."))). */
31080
31081 static bool
31082 rs6000_valid_attribute_p (tree fndecl,
31083 tree ARG_UNUSED (name),
31084 tree args,
31085 int flags)
31086 {
31087 struct cl_target_option cur_target;
31088 bool ret;
31089 tree old_optimize;
31090 tree new_target, new_optimize;
31091 tree func_optimize;
31092
31093 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31094
31095 if (TARGET_DEBUG_TARGET)
31096 {
31097 tree tname = DECL_NAME (fndecl);
31098 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
31099 if (tname)
31100 fprintf (stderr, "function: %.*s\n",
31101 (int) IDENTIFIER_LENGTH (tname),
31102 IDENTIFIER_POINTER (tname));
31103 else
31104 fprintf (stderr, "function: unknown\n");
31105
31106 fprintf (stderr, "args:");
31107 rs6000_debug_target_options (args, " ");
31108 fprintf (stderr, "\n");
31109
31110 if (flags)
31111 fprintf (stderr, "flags: 0x%x\n", flags);
31112
31113 fprintf (stderr, "--------------------\n");
31114 }
31115
31116 /* attribute((target("default"))) does nothing, beyond
31117 affecting multi-versioning. */
31118 if (TREE_VALUE (args)
31119 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
31120 && TREE_CHAIN (args) == NULL_TREE
31121 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
31122 return true;
31123
31124 old_optimize = build_optimization_node (&global_options);
31125 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31126
31127 /* If the function changed the optimization levels as well as setting target
31128 options, start with the optimizations specified. */
31129 if (func_optimize && func_optimize != old_optimize)
31130 cl_optimization_restore (&global_options,
31131 TREE_OPTIMIZATION (func_optimize));
31132
31133 /* The target attributes may also change some optimization flags, so update
31134 the optimization options if necessary. */
31135 cl_target_option_save (&cur_target, &global_options);
31136 rs6000_cpu_index = rs6000_tune_index = -1;
31137 ret = rs6000_inner_target_options (args, true);
31138
31139 /* Set up any additional state. */
31140 if (ret)
31141 {
31142 ret = rs6000_option_override_internal (false);
31143 new_target = build_target_option_node (&global_options);
31144 }
31145 else
31146 new_target = NULL;
31147
31148 new_optimize = build_optimization_node (&global_options);
31149
31150 if (!new_target)
31151 ret = false;
31152
31153 else if (fndecl)
31154 {
31155 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
31156
31157 if (old_optimize != new_optimize)
31158 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31159 }
31160
31161 cl_target_option_restore (&global_options, &cur_target);
31162
31163 if (old_optimize != new_optimize)
31164 cl_optimization_restore (&global_options,
31165 TREE_OPTIMIZATION (old_optimize));
31166
31167 return ret;
31168 }
31169
31170 \f
31171 /* Hook to validate the current #pragma GCC target and set the state, and
31172 update the macros based on what was changed. If ARGS is NULL, then
31173 POP_TARGET is used to reset the options. */
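/* E.g., #pragma GCC target ("cpu=power9,vsx") lands here with ARGS
   set, while a later #pragma GCC pop_options calls back with ARGS
   NULL and the previously saved options in POP_TARGET. */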
31174
31175 bool
31176 rs6000_pragma_target_parse (tree args, tree pop_target)
31177 {
31178 tree prev_tree = build_target_option_node (&global_options);
31179 tree cur_tree;
31180 struct cl_target_option *prev_opt, *cur_opt;
31181 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
31182 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
31183
31184 if (TARGET_DEBUG_TARGET)
31185 {
31186 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
31187 fprintf (stderr, "args:");
31188 rs6000_debug_target_options (args, " ");
31189 fprintf (stderr, "\n");
31190
31191 if (pop_target)
31192 {
31193 fprintf (stderr, "pop_target:\n");
31194 debug_tree (pop_target);
31195 }
31196 else
31197 fprintf (stderr, "pop_target: <NULL>\n");
31198
31199 fprintf (stderr, "--------------------\n");
31200 }
31201
31202 if (! args)
31203 {
31204 cur_tree = ((pop_target)
31205 ? pop_target
31206 : target_option_default_node);
31207 cl_target_option_restore (&global_options,
31208 TREE_TARGET_OPTION (cur_tree));
31209 }
31210 else
31211 {
31212 rs6000_cpu_index = rs6000_tune_index = -1;
31213 if (!rs6000_inner_target_options (args, false)
31214 || !rs6000_option_override_internal (false)
31215 || (cur_tree = build_target_option_node (&global_options))
31216 == NULL_TREE)
31217 {
31218 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
31219 fprintf (stderr, "invalid pragma\n");
31220
31221 return false;
31222 }
31223 }
31224
31225 target_option_current_node = cur_tree;
31226 rs6000_activate_target_options (target_option_current_node);
31227
31228 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
31229 change the macros that are defined. */
31230 if (rs6000_target_modify_macros_ptr)
31231 {
31232 prev_opt = TREE_TARGET_OPTION (prev_tree);
31233 prev_bumask = prev_opt->x_rs6000_builtin_mask;
31234 prev_flags = prev_opt->x_rs6000_isa_flags;
31235
31236 cur_opt = TREE_TARGET_OPTION (cur_tree);
31237 cur_flags = cur_opt->x_rs6000_isa_flags;
31238 cur_bumask = cur_opt->x_rs6000_builtin_mask;
31239
31240 diff_bumask = (prev_bumask ^ cur_bumask);
31241 diff_flags = (prev_flags ^ cur_flags);
31242
31243 if ((diff_flags != 0) || (diff_bumask != 0))
31244 {
31245 /* Delete old macros. */
31246 rs6000_target_modify_macros_ptr (false,
31247 prev_flags & diff_flags,
31248 prev_bumask & diff_bumask);
31249
31250 /* Define new macros. */
31251 rs6000_target_modify_macros_ptr (true,
31252 cur_flags & diff_flags,
31253 cur_bumask & diff_bumask);
31254 }
31255 }
31256
31257 return true;
31258 }
31259
31260 \f
31261 /* Remember the last target of rs6000_set_current_function. */
31262 static GTY(()) tree rs6000_previous_fndecl;
31263
31264 /* Restore target's globals from NEW_TREE and invalidate the
31265 rs6000_previous_fndecl cache. */
31266
31267 void
31268 rs6000_activate_target_options (tree new_tree)
31269 {
31270 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
31271 if (TREE_TARGET_GLOBALS (new_tree))
31272 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31273 else if (new_tree == target_option_default_node)
31274 restore_target_globals (&default_target_globals);
31275 else
31276 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
31277 rs6000_previous_fndecl = NULL_TREE;
31278 }
31279
31280 /* Establish appropriate back-end context for processing the function
31281 FNDECL. The argument might be NULL to indicate processing at top
31282 level, outside of any function scope. */
31283 static void
31284 rs6000_set_current_function (tree fndecl)
31285 {
31286 if (TARGET_DEBUG_TARGET)
31287 {
31288 fprintf (stderr, "\n==================== rs6000_set_current_function");
31289
31290 if (fndecl)
31291 fprintf (stderr, ", fndecl %s (%p)",
31292 (DECL_NAME (fndecl)
31293 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
31294 : "<unknown>"), (void *)fndecl);
31295
31296 if (rs6000_previous_fndecl)
31297 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
31298
31299 fprintf (stderr, "\n");
31300 }
31301
31302 /* Only change the context if the function changes. This hook is called
31303 several times in the course of compiling a function, and we don't want to
31304 slow things down too much or call target_reinit when it isn't safe. */
31305 if (fndecl == rs6000_previous_fndecl)
31306 return;
31307
31308 tree old_tree;
31309 if (rs6000_previous_fndecl == NULL_TREE)
31310 old_tree = target_option_current_node;
31311 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
31312 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
31313 else
31314 old_tree = target_option_default_node;
31315
31316 tree new_tree;
31317 if (fndecl == NULL_TREE)
31318 {
31319 if (old_tree != target_option_current_node)
31320 new_tree = target_option_current_node;
31321 else
31322 new_tree = NULL_TREE;
31323 }
31324 else
31325 {
31326 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31327 if (new_tree == NULL_TREE)
31328 new_tree = target_option_default_node;
31329 }
31330
31331 if (TARGET_DEBUG_TARGET)
31332 {
31333 if (new_tree)
31334 {
31335 fprintf (stderr, "\nnew fndecl target specific options:\n");
31336 debug_tree (new_tree);
31337 }
31338
31339 if (old_tree)
31340 {
31341 fprintf (stderr, "\nold fndecl target specific options:\n");
31342 debug_tree (old_tree);
31343 }
31344
31345 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
31346 fprintf (stderr, "--------------------\n");
31347 }
31348
31349 if (new_tree && old_tree != new_tree)
31350 rs6000_activate_target_options (new_tree);
31351
31352 if (fndecl)
31353 rs6000_previous_fndecl = fndecl;
31354 }
31355
31356 \f
31357 /* Save the current options */
31358
31359 static void
31360 rs6000_function_specific_save (struct cl_target_option *ptr,
31361 struct gcc_options *opts)
31362 {
31363 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
31364 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
31365 }
31366
31367 /* Restore the current options */
31368
31369 static void
31370 rs6000_function_specific_restore (struct gcc_options *opts,
31371 struct cl_target_option *ptr)
31372
31373 {
31374 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
31375 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
31376 (void) rs6000_option_override_internal (false);
31377 }
31378
31379 /* Print the current options */
31380
31381 static void
31382 rs6000_function_specific_print (FILE *file, int indent,
31383 struct cl_target_option *ptr)
31384 {
31385 rs6000_print_isa_options (file, indent, "Isa options set",
31386 ptr->x_rs6000_isa_flags);
31387
31388 rs6000_print_isa_options (file, indent, "Isa options explicit",
31389 ptr->x_rs6000_isa_flags_explicit);
31390 }
31391
31392 /* Helper function to print the current isa or misc options on a line. */
31393
31394 static void
31395 rs6000_print_options_internal (FILE *file,
31396 int indent,
31397 const char *string,
31398 HOST_WIDE_INT flags,
31399 const char *prefix,
31400 const struct rs6000_opt_mask *opts,
31401 size_t num_elements)
31402 {
31403 size_t i;
31404 size_t start_column = 0;
31405 size_t cur_column;
31406 size_t max_column = 120;
31407 size_t prefix_len = strlen (prefix);
31408 size_t comma_len = 0;
31409 const char *comma = "";
31410
31411 if (indent)
31412 start_column += fprintf (file, "%*s", indent, "");
31413
31414 if (!flags)
31415 {
31416 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
31417 return;
31418 }
31419
31420 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
31421
31422 /* Print the various mask options. */
31423 cur_column = start_column;
31424 for (i = 0; i < num_elements; i++)
31425 {
31426 bool invert = opts[i].invert;
31427 const char *name = opts[i].name;
31428 const char *no_str = "";
31429 HOST_WIDE_INT mask = opts[i].mask;
31430 size_t len = comma_len + prefix_len + strlen (name);
31431
31432 if (!invert)
31433 {
31434 if ((flags & mask) == 0)
31435 {
31436 no_str = "no-";
31437 len += sizeof ("no-") - 1;
31438 }
31439
31440 flags &= ~mask;
31441 }
31442
31443 else
31444 {
31445 if ((flags & mask) != 0)
31446 {
31447 no_str = "no-";
31448 len += sizeof ("no-") - 1;
31449 }
31450
31451 flags |= mask;
31452 }
31453
31454 cur_column += len;
31455 if (cur_column > max_column)
31456 {
31457 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
31458 cur_column = start_column + len;
31459 comma = "";
31460 }
31461
31462 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
31463 comma = ", ";
31464 comma_len = sizeof (", ") - 1;
31465 }
31466
31467 fputs ("\n", file);
31468 }
31469
31470 /* Helper function to print the current isa options on a line. */
31471
31472 static void
31473 rs6000_print_isa_options (FILE *file, int indent, const char *string,
31474 HOST_WIDE_INT flags)
31475 {
31476 rs6000_print_options_internal (file, indent, string, flags, "-m",
31477 &rs6000_opt_masks[0],
31478 ARRAY_SIZE (rs6000_opt_masks));
31479 }
31480
31481 static void
31482 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
31483 HOST_WIDE_INT flags)
31484 {
31485 rs6000_print_options_internal (file, indent, string, flags, "",
31486 &rs6000_builtin_mask_names[0],
31487 ARRAY_SIZE (rs6000_builtin_mask_names));
31488 }
31489
31490 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
31491 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
31492 -mupper-regs-df, etc.).
31493
31494 If the user used -mno-power8-vector, we need to turn off all of the implicit
31495 ISA 2.07 and 3.0 options that relate to the vector unit.
31496
31497 If the user used -mno-power9-vector, we need to turn off all of the implicit
31498 ISA 3.0 options that relate to the vector unit.
31499
31500 This function does not handle explicit options such as the user specifying
31501 -mdirect-move. These are handled in rs6000_option_override_internal, and
31502 the appropriate error is given if needed.
31503
31504 We return a mask of all of the implicit options that should not be enabled
31505 by default. */
31506
31507 static HOST_WIDE_INT
31508 rs6000_disable_incompatible_switches (void)
31509 {
31510 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
31511 size_t i, j;
31512
31513 static const struct {
31514 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
31515 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
31516 const char *const name; /* name of the switch. */
31517 } flags[] = {
31518 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
31519 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
31520 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
31521 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
31522 };
31523
31524 for (i = 0; i < ARRAY_SIZE (flags); i++)
31525 {
31526 HOST_WIDE_INT no_flag = flags[i].no_flag;
31527
31528 if ((rs6000_isa_flags & no_flag) == 0
31529 && (rs6000_isa_flags_explicit & no_flag) != 0)
31530 {
31531 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
31532 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
31533 & rs6000_isa_flags
31534 & dep_flags);
31535
31536 if (set_flags)
31537 {
31538 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
31539 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
31540 {
31541 set_flags &= ~rs6000_opt_masks[j].mask;
31542 error ("%<-mno-%s%> turns off %<-m%s%>",
31543 flags[i].name,
31544 rs6000_opt_masks[j].name);
31545 }
31546
31547 gcc_assert (!set_flags);
31548 }
31549
31550 rs6000_isa_flags &= ~dep_flags;
31551 ignore_masks |= no_flag | dep_flags;
31552 }
31553 }
31554
31555 return ignore_masks;
31556 }
31557
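/* Illustrative sketch (not part of the original source): explicitly
   disabling a switch while explicitly enabling one of its dependents is
   diagnosed by the loop above, e.g.

     gcc -mno-vsx -mpower8-vector ...
     error: '-mno-vsx' turns off '-mpower8-vector'

   while dependent options that were only enabled implicitly (say, by
   -mcpu=power8) are cleared silently via rs6000_isa_flags &= ~dep_flags. */
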
31558 \f
31559 /* Helper function for printing the function name when debugging. */
31560
31561 static const char *
31562 get_decl_name (tree fn)
31563 {
31564 tree name;
31565
31566 if (!fn)
31567 return "<null>";
31568
31569 name = DECL_NAME (fn);
31570 if (!name)
31571 return "<no-name>";
31572
31573 return IDENTIFIER_POINTER (name);
31574 }
31575
31576 /* Return the clone id of the target we are compiling code for in a target
31577 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
31578 the priority list for the target clones (ordered from lowest to
31579 highest). */
31580
31581 static int
31582 rs6000_clone_priority (tree fndecl)
31583 {
31584 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
31585 HOST_WIDE_INT isa_masks;
31586 int ret = CLONE_DEFAULT;
31587 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
31588 const char *attrs_str = NULL;
31589
31590 attrs = TREE_VALUE (TREE_VALUE (attrs));
31591 attrs_str = TREE_STRING_POINTER (attrs);
31592
31593 /* Return priority zero for the default function. Return the ISA needed for
31594 the function if it is not the default. */
31595 if (strcmp (attrs_str, "default") != 0)
31596 {
31597 if (fn_opts == NULL_TREE)
31598 fn_opts = target_option_default_node;
31599
31600 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
31601 isa_masks = rs6000_isa_flags;
31602 else
31603 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
31604
31605 for (ret = CLONE_MAX - 1; ret != 0; ret--)
31606 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
31607 break;
31608 }
31609
31610 if (TARGET_DEBUG_TARGET)
31611 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
31612 get_decl_name (fndecl), ret);
31613
31614 return ret;
31615 }
31616
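/* Illustrative sketch (not part of the original source): the clone
   machinery below implements function multi-versioning for the
   "target_clones" attribute, roughly as in

     __attribute__ ((target_clones ("cpu=power9,default")))
     long mod_func (long a, long b) { return a % b; }

   Each version gets a priority from rs6000_clone_priority, and the
   generated dispatcher picks the highest-priority version that the
   running CPU supports. */
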
31617 /* This compares the priority of target features in function DECL1 and DECL2.
31618 It returns positive value if DECL1 is higher priority, negative value if
31619 DECL2 is higher priority and 0 if they are the same. Note, priorities are
31620 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
31621
31622 static int
31623 rs6000_compare_version_priority (tree decl1, tree decl2)
31624 {
31625 int priority1 = rs6000_clone_priority (decl1);
31626 int priority2 = rs6000_clone_priority (decl2);
31627 int ret = priority1 - priority2;
31628
31629 if (TARGET_DEBUG_TARGET)
31630 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
31631 get_decl_name (decl1), get_decl_name (decl2), ret);
31632
31633 return ret;
31634 }
31635
31636 /* Make a dispatcher declaration for the multi-versioned function DECL.
31637 Calls to the DECL function will be replaced with calls to the dispatcher
31638 by the front-end. Returns the decl of the dispatcher function. */
31639
31640 static tree
31641 rs6000_get_function_versions_dispatcher (void *decl)
31642 {
31643 tree fn = (tree) decl;
31644 struct cgraph_node *node = NULL;
31645 struct cgraph_node *default_node = NULL;
31646 struct cgraph_function_version_info *node_v = NULL;
31647 struct cgraph_function_version_info *first_v = NULL;
31648
31649 tree dispatch_decl = NULL;
31650
31651 struct cgraph_function_version_info *default_version_info = NULL;
31652 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31653
31654 if (TARGET_DEBUG_TARGET)
31655 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
31656 get_decl_name (fn));
31657
31658 node = cgraph_node::get (fn);
31659 gcc_assert (node != NULL);
31660
31661 node_v = node->function_version ();
31662 gcc_assert (node_v != NULL);
31663
31664 if (node_v->dispatcher_resolver != NULL)
31665 return node_v->dispatcher_resolver;
31666
31667 /* Find the default version and make it the first node. */
31668 first_v = node_v;
31669 /* Go to the beginning of the chain. */
31670 while (first_v->prev != NULL)
31671 first_v = first_v->prev;
31672
31673 default_version_info = first_v;
31674 while (default_version_info != NULL)
31675 {
31676 const tree decl2 = default_version_info->this_node->decl;
31677 if (is_function_default_version (decl2))
31678 break;
31679 default_version_info = default_version_info->next;
31680 }
31681
31682 /* If there is no default node, just return NULL. */
31683 if (default_version_info == NULL)
31684 return NULL;
31685
31686 /* Make default info the first node. */
31687 if (first_v != default_version_info)
31688 {
31689 default_version_info->prev->next = default_version_info->next;
31690 if (default_version_info->next)
31691 default_version_info->next->prev = default_version_info->prev;
31692 first_v->prev = default_version_info;
31693 default_version_info->next = first_v;
31694 default_version_info->prev = NULL;
31695 }
31696
31697 default_node = default_version_info->this_node;
31698
31699 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
31700 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31701 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
31702 "exports hardware capability bits");
31703 #else
31704
31705 if (targetm.has_ifunc_p ())
31706 {
31707 struct cgraph_function_version_info *it_v = NULL;
31708 struct cgraph_node *dispatcher_node = NULL;
31709 struct cgraph_function_version_info *dispatcher_version_info = NULL;
31710
31711 /* Right now, the dispatching is done via ifunc. */
31712 dispatch_decl = make_dispatcher_decl (default_node->decl);
31713
31714 dispatcher_node = cgraph_node::get_create (dispatch_decl);
31715 gcc_assert (dispatcher_node != NULL);
31716 dispatcher_node->dispatcher_function = 1;
31717 dispatcher_version_info
31718 = dispatcher_node->insert_new_function_version ();
31719 dispatcher_version_info->next = default_version_info;
31720 dispatcher_node->definition = 1;
31721
31722 /* Set the dispatcher for all the versions. */
31723 it_v = default_version_info;
31724 while (it_v != NULL)
31725 {
31726 it_v->dispatcher_resolver = dispatch_decl;
31727 it_v = it_v->next;
31728 }
31729 }
31730 else
31731 {
31732 error_at (DECL_SOURCE_LOCATION (default_node->decl),
31733 "multiversioning needs ifunc which is not supported "
31734 "on this target");
31735 }
31736 #endif
31737
31738 return dispatch_decl;
31739 }
31740
31741 /* Make the resolver function decl to dispatch the versions of a multi-
31742 versioned function, DEFAULT_DECL. Create an empty basic block in the
31743 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
31744 function. */
31745
31746 static tree
31747 make_resolver_func (const tree default_decl,
31748 const tree dispatch_decl,
31749 basic_block *empty_bb)
31750 {
31751 /* Make the resolver function static. The resolver function returns
31752 void *. */
31753 tree decl_name = clone_function_name (default_decl, "resolver");
31754 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
31755 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
31756 tree decl = build_fn_decl (resolver_name, type);
31757 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
31758
31759 DECL_NAME (decl) = decl_name;
31760 TREE_USED (decl) = 1;
31761 DECL_ARTIFICIAL (decl) = 1;
31762 DECL_IGNORED_P (decl) = 0;
31763 TREE_PUBLIC (decl) = 0;
31764 DECL_UNINLINABLE (decl) = 1;
31765
31766 /* Resolver is not external, body is generated. */
31767 DECL_EXTERNAL (decl) = 0;
31768 DECL_EXTERNAL (dispatch_decl) = 0;
31769
31770 DECL_CONTEXT (decl) = NULL_TREE;
31771 DECL_INITIAL (decl) = make_node (BLOCK);
31772 DECL_STATIC_CONSTRUCTOR (decl) = 0;
31773
31774 /* Build result decl and add to function_decl. */
31775 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
31776 DECL_CONTEXT (t) = decl;
31777 DECL_ARTIFICIAL (t) = 1;
31778 DECL_IGNORED_P (t) = 1;
31779 DECL_RESULT (decl) = t;
31780
31781 gimplify_function_tree (decl);
31782 push_cfun (DECL_STRUCT_FUNCTION (decl));
31783 *empty_bb = init_lowered_empty_function (decl, false,
31784 profile_count::uninitialized ());
31785
31786 cgraph_node::add_new_function (decl, true);
31787 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
31788
31789 pop_cfun ();
31790
31791 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
31792 DECL_ATTRIBUTES (dispatch_decl)
31793 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
31794
31795 cgraph_node::create_same_body_alias (dispatch_decl, decl);
31796
31797 return decl;
31798 }
31799
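/* Illustrative sketch (not part of the original source): conceptually,
   make_resolver_func turns a multi-versioned function foo into

     static void *foo_resolver (void) { ... dispatch logic ... }
     int foo (void) __attribute__ ((ifunc ("foo_resolver")));

   (the real resolver name is "foo.resolver", spelled with a dot by
   clone_function_name), so the dynamic linker runs the resolver once and
   binds foo to the function pointer it returns. */
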
31800 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
31801 return a pointer to VERSION_DECL if we are running on a machine that
31802 supports the index CLONE_ISA hardware architecture bits. This function will
31803 be called during version dispatch to decide which function version to
31804 execute. It returns the basic block at the end, to which more conditions
31805 can be added. */
31806
31807 static basic_block
31808 add_condition_to_bb (tree function_decl, tree version_decl,
31809 int clone_isa, basic_block new_bb)
31810 {
31811 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31812
31813 gcc_assert (new_bb != NULL);
31814 gimple_seq gseq = bb_seq (new_bb);
31815
31816
31817 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31818 build_fold_addr_expr (version_decl));
31819 tree result_var = create_tmp_var (ptr_type_node);
31820 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
31821 gimple *return_stmt = gimple_build_return (result_var);
31822
31823 if (clone_isa == CLONE_DEFAULT)
31824 {
31825 gimple_seq_add_stmt (&gseq, convert_stmt);
31826 gimple_seq_add_stmt (&gseq, return_stmt);
31827 set_bb_seq (new_bb, gseq);
31828 gimple_set_bb (convert_stmt, new_bb);
31829 gimple_set_bb (return_stmt, new_bb);
31830 pop_cfun ();
31831 return new_bb;
31832 }
31833
31834 tree bool_zero = build_int_cst (bool_int_type_node, 0);
31835 tree cond_var = create_tmp_var (bool_int_type_node);
31836 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
31837 const char *arg_str = rs6000_clone_map[clone_isa].name;
31838 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31839 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31840 gimple_call_set_lhs (call_cond_stmt, cond_var);
31841
31842 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31843 gimple_set_bb (call_cond_stmt, new_bb);
31844 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31845
31846 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
31847 NULL_TREE, NULL_TREE);
31848 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31849 gimple_set_bb (if_else_stmt, new_bb);
31850 gimple_seq_add_stmt (&gseq, if_else_stmt);
31851
31852 gimple_seq_add_stmt (&gseq, convert_stmt);
31853 gimple_seq_add_stmt (&gseq, return_stmt);
31854 set_bb_seq (new_bb, gseq);
31855
31856 basic_block bb1 = new_bb;
31857 edge e12 = split_block (bb1, if_else_stmt);
31858 basic_block bb2 = e12->dest;
31859 e12->flags &= ~EDGE_FALLTHRU;
31860 e12->flags |= EDGE_TRUE_VALUE;
31861
31862 edge e23 = split_block (bb2, return_stmt);
31863 gimple_set_bb (convert_stmt, bb2);
31864 gimple_set_bb (return_stmt, bb2);
31865
31866 basic_block bb3 = e23->dest;
31867 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31868
31869 remove_edge (e23);
31870 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31871
31872 pop_cfun ();
31873 return bb3;
31874 }
31875
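/* Illustrative sketch (not part of the original source): for a non-default
   clone, add_condition_to_bb appends roughly the following to the resolver,
   where the string is the clone's rs6000_clone_map name (e.g. "arch_3_00"):

     cond = __builtin_cpu_supports ("arch_3_00");
     if (cond != 0)
       return (void *) foo_arch_3_00;

   and execution falls through to the next condition otherwise. The default
   clone (CLONE_DEFAULT) returns its address unconditionally, terminating
   the chain. */
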
31876 /* This function generates the dispatch function for multi-versioned functions.
31877 DISPATCH_DECL is the function which will contain the dispatch logic.
31878 FNDECLS are the function choices for dispatch, and is a tree chain.
31879 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
31880 code is generated. */
31881
31882 static int
31883 dispatch_function_versions (tree dispatch_decl,
31884 void *fndecls_p,
31885 basic_block *empty_bb)
31886 {
31887 int ix;
31888 tree ele;
31889 vec<tree> *fndecls;
31890 tree clones[CLONE_MAX];
31891
31892 if (TARGET_DEBUG_TARGET)
31893 fputs ("dispatch_function_versions, top\n", stderr);
31894
31895 gcc_assert (dispatch_decl != NULL
31896 && fndecls_p != NULL
31897 && empty_bb != NULL);
31898
31899 /* fndecls_p is actually a vector. */
31900 fndecls = static_cast<vec<tree> *> (fndecls_p);
31901
31902 /* At least one more version other than the default. */
31903 gcc_assert (fndecls->length () >= 2);
31904
31905 /* The first version in the vector is the default decl. */
31906 memset ((void *) clones, '\0', sizeof (clones));
31907 clones[CLONE_DEFAULT] = (*fndecls)[0];
31908
31909 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
31910 here (on x86_64, it is not a NOP). The builtin function
31911 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
31912 recent glibc. If we ever need to call __builtin_cpu_init, we would need
31913 to insert the code here to do the call. */
31914
31915 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31916 {
31917 int priority = rs6000_clone_priority (ele);
31918 if (!clones[priority])
31919 clones[priority] = ele;
31920 }
31921
31922 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
31923 if (clones[ix])
31924 {
31925 if (TARGET_DEBUG_TARGET)
31926 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
31927 ix, get_decl_name (clones[ix]));
31928
31929 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
31930 *empty_bb);
31931 }
31932
31933 return 0;
31934 }
31935
31936 /* Generate the dispatching code body to dispatch multi-versioned function
31937 DECL. The target hook is called to process the "target" attributes and
31938 provide the code to dispatch the right function at run-time. NODE points
31939 to the dispatcher decl whose body will be created. */
31940
31941 static tree
31942 rs6000_generate_version_dispatcher_body (void *node_p)
31943 {
31944 tree resolver;
31945 basic_block empty_bb;
31946 struct cgraph_node *node = (cgraph_node *) node_p;
31947 struct cgraph_function_version_info *ninfo = node->function_version ();
31948
31949 if (ninfo->dispatcher_resolver)
31950 return ninfo->dispatcher_resolver;
31951
31952 /* node is going to be an alias, so remove the finalized bit. */
31953 node->definition = false;
31954
31955 /* The first version in the chain corresponds to the default version. */
31956 ninfo->dispatcher_resolver = resolver
31957 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
31958
31959 if (TARGET_DEBUG_TARGET)
31960 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
31961 get_decl_name (resolver));
31962
31963 push_cfun (DECL_STRUCT_FUNCTION (resolver));
31964 auto_vec<tree, 2> fn_ver_vec;
31965
31966 for (struct cgraph_function_version_info *vinfo = ninfo->next;
31967 vinfo;
31968 vinfo = vinfo->next)
31969 {
31970 struct cgraph_node *version = vinfo->this_node;
31971 /* Check for virtual functions here again, as by this time it should
31972 have been determined if this function needs a vtable index or
31973 not. This happens for methods in derived classes that override
31974 virtual methods in base classes but are not explicitly marked as
31975 virtual. */
31976 if (DECL_VINDEX (version->decl))
31977 sorry ("Virtual function multiversioning not supported");
31978
31979 fn_ver_vec.safe_push (version->decl);
31980 }
31981
31982 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
31983 cgraph_edge::rebuild_edges ();
31984 pop_cfun ();
31985 return resolver;
31986 }
31987
31988 \f
31989 /* Hook to determine if one function can safely inline another. */
31990
31991 static bool
31992 rs6000_can_inline_p (tree caller, tree callee)
31993 {
31994 bool ret = false;
31995 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
31996 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
31997
31998 /* If callee has no option attributes, then it is ok to inline. */
31999 if (!callee_tree)
32000 ret = true;
32001
32002 /* If caller has no option attributes, but callee does, then it is not ok to
32003 inline. */
32004 else if (!caller_tree)
32005 ret = false;
32006
32007 else
32008 {
32009 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32010 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32011
32012 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32013 can inline an altivec function but a non-vsx function can't inline a
32014 vsx function. */
32015 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32016 == callee_opts->x_rs6000_isa_flags)
32017 ret = true;
32018 }
32019
32020 if (TARGET_DEBUG_TARGET)
32021 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32022 get_decl_name (caller), get_decl_name (callee),
32023 (ret ? "can" : "cannot"));
32024
32025 return ret;
32026 }
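
/* Illustrative sketch (not part of the original source): under the subset
   rule above,

     __attribute__ ((target ("altivec"))) static int callee (void)
     { return 0; }
     __attribute__ ((target ("vsx"))) int caller (void)
     { return callee (); }

   may be inlined, since -mvsx implies -maltivec and so the callee's ISA
   flags are a subset of the caller's, while the reverse direction would be
   rejected. */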
32027 \f
32028 /* Allocate a stack temp and fix up the address so it meets the particular
32029 memory requirements (either offsettable or REG+REG addressing). */
32030
32031 rtx
32032 rs6000_allocate_stack_temp (machine_mode mode,
32033 bool offsettable_p,
32034 bool reg_reg_p)
32035 {
32036 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32037 rtx addr = XEXP (stack, 0);
32038 int strict_p = reload_completed;
32039
32040 if (!legitimate_indirect_address_p (addr, strict_p))
32041 {
32042 if (offsettable_p
32043 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32044 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32045
32046 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32047 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32048 }
32049
32050 return stack;
32051 }
32052
32053 /* Given a memory reference, if it does not use reg or reg+reg addressing,
32054 convert it to such a form to deal with memory reference instructions
32055 like STFIWX and LDBRX that only take reg+reg addressing. */
32056
32057 rtx
32058 rs6000_force_indexed_or_indirect_mem (rtx x)
32059 {
32060 machine_mode mode = GET_MODE (x);
32061
32062 gcc_assert (MEM_P (x));
32063 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
32064 {
32065 rtx addr = XEXP (x, 0);
32066 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32067 {
32068 rtx reg = XEXP (addr, 0);
32069 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32070 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32071 gcc_assert (REG_P (reg));
32072 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32073 addr = reg;
32074 }
32075 else if (GET_CODE (addr) == PRE_MODIFY)
32076 {
32077 rtx reg = XEXP (addr, 0);
32078 rtx expr = XEXP (addr, 1);
32079 gcc_assert (REG_P (reg));
32080 gcc_assert (GET_CODE (expr) == PLUS);
32081 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32082 addr = reg;
32083 }
32084
32085 x = replace_equiv_address (x, force_reg (Pmode, addr));
32086 }
32087
32088 return x;
32089 }
32090
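/* Illustrative sketch (not part of the original source): a pre-increment
   reference such as

     (mem (pre_inc (reg r9)))

   is rewritten by the code above into an explicit update followed by a
   plain indirect reference, i.e. roughly

     r9 = r9 + GET_MODE_SIZE (mode);
     (mem (reg r9))

   which instructions like STFIWX and LDBRX can then address. */
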
32091 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32092
32093 On the RS/6000, all integer constants are acceptable, though most won't be
32094 valid for particular insns. Only easy FP constants are acceptable. */
32095
32096 static bool
32097 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32098 {
32099 if (TARGET_ELF && tls_referenced_p (x))
32100 return false;
32101
32102 if (CONST_DOUBLE_P (x))
32103 return easy_fp_constant (x, mode);
32104
32105 if (GET_CODE (x) == CONST_VECTOR)
32106 return easy_vector_constant (x, mode);
32107
32108 return true;
32109 }
32110
32111 \f
32112 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
32113
32114 static bool
32115 chain_already_loaded (rtx_insn *last)
32116 {
32117 for (; last != NULL; last = PREV_INSN (last))
32118 {
32119 if (NONJUMP_INSN_P (last))
32120 {
32121 rtx patt = PATTERN (last);
32122
32123 if (GET_CODE (patt) == SET)
32124 {
32125 rtx lhs = XEXP (patt, 0);
32126
32127 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
32128 return true;
32129 }
32130 }
32131 }
32132 return false;
32133 }
32134
32135 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32136
32137 void
32138 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32139 {
32140 rtx func = func_desc;
32141 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32142 rtx toc_load = NULL_RTX;
32143 rtx toc_restore = NULL_RTX;
32144 rtx func_addr;
32145 rtx abi_reg = NULL_RTX;
32146 rtx call[4];
32147 int n_call;
32148 rtx insn;
32149 bool is_pltseq_longcall;
32150
32151 if (global_tlsarg)
32152 tlsarg = global_tlsarg;
32153
32154 /* Handle longcall attributes. */
32155 is_pltseq_longcall = false;
32156 if ((INTVAL (cookie) & CALL_LONG) != 0
32157 && GET_CODE (func_desc) == SYMBOL_REF)
32158 {
32159 func = rs6000_longcall_ref (func_desc, tlsarg);
32160 if (TARGET_PLTSEQ)
32161 is_pltseq_longcall = true;
32162 }
32163
32164 /* Handle indirect calls. */
32165 if (!SYMBOL_REF_P (func)
32166 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
32167 {
32168 if (!rs6000_pcrel_p (cfun))
32169 {
32170 /* Save the TOC into its reserved slot before the call,
32171 and prepare to restore it after the call. */
32172 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32173 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
32174 gen_rtvec (1, stack_toc_offset),
32175 UNSPEC_TOCSLOT);
32176 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
32177
32178 /* Can we optimize saving the TOC in the prologue or
32179 do we need to do it at every call? */
32180 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32181 cfun->machine->save_toc_in_prologue = true;
32182 else
32183 {
32184 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32185 rtx stack_toc_mem = gen_frame_mem (Pmode,
32186 gen_rtx_PLUS (Pmode, stack_ptr,
32187 stack_toc_offset));
32188 MEM_VOLATILE_P (stack_toc_mem) = 1;
32189 if (is_pltseq_longcall)
32190 {
32191 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
32192 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32193 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
32194 }
32195 else
32196 emit_move_insn (stack_toc_mem, toc_reg);
32197 }
32198 }
32199
32200 if (DEFAULT_ABI == ABI_ELFv2)
32201 {
32202 /* A function pointer in the ELFv2 ABI is just a plain address, but
32203 the ABI requires it to be loaded into r12 before the call. */
32204 func_addr = gen_rtx_REG (Pmode, 12);
32205 if (!rtx_equal_p (func_addr, func))
32206 emit_move_insn (func_addr, func);
32207 abi_reg = func_addr;
32208 /* Indirect calls via CTR are strongly preferred over indirect
32209 calls via LR, so move the address there. Needed to mark
32210 this insn for linker plt sequence editing too. */
32211 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32212 if (is_pltseq_longcall)
32213 {
32214 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
32215 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32216 emit_insn (gen_rtx_SET (func_addr, mark_func));
32217 v = gen_rtvec (2, func_addr, func_desc);
32218 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32219 }
32220 else
32221 emit_move_insn (func_addr, abi_reg);
32222 }
32223 else
32224 {
32225 /* A function pointer under AIX is a pointer to a data area whose
32226 first word contains the actual address of the function, whose
32227 second word contains a pointer to its TOC, and whose third word
32228 contains a value to place in the static chain register (r11).
32229 Note that if we load the static chain, our "trampoline" need
32230 not have any executable code. */
32231
32232 /* Load up address of the actual function. */
32233 func = force_reg (Pmode, func);
32234 func_addr = gen_reg_rtx (Pmode);
32235 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
32236
32237 /* Indirect calls via CTR are strongly preferred over indirect
32238 calls via LR, so move the address there. */
32239 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
32240 emit_move_insn (ctr_reg, func_addr);
32241 func_addr = ctr_reg;
32242
32243 /* Prepare to load the TOC of the called function. Note that the
32244 TOC load must happen immediately before the actual call so
32245 that unwinding the TOC registers works correctly. See the
32246 comment in frob_update_context. */
32247 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32248 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32249 gen_rtx_PLUS (Pmode, func,
32250 func_toc_offset));
32251 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32252
32253 /* If we have a static chain, load it up. But, if the call was
32254 originally direct, the 3rd word has not been written since no
32255 trampoline has been built, so we ought not to load it, lest we
32256 override a static chain value. */
32257 if (!(GET_CODE (func_desc) == SYMBOL_REF
32258 && SYMBOL_REF_FUNCTION_P (func_desc))
32259 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
32260 && !chain_already_loaded (get_current_sequence ()->next->last))
32261 {
32262 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32263 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32264 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32265 gen_rtx_PLUS (Pmode, func,
32266 func_sc_offset));
32267 emit_move_insn (sc_reg, func_sc_mem);
32268 abi_reg = sc_reg;
32269 }
32270 }
32271 }
32272 else
32273 {
32274 /* No TOC register needed for calls from PC-relative callers. */
32275 if (!rs6000_pcrel_p (cfun))
32276 /* Direct calls use the TOC: for local calls, the callee will
32277 assume the TOC register is set; for non-local calls, the
32278 PLT stub needs the TOC register. */
32279 abi_reg = toc_reg;
32280 func_addr = func;
32281 }
32282
32283 /* Create the call. */
32284 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32285 if (value != NULL_RTX)
32286 call[0] = gen_rtx_SET (value, call[0]);
32287 n_call = 1;
32288
32289 if (toc_load)
32290 call[n_call++] = toc_load;
32291 if (toc_restore)
32292 call[n_call++] = toc_restore;
32293
32294 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32295
32296 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32297 insn = emit_call_insn (insn);
32298
32299 /* Mention all registers defined by the ABI to hold information
32300 as uses in CALL_INSN_FUNCTION_USAGE. */
32301 if (abi_reg)
32302 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32303 }
32304
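/* Illustrative sketch (not part of the original source): for an indirect
   call through an AIX function descriptor FD, the sequence built above
   amounts roughly to

     ld rT, 0(FD)        # actual function address
     mtctr rT
     std r2, <slot>(r1)  # save caller's TOC in its reserved stack slot
     ld r11, 16(FD)      # static chain, when one is needed
     bctrl               # call; the pattern also loads r2 from 8(FD)
     ld r2, <slot>(r1)   # restore the TOC after the call

   with the exact offsets and TOC save slot depending on the ABI and the
   word size. */
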
32305 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32306
32307 void
32308 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32309 {
32310 rtx call[2];
32311 rtx insn;
32312
32313 gcc_assert (INTVAL (cookie) == 0);
32314
32315 if (global_tlsarg)
32316 tlsarg = global_tlsarg;
32317
32318 /* Create the call. */
32319 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
32320 if (value != NULL_RTX)
32321 call[0] = gen_rtx_SET (value, call[0]);
32322
32323 call[1] = simple_return_rtx;
32324
32325 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32326 insn = emit_call_insn (insn);
32327
32328 /* Note use of the TOC register. */
32329 if (!rs6000_pcrel_p (cfun))
32330 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
32331 gen_rtx_REG (Pmode, TOC_REGNUM));
32332 }
32333
32334 /* Expand code to perform a call under the SYSV4 ABI. */
32335
32336 void
32337 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32338 {
32339 rtx func = func_desc;
32340 rtx func_addr;
32341 rtx call[4];
32342 rtx insn;
32343 rtx abi_reg = NULL_RTX;
32344 int n;
32345
32346 if (global_tlsarg)
32347 tlsarg = global_tlsarg;
32348
32349 /* Handle longcall attributes. */
32350 if ((INTVAL (cookie) & CALL_LONG) != 0
32351 && GET_CODE (func_desc) == SYMBOL_REF)
32352 {
32353 func = rs6000_longcall_ref (func_desc, tlsarg);
32354 /* If the longcall was implemented as an inline PLT call using
32355 PLT unspecs then func will be REG:r11. If not, func will be
32356 a pseudo reg. The inline PLT call sequence supports lazy
32357 linking (and longcalls to functions in dlopen'd libraries).
32358 The other style of longcall doesn't. The lazy linking entry
32359 to the dynamic symbol resolver requires r11 be the function
32360 address (as it is for linker generated PLT stubs). Ensure
32361 r11 stays valid to the bctrl by marking r11 used by the call. */
32362 if (TARGET_PLTSEQ)
32363 abi_reg = func;
32364 }
32365
32366 /* Handle indirect calls. */
32367 if (GET_CODE (func) != SYMBOL_REF)
32368 {
32369 func = force_reg (Pmode, func);
32370
32371 /* Indirect calls via CTR are strongly preferred over indirect
32372 calls via LR, so move the address there. That can't be left
32373 to reload because we want to mark every instruction in an
32374 inline PLT call sequence with a reloc, enabling the linker to
32375 edit the sequence back to a direct call when that makes sense. */
32376 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32377 if (abi_reg)
32378 {
32379 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32380 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32381 emit_insn (gen_rtx_SET (func_addr, mark_func));
32382 v = gen_rtvec (2, func_addr, func_desc);
32383 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32384 }
32385 else
32386 emit_move_insn (func_addr, func);
32387 }
32388 else
32389 func_addr = func;
32390
32391 /* Create the call. */
32392 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32393 if (value != NULL_RTX)
32394 call[0] = gen_rtx_SET (value, call[0]);
32395
32396 call[1] = gen_rtx_USE (VOIDmode, cookie);
32397 n = 2;
32398 if (TARGET_SECURE_PLT
32399 && flag_pic
32400 && GET_CODE (func_addr) == SYMBOL_REF
32401 && !SYMBOL_REF_LOCAL_P (func_addr))
32402 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
32403
32404 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32405
32406 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
32407 insn = emit_call_insn (insn);
32408 if (abi_reg)
32409 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32410 }
32411
32412 /* Expand code to perform a sibling call under the SysV4 ABI. */
32413
32414 void
32415 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
32416 {
32417 rtx func = func_desc;
32418 rtx func_addr;
32419 rtx call[3];
32420 rtx insn;
32421 rtx abi_reg = NULL_RTX;
32422
32423 if (global_tlsarg)
32424 tlsarg = global_tlsarg;
32425
32426 /* Handle longcall attributes. */
32427 if ((INTVAL (cookie) & CALL_LONG) != 0
32428 && GET_CODE (func_desc) == SYMBOL_REF)
32429 {
32430 func = rs6000_longcall_ref (func_desc, tlsarg);
32431 /* If the longcall was implemented as an inline PLT call using
32432 PLT unspecs then func will be REG:r11. If not, func will be
32433 a pseudo reg. The inline PLT call sequence supports lazy
32434 linking (and longcalls to functions in dlopen'd libraries).
32435 The other style of longcall doesn't. The lazy linking entry
32436 to the dynamic symbol resolver requires r11 be the function
32437 address (as it is for linker generated PLT stubs). Ensure
32438 r11 stays valid to the bctr by marking r11 used by the call. */
32439 if (TARGET_PLTSEQ)
32440 abi_reg = func;
32441 }
32442
32443 /* Handle indirect calls. */
32444 if (GET_CODE (func) != SYMBOL_REF)
32445 {
32446 func = force_reg (Pmode, func);
32447
32448 /* Indirect sibcalls must go via CTR. That can't be left to
32449 reload because we want to mark every instruction in an inline
32450 PLT call sequence with a reloc, enabling the linker to edit
32451 the sequence back to a direct call when that makes sense. */
32452 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32453 if (abi_reg)
32454 {
32455 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
32456 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32457 emit_insn (gen_rtx_SET (func_addr, mark_func));
32458 v = gen_rtvec (2, func_addr, func_desc);
32459 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
32460 }
32461 else
32462 emit_move_insn (func_addr, func);
32463 }
32464 else
32465 func_addr = func;
32466
32467 /* Create the call. */
32468 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32469 if (value != NULL_RTX)
32470 call[0] = gen_rtx_SET (value, call[0]);
32471
32472 call[1] = gen_rtx_USE (VOIDmode, cookie);
32473 call[2] = simple_return_rtx;
32474
32475 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32476 insn = emit_call_insn (insn);
32477 if (abi_reg)
32478 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32479 }
32480
32481 #if TARGET_MACHO
32482
32483 /* Expand code to perform a call under the Darwin ABI.
32484 Modulo handling of mlongcall, this is much the same as sysv.
32485 If/when the longcall optimisation is removed, we could drop this
32486 code and use the sysv case (taking care to avoid the tls stuff).
32487
32488 We can use this for sibcalls too, if needed. */
32489
32490 void
32491 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
32492 rtx cookie, bool sibcall)
32493 {
32494 rtx func = func_desc;
32495 rtx func_addr;
32496 rtx call[3];
32497 rtx insn;
32498 int cookie_val = INTVAL (cookie);
32499 bool make_island = false;
32500
32501 /* Handle longcall attributes; there are two cases for Darwin:
32502 1) Newer linkers are capable of synthesising any branch islands needed.
32503 2) We need a helper branch island synthesised by the compiler.
32504 The second case has mostly been retired and we don't use it for m64.
32505 In fact, it is only an optimisation; we could just indirect as sysv does,
32506 but we keep it for backwards compatibility for now.
32507 If we're going to use this, then we need to keep the CALL_LONG bit set,
32508 so that we can pick up the special insn form later. */
32509 if ((cookie_val & CALL_LONG) != 0
32510 && GET_CODE (func_desc) == SYMBOL_REF)
32511 {
32512 /* FIXME: the longcall opt should not hang off picsymbol stubs. */
32513 if (darwin_picsymbol_stubs && TARGET_32BIT)
32514 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
32515 else
32516 {
32517 /* The linker is capable of doing this, but the user explicitly
32518 asked for -mlongcall, so we'll do the 'normal' version. */
32519 func = rs6000_longcall_ref (func_desc, NULL_RTX);
32520 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
32521 }
32522 }
32523
32524 /* Handle indirect calls. */
32525 if (GET_CODE (func) != SYMBOL_REF)
32526 {
32527 func = force_reg (Pmode, func);
32528
32529 /* Indirect calls via CTR are strongly preferred over indirect
32530 calls via LR, and are required for indirect sibcalls, so move
32531 the address there. */
32532 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
32533 emit_move_insn (func_addr, func);
32534 }
32535 else
32536 func_addr = func;
32537
32538 /* Create the call. */
32539 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
32540 if (value != NULL_RTX)
32541 call[0] = gen_rtx_SET (value, call[0]);
32542
32543 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
32544
32545 if (sibcall)
32546 call[2] = simple_return_rtx;
32547 else
32548 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
32549
32550 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
32551 insn = emit_call_insn (insn);
32552 /* Now we have the debug info in the insn, we can set up the branch island
32553 if we're using one. */
32554 if (make_island)
32555 {
32556 tree funname = get_identifier (XSTR (func_desc, 0));
32557
32558 if (no_previous_def (funname))
32559 {
32560 rtx label_rtx = gen_label_rtx ();
32561 char *label_buf, temp_buf[256];
32562 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32563 CODE_LABEL_NUMBER (label_rtx));
32564 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32565 tree labelname = get_identifier (label_buf);
32566 add_compiler_branch_island (labelname, funname,
32567 insn_line ((const rtx_insn*)insn));
32568 }
32569 }
32570 }
32571 #endif
32572
32573 void
32574 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32575 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32576 {
32577 #if TARGET_MACHO
32578 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
32579 #else
32580 gcc_unreachable ();
32581 #endif
32582 }
32583
32584
32585 void
32586 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
32587 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
32588 {
32589 #if TARGET_MACHO
32590 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
32591 #else
32592 gcc_unreachable ();
32593 #endif
32594 }
32595
32596 /* Return whether we should generate PC-relative code for FNDECL. */
32597 bool
32598 rs6000_fndecl_pcrel_p (const_tree fndecl)
32599 {
32600 if (DEFAULT_ABI != ABI_ELFv2)
32601 return false;
32602
32603 struct cl_target_option *opts = target_opts_for_fn (fndecl);
32604
32605 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32606 && TARGET_CMODEL == CMODEL_MEDIUM);
32607 }
32608
32609 /* Return whether we should generate PC-relative code for *FN. */
32610 bool
32611 rs6000_pcrel_p (struct function *fn)
32612 {
32613 if (DEFAULT_ABI != ABI_ELFv2)
32614 return false;
32615
32616 /* Optimize usual case. */
32617 if (fn == cfun)
32618 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
32619 && TARGET_CMODEL == CMODEL_MEDIUM);
32620
32621 return rs6000_fndecl_pcrel_p (fn->decl);
32622 }
32623
32624 #ifdef HAVE_GAS_HIDDEN
32625 # define USE_HIDDEN_LINKONCE 1
32626 #else
32627 # define USE_HIDDEN_LINKONCE 0
32628 #endif
32629
32630 /* Fills in the label name that should be used for a 476 link stack thunk. */
32631
32632 void
32633 get_ppc476_thunk_name (char name[32])
32634 {
32635 gcc_assert (TARGET_LINK_STACK);
32636
32637 if (USE_HIDDEN_LINKONCE)
32638 sprintf (name, "__ppc476.get_thunk");
32639 else
32640 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32641 }
32642
32643 /* This function emits the simple thunk routine that is used to preserve
32644 the link stack on the 476 cpu. */
32645
32646 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
32647 static void
32648 rs6000_code_end (void)
32649 {
32650 char name[32];
32651 tree decl;
32652
32653 if (!TARGET_LINK_STACK)
32654 return;
32655
32656 get_ppc476_thunk_name (name);
32657
32658 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
32659 build_function_type_list (void_type_node, NULL_TREE));
32660 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
32661 NULL_TREE, void_type_node);
32662 TREE_PUBLIC (decl) = 1;
32663 TREE_STATIC (decl) = 1;
32664
32665 #if RS6000_WEAK
32666 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
32667 {
32668 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
32669 targetm.asm_out.unique_section (decl, 0);
32670 switch_to_section (get_named_section (decl, NULL, 0));
32671 DECL_WEAK (decl) = 1;
32672 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
32673 targetm.asm_out.globalize_label (asm_out_file, name);
32674 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
32675 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
32676 }
32677 else
32678 #endif
32679 {
32680 switch_to_section (text_section);
32681 ASM_OUTPUT_LABEL (asm_out_file, name);
32682 }
32683
32684 DECL_INITIAL (decl) = make_node (BLOCK);
32685 current_function_decl = decl;
32686 allocate_struct_function (decl, false);
32687 init_function_start (decl);
32688 first_function_block_is_cold = false;
32689 /* Make sure unwind info is emitted for the thunk if needed. */
32690 final_start_function (emit_barrier (), asm_out_file, 1);
32691
32692 fputs ("\tblr\n", asm_out_file);
32693
32694 final_end_function ();
32695 init_insn_lengths ();
32696 free_after_compilation (cfun);
32697 set_cfun (NULL);
32698 current_function_decl = NULL;
32699 }
32700
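/* Illustrative sketch (not part of the original source): the thunk emitted
   above is just

     __ppc476.get_thunk:
             blr

   A caller that needs its own PC can use "bl __ppc476.get_thunk" followed
   by mflr; because the bl is paired with a real blr, the 476's link stack
   predictor stays balanced, unlike a bare "bl .+4" trick. */
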
32701 /* Add r30 to hard reg set if the prologue sets it up and it is not
32702 pic_offset_table_rtx. */
32703
32704 static void
32705 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
32706 {
32707 if (!TARGET_SINGLE_PIC_BASE
32708 && TARGET_TOC
32709 && TARGET_MINIMAL_TOC
32710 && !constant_pool_empty_p ())
32711 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32712 if (cfun->machine->split_stack_argp_used)
32713 add_to_hard_reg_set (&set->set, Pmode, 12);
32714
32715 /* Make sure the hard reg set doesn't include r2, which was possibly added
32716 via PIC_OFFSET_TABLE_REGNUM. */
32717 if (TARGET_TOC)
32718 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
32719 }
32720
32721 \f
32722 /* Helper function for rs6000_split_logical to emit a logical instruction after
32723 splitting the operation into single GPR registers.
32724
32725 DEST is the destination register.
32726 OP1 and OP2 are the input source registers.
32727 CODE is the base operation (AND, IOR, XOR, NOT).
32728 MODE is the machine mode.
32729 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32730 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32731 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32732
32733 static void
32734 rs6000_split_logical_inner (rtx dest,
32735 rtx op1,
32736 rtx op2,
32737 enum rtx_code code,
32738 machine_mode mode,
32739 bool complement_final_p,
32740 bool complement_op1_p,
32741 bool complement_op2_p)
32742 {
32743 rtx bool_rtx;
32744
32745 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
32746 if (op2 && CONST_INT_P (op2)
32747 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
32748 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32749 {
32750 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
32751 HOST_WIDE_INT value = INTVAL (op2) & mask;
32752
32753 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
32754 if (code == AND)
32755 {
32756 if (value == 0)
32757 {
32758 emit_insn (gen_rtx_SET (dest, const0_rtx));
32759 return;
32760 }
32761
32762 else if (value == mask)
32763 {
32764 if (!rtx_equal_p (dest, op1))
32765 emit_insn (gen_rtx_SET (dest, op1));
32766 return;
32767 }
32768 }
32769
32770 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
32771 into separate ORI/ORIS or XORI/XORIS instructions. */
32772 else if (code == IOR || code == XOR)
32773 {
32774 if (value == 0)
32775 {
32776 if (!rtx_equal_p (dest, op1))
32777 emit_insn (gen_rtx_SET (dest, op1));
32778 return;
32779 }
32780 }
32781 }
32782
32783 if (code == AND && mode == SImode
32784 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32785 {
32786 emit_insn (gen_andsi3 (dest, op1, op2));
32787 return;
32788 }
32789
32790 if (complement_op1_p)
32791 op1 = gen_rtx_NOT (mode, op1);
32792
32793 if (complement_op2_p)
32794 op2 = gen_rtx_NOT (mode, op2);
32795
32796 /* For canonical RTL, if only one arm is inverted it is the first. */
32797 if (!complement_op1_p && complement_op2_p)
32798 std::swap (op1, op2);
32799
32800 bool_rtx = ((code == NOT)
32801 ? gen_rtx_NOT (mode, op1)
32802 : gen_rtx_fmt_ee (code, mode, op1, op2));
32803
32804 if (complement_final_p)
32805 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
32806
32807 emit_insn (gen_rtx_SET (dest, bool_rtx));
32808 }
32809
32810 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
32811 operations are split immediately during RTL generation to allow for more
32812 optimizations of the AND/IOR/XOR.
32813
32814 OPERANDS is an array containing the destination and two input operands.
32815 CODE is the base operation (AND, IOR, XOR, NOT).
32816 MODE is the machine mode.
32817 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32818 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32819 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32820 CLOBBER_REG is either NULL or a scratch register of type CC to allow
32821 formation of the AND instructions. */
32822
32823 static void
32824 rs6000_split_logical_di (rtx operands[3],
32825 enum rtx_code code,
32826 bool complement_final_p,
32827 bool complement_op1_p,
32828 bool complement_op2_p)
32829 {
32830 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
32831 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
32832 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
32833 enum hi_lo { hi = 0, lo = 1 };
32834 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
32835 size_t i;
32836
32837 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
32838 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
32839 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
32840 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
32841
32842 if (code == NOT)
32843 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
32844 else
32845 {
32846 if (!CONST_INT_P (operands[2]))
32847 {
32848 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
32849 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
32850 }
32851 else
32852 {
32853 HOST_WIDE_INT value = INTVAL (operands[2]);
32854 HOST_WIDE_INT value_hi_lo[2];
32855
32856 gcc_assert (!complement_final_p);
32857 gcc_assert (!complement_op1_p);
32858 gcc_assert (!complement_op2_p);
32859
32860 value_hi_lo[hi] = value >> 32;
32861 value_hi_lo[lo] = value & lower_32bits;
32862
32863 for (i = 0; i < 2; i++)
32864 {
32865 HOST_WIDE_INT sub_value = value_hi_lo[i];
32866
32867 if (sub_value & sign_bit)
32868 sub_value |= upper_32bits;
32869
32870 op2_hi_lo[i] = GEN_INT (sub_value);
32871
32872 /* If this is an AND instruction, check to see if we need to load
32873 the value in a register. */
32874 if (code == AND && sub_value != -1 && sub_value != 0
32875 && !and_operand (op2_hi_lo[i], SImode))
32876 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
32877 }
32878 }
32879 }
32880
32881 for (i = 0; i < 2; i++)
32882 {
32883 /* Split large IOR/XOR operations. */
32884 if ((code == IOR || code == XOR)
32885 && CONST_INT_P (op2_hi_lo[i])
32886 && !complement_final_p
32887 && !complement_op1_p
32888 && !complement_op2_p
32889 && !logical_const_operand (op2_hi_lo[i], SImode))
32890 {
32891 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
32892 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
32893 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
32894 rtx tmp = gen_reg_rtx (SImode);
32895
32896 /* Make sure the constant is sign extended. */
32897 if ((hi_16bits & sign_bit) != 0)
32898 hi_16bits |= upper_32bits;
32899
32900 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
32901 code, SImode, false, false, false);
32902
32903 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
32904 code, SImode, false, false, false);
32905 }
32906 else
32907 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
32908 code, SImode, complement_final_p,
32909 complement_op1_p, complement_op2_p);
32910 }
32911
32912 return;
32913 }
32914
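/* Illustrative sketch (not part of the original source): on a 32-bit
   target, a DImode XOR with a constant such as

     x ^ 0x123456789abcdef0

   is split into two SImode operations, one per 32-bit half, and each half
   whose constant is not a valid logical immediate is further split by the
   loop above into an xoris (high 16 bits) followed by an xori (low 16
   bits). */
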
32915 /* Split the insns that make up boolean operations operating on multiple GPR
32916 registers. The boolean MD patterns ensure that the inputs either are
32917 exactly the same as the output registers, or there is no overlap.
32918
32919 OPERANDS is an array containing the destination and two input operands.
32920 CODE is the base operation (AND, IOR, XOR, NOT).
32921 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32922 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32923 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32924
32925 void
32926 rs6000_split_logical (rtx operands[3],
32927 enum rtx_code code,
32928 bool complement_final_p,
32929 bool complement_op1_p,
32930 bool complement_op2_p)
32931 {
32932 machine_mode mode = GET_MODE (operands[0]);
32933 machine_mode sub_mode;
32934 rtx op0, op1, op2;
32935 int sub_size, regno0, regno1, nregs, i;
32936
32937 /* If this is DImode, use the specialized version that can run before
32938 register allocation. */
32939 if (mode == DImode && !TARGET_POWERPC64)
32940 {
32941 rs6000_split_logical_di (operands, code, complement_final_p,
32942 complement_op1_p, complement_op2_p);
32943 return;
32944 }
32945
32946 op0 = operands[0];
32947 op1 = operands[1];
32948 op2 = (code == NOT) ? NULL_RTX : operands[2];
32949 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
32950 sub_size = GET_MODE_SIZE (sub_mode);
32951 regno0 = REGNO (op0);
32952 regno1 = REGNO (op1);
32953
32954 gcc_assert (reload_completed);
32955 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
32956 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
32957
32958 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
32959 gcc_assert (nregs > 1);
32960
32961 if (op2 && REG_P (op2))
32962 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
32963
32964 for (i = 0; i < nregs; i++)
32965 {
32966 int offset = i * sub_size;
32967 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
32968 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
32969 rtx sub_op2 = ((code == NOT)
32970 ? NULL_RTX
32971 : simplify_subreg (sub_mode, op2, mode, offset));
32972
32973 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
32974 complement_final_p, complement_op1_p,
32975 complement_op2_p);
32976 }
32977
32978 return;
32979 }
32980
32981 \f
32982 /* Return true if the peephole2 pass can combine an addis instruction and
32983 a load with an offset, i.e. a pair that can be fused together on
32984 a power8. */
32985
32986 bool
32987 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
32988 rtx addis_value, /* addis value. */
32989 rtx target, /* target register that is loaded. */
32990 rtx mem) /* bottom part of the memory addr. */
32991 {
32992 rtx addr;
32993 rtx base_reg;
32994
32995 /* Validate arguments. */
32996 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
32997 return false;
32998
32999 if (!base_reg_operand (target, GET_MODE (target)))
33000 return false;
33001
33002 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33003 return false;
33004
33005 /* Allow sign/zero extension. */
33006 if (GET_CODE (mem) == ZERO_EXTEND
33007 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33008 mem = XEXP (mem, 0);
33009
33010 if (!MEM_P (mem))
33011 return false;
33012
33013 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33014 return false;
33015
33016 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33017 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33018 return false;
33019
33020 /* Validate that the register used to load the high value is either the
33021 register being loaded, or we can safely replace its use.
33022
33023 This function is only called from the peephole2 pass and we assume that
33024 there are 2 instructions in the peephole (addis and load), so we check
33025 that the target register is not used in the memory address and that the
33026 register holding the addis result is dead after the peephole. */
33027 if (REGNO (addis_reg) != REGNO (target))
33028 {
33029 if (reg_mentioned_p (target, mem))
33030 return false;
33031
33032 if (!peep2_reg_dead_p (2, addis_reg))
33033 return false;
33034
33035 /* If the target register being loaded is the stack pointer, we must
33036 avoid loading any other value into it, even temporarily. */
33037 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33038 return false;
33039 }
33040
33041 base_reg = XEXP (addr, 0);
33042 return REGNO (addis_reg) == REGNO (base_reg);
33043 }
33044
33045 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33046 sequence. We adjust the addis register to use the target register. If the
33047 load sign extends, we change the code to do a zero-extending load followed
33048 by an explicit sign extension, since the fusion only covers zero-extending
33049 loads.
33050
33051 The operands are:
33052 operands[0] register set with addis (to be replaced with target)
33053 operands[1] value set via addis
33054 operands[2] target register being loaded
33055 operands[3] D-form memory reference using operands[0]. */
33056
33057 void
33058 expand_fusion_gpr_load (rtx *operands)
33059 {
33060 rtx addis_value = operands[1];
33061 rtx target = operands[2];
33062 rtx orig_mem = operands[3];
33063 rtx new_addr, new_mem, orig_addr, offset;
33064 enum rtx_code plus_or_lo_sum;
33065 machine_mode target_mode = GET_MODE (target);
33066 machine_mode extend_mode = target_mode;
33067 machine_mode ptr_mode = Pmode;
33068 enum rtx_code extend = UNKNOWN;
33069
33070 if (GET_CODE (orig_mem) == ZERO_EXTEND
33071 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33072 {
33073 extend = GET_CODE (orig_mem);
33074 orig_mem = XEXP (orig_mem, 0);
33075 target_mode = GET_MODE (orig_mem);
33076 }
33077
33078 gcc_assert (MEM_P (orig_mem));
33079
33080 orig_addr = XEXP (orig_mem, 0);
33081 plus_or_lo_sum = GET_CODE (orig_addr);
33082 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33083
33084 offset = XEXP (orig_addr, 1);
33085 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33086 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33087
33088 if (extend != UNKNOWN)
33089 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33090
33091 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33092 UNSPEC_FUSION_GPR);
33093 emit_insn (gen_rtx_SET (target, new_mem));
33094
33095 if (extend == SIGN_EXTEND)
33096 {
33097 int sub_off = ((BYTES_BIG_ENDIAN)
33098 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33099 : 0);
33100 rtx sign_reg
33101 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33102
33103 emit_insn (gen_rtx_SET (target,
33104 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33105 }
33106
33107 return;
33108 }
33109
33110 /* Emit the addis instruction that will be part of a fused instruction
33111 sequence. */
33112
33113 void
33114 emit_fusion_addis (rtx target, rtx addis_value)
33115 {
33116 rtx fuse_ops[10];
33117 const char *addis_str = NULL;
33118
33119 /* Emit the addis instruction. */
33120 fuse_ops[0] = target;
33121 if (satisfies_constraint_L (addis_value))
33122 {
33123 fuse_ops[1] = addis_value;
33124 addis_str = "lis %0,%v1";
33125 }
33126
33127 else if (GET_CODE (addis_value) == PLUS)
33128 {
33129 rtx op0 = XEXP (addis_value, 0);
33130 rtx op1 = XEXP (addis_value, 1);
33131
33132 if (REG_P (op0) && CONST_INT_P (op1)
33133 && satisfies_constraint_L (op1))
33134 {
33135 fuse_ops[1] = op0;
33136 fuse_ops[2] = op1;
33137 addis_str = "addis %0,%1,%v2";
33138 }
33139 }
33140
33141 else if (GET_CODE (addis_value) == HIGH)
33142 {
33143 rtx value = XEXP (addis_value, 0);
33144 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33145 {
33146 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33147 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33148 if (TARGET_ELF)
33149 addis_str = "addis %0,%2,%1@toc@ha";
33150
33151 else if (TARGET_XCOFF)
33152 addis_str = "addis %0,%1@u(%2)";
33153
33154 else
33155 gcc_unreachable ();
33156 }
33157
33158 else if (GET_CODE (value) == PLUS)
33159 {
33160 rtx op0 = XEXP (value, 0);
33161 rtx op1 = XEXP (value, 1);
33162
33163 if (GET_CODE (op0) == UNSPEC
33164 && XINT (op0, 1) == UNSPEC_TOCREL
33165 && CONST_INT_P (op1))
33166 {
33167 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33168 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33169 fuse_ops[3] = op1;
33170 if (TARGET_ELF)
33171 addis_str = "addis %0,%2,%1+%3@toc@ha";
33172
33173 else if (TARGET_XCOFF)
33174 addis_str = "addis %0,%1+%3@u(%2)";
33175
33176 else
33177 gcc_unreachable ();
33178 }
33179 }
33180
33181 else if (satisfies_constraint_L (value))
33182 {
33183 fuse_ops[1] = value;
33184 addis_str = "lis %0,%v1";
33185 }
33186
33187 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33188 {
33189 fuse_ops[1] = value;
33190 addis_str = "lis %0,%1@ha";
33191 }
33192 }
33193
33194 if (!addis_str)
33195 fatal_insn ("Could not generate addis value for fusion", addis_value);
33196
33197 output_asm_insn (addis_str, fuse_ops);
33198 }
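
/* [Editor's note] The cases handled above, summarized by the template each
   one selects (operands as set up in fuse_ops):

     L-constraint constant                    "lis %0,%v1"
     (plus reg L-constant)                    "addis %0,%1,%v2"
     (high (unspec TOCREL)):    ELF           "addis %0,%2,%1@toc@ha"
                                XCOFF         "addis %0,%1@u(%2)"
     (high (plus (unspec TOCREL) n)):  ELF    "addis %0,%2,%1+%3@toc@ha"
                                       XCOFF  "addis %0,%1+%3@u(%2)"
     (high L-constant)                        "lis %0,%v1"
     (high constant), 32-bit ELF              "lis %0,%1@ha"

   Anything else is rejected via fatal_insn.  */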
33199
33200 /* Emit the D-form load instruction that is the second instruction of a
33201 fusion sequence. */
33202
33203 static void
33204 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
33205 {
33206 rtx fuse_ops[10];
33207 char insn_template[80];
33208
33209 fuse_ops[0] = load_reg;
33210 fuse_ops[1] = addis_reg;
33211
33212 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
33213 {
33214 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
33215 fuse_ops[2] = offset;
33216 output_asm_insn (insn_template, fuse_ops);
33217 }
33218
33219 else if (GET_CODE (offset) == UNSPEC
33220 && XINT (offset, 1) == UNSPEC_TOCREL)
33221 {
33222 if (TARGET_ELF)
33223 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
33224
33225 else if (TARGET_XCOFF)
33226 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
33227
33228 else
33229 gcc_unreachable ();
33230
33231 fuse_ops[2] = XVECEXP (offset, 0, 0);
33232 output_asm_insn (insn_template, fuse_ops);
33233 }
33234
33235 else if (GET_CODE (offset) == PLUS
33236 && GET_CODE (XEXP (offset, 0)) == UNSPEC
33237 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
33238 && CONST_INT_P (XEXP (offset, 1)))
33239 {
33240 rtx tocrel_unspec = XEXP (offset, 0);
33241 if (TARGET_ELF)
33242 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
33243
33244 else if (TARGET_XCOFF)
33245 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
33246
33247 else
33248 gcc_unreachable ();
33249
33250 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
33251 fuse_ops[3] = XEXP (offset, 1);
33252 output_asm_insn (insn_template, fuse_ops);
33253 }
33254
33255 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
33256 {
33257 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
33258
33259 fuse_ops[2] = offset;
33260 output_asm_insn (insn_template, fuse_ops);
33261 }
33262
33263 else
33264 fatal_insn ("Unable to generate load/store offset for fusion", offset);
33265
33266 return;
33267 }
33268
33269 /* Given an address, convert it into the addis and load offset parts. Addresses
33270 created during the peephole2 process look like:
33271 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
33272 (unspec [(...)] UNSPEC_TOCREL)) */
33273
33274 static void
33275 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
33276 {
33277 rtx hi, lo;
33278
33279 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
33280 {
33281 hi = XEXP (addr, 0);
33282 lo = XEXP (addr, 1);
33283 }
33284 else
33285 gcc_unreachable ();
33286
33287 *p_hi = hi;
33288 *p_lo = lo;
33289 }
33290
33291 /* Return a string to fuse an addis instruction with a GPR load into the same
33292 register that the addis instruction set. The address that is used
33293 is the logical address that was formed during peephole2:
33294 (lo_sum (high) (low-part))
33295
33296 The code is complicated, so we call output_asm_insn directly, and just
33297 return "". */
33298
33299 const char *
33300 emit_fusion_gpr_load (rtx target, rtx mem)
33301 {
33302 rtx addis_value;
33303 rtx addr;
33304 rtx load_offset;
33305 const char *load_str = NULL;
33306 machine_mode mode;
33307
33308 if (GET_CODE (mem) == ZERO_EXTEND)
33309 mem = XEXP (mem, 0);
33310
33311 gcc_assert (REG_P (target) && MEM_P (mem));
33312
33313 addr = XEXP (mem, 0);
33314 fusion_split_address (addr, &addis_value, &load_offset);
33315
33316 /* Now emit the load instruction to the same register. */
33317 mode = GET_MODE (mem);
33318 switch (mode)
33319 {
33320 case E_QImode:
33321 load_str = "lbz";
33322 break;
33323
33324 case E_HImode:
33325 load_str = "lhz";
33326 break;
33327
33328 case E_SImode:
33329 case E_SFmode:
33330 load_str = "lwz";
33331 break;
33332
33333 case E_DImode:
33334 case E_DFmode:
33335 gcc_assert (TARGET_POWERPC64);
33336 load_str = "ld";
33337 break;
33338
33339 default:
33340 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
33341 }
33342
33343 /* Emit the addis instruction. */
33344 emit_fusion_addis (target, addis_value);
33345
33346 /* Emit the D-form load instruction. */
33347 emit_fusion_load (target, target, load_offset, load_str);
33348
33349 return "";
33350 }
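
/* [Editor's illustration, hypothetical symbol] For a DImode load from a
   TOC-relative address on ELF, the two calls above emit a pair such as

       addis r9,r2,var@toc@ha
       ld    r9,var@toc@l(r9)

   with the addis retargeted at the load's destination register, which is
   the back-to-back shape the power8 fusion hardware expects.  */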
33351 \f
33352
33353 #ifdef RS6000_GLIBC_ATOMIC_FENV
33354 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
33355 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
33356 #endif
33357
33358 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
33359
33360 static void
33361 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
33362 {
33363 if (!TARGET_HARD_FLOAT)
33364 {
33365 #ifdef RS6000_GLIBC_ATOMIC_FENV
33366 if (atomic_hold_decl == NULL_TREE)
33367 {
33368 atomic_hold_decl
33369 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
33370 get_identifier ("__atomic_feholdexcept"),
33371 build_function_type_list (void_type_node,
33372 double_ptr_type_node,
33373 NULL_TREE));
33374 TREE_PUBLIC (atomic_hold_decl) = 1;
33375 DECL_EXTERNAL (atomic_hold_decl) = 1;
33376 }
33377
33378 if (atomic_clear_decl == NULL_TREE)
33379 {
33380 atomic_clear_decl
33381 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
33382 get_identifier ("__atomic_feclearexcept"),
33383 build_function_type_list (void_type_node,
33384 NULL_TREE));
33385 TREE_PUBLIC (atomic_clear_decl) = 1;
33386 DECL_EXTERNAL (atomic_clear_decl) = 1;
33387 }
33388
33389 tree const_double = build_qualified_type (double_type_node,
33390 TYPE_QUAL_CONST);
33391 tree const_double_ptr = build_pointer_type (const_double);
33392 if (atomic_update_decl == NULL_TREE)
33393 {
33394 atomic_update_decl
33395 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
33396 get_identifier ("__atomic_feupdateenv"),
33397 build_function_type_list (void_type_node,
33398 const_double_ptr,
33399 NULL_TREE));
33400 TREE_PUBLIC (atomic_update_decl) = 1;
33401 DECL_EXTERNAL (atomic_update_decl) = 1;
33402 }
33403
33404 tree fenv_var = create_tmp_var_raw (double_type_node);
33405 TREE_ADDRESSABLE (fenv_var) = 1;
33406 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
33407
33408 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
33409 *clear = build_call_expr (atomic_clear_decl, 0);
33410 *update = build_call_expr (atomic_update_decl, 1,
33411 fold_convert (const_double_ptr, fenv_addr));
33412 #endif
33413 return;
33414 }
33415
33416 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
33417 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
33418 tree call_mffs = build_call_expr (mffs, 0);
33419
33420 /* Generates the equivalent of feholdexcept (&fenv_var)
33421
33422 fenv_var = __builtin_mffs ();
33423 double fenv_hold;
33424 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
33425 __builtin_mtfsf (0xff, fenv_hold); */
33426
33427 /* Mask to clear everything except for the rounding modes and non-IEEE
33428 arithmetic flag. */
33429 const unsigned HOST_WIDE_INT hold_exception_mask =
33430 HOST_WIDE_INT_C (0xffffffff00000007);
33431
33432 tree fenv_var = create_tmp_var_raw (double_type_node);
33433
33434 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
33435
33436 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
33437 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
33438 build_int_cst (uint64_type_node,
33439 hold_exception_mask));
33440
33441 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
33442 fenv_llu_and);
33443
33444 tree hold_mtfsf = build_call_expr (mtfsf, 2,
33445 build_int_cst (unsigned_type_node, 0xff),
33446 fenv_hold_mtfsf);
33447
33448 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
33449
33450 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
33451
33452 double fenv_clear = __builtin_mffs ();
33453 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
33454 __builtin_mtfsf (0xff, fenv_clear); */
33455
33456 /* Mask to clear the entire low word of the FPSCR image, i.e. all
33457 exception, status, and control bits, including the rounding modes. */
33458 const unsigned HOST_WIDE_INT clear_exception_mask =
33459 HOST_WIDE_INT_C (0xffffffff00000000);
33460
33461 tree fenv_clear = create_tmp_var_raw (double_type_node);
33462
33463 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
33464
33465 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
33466 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
33467 fenv_clean_llu,
33468 build_int_cst (uint64_type_node,
33469 clear_exception_mask));
33470
33471 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
33472 fenv_clear_llu_and);
33473
33474 tree clear_mtfsf = build_call_expr (mtfsf, 2,
33475 build_int_cst (unsigned_type_node, 0xff),
33476 fenv_clear_mtfsf);
33477
33478 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
33479
33480 /* Generates the equivalent of feupdateenv (&fenv_var)
33481
33482 double old_fenv = __builtin_mffs ();
33483 double fenv_update;
33484 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
33485 (*(uint64_t*)&fenv_var & 0x1ff80fff);
33486 __builtin_mtfsf (0xff, fenv_update); */
33487
33488 const unsigned HOST_WIDE_INT update_exception_mask =
33489 HOST_WIDE_INT_C (0xffffffff1fffff00);
33490 const unsigned HOST_WIDE_INT new_exception_mask =
33491 HOST_WIDE_INT_C (0x1ff80fff);
33492
33493 tree old_fenv = create_tmp_var_raw (double_type_node);
33494 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
33495
33496 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
33497 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
33498 build_int_cst (uint64_type_node,
33499 update_exception_mask));
33500
33501 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
33502 build_int_cst (uint64_type_node,
33503 new_exception_mask));
33504
33505 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
33506 old_llu_and, new_llu_and);
33507
33508 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
33509 new_llu_mask);
33510
33511 tree update_mtfsf = build_call_expr (mtfsf, 2,
33512 build_int_cst (unsigned_type_node, 0xff),
33513 fenv_update_mtfsf);
33514
33515 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
33516 }
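
/* [Editor's usage sketch, assumed context] The hold/clear/update triple
   built by this hook is wrapped by the middle end around C11 atomic
   floating-point compound assignments, e.g.

       _Atomic double x;
       x += 1.0;

   HOLD runs before the compare-and-swap loop, CLEAR runs when an iteration
   fails and must retry, and UPDATE runs after the loop succeeds, so
   exceptions raised by discarded iterations are not reported.  */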
33517
33518 void
33519 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
33520 {
33521 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
33522
33523 rtx_tmp0 = gen_reg_rtx (V2DFmode);
33524 rtx_tmp1 = gen_reg_rtx (V2DFmode);
33525
33526 /* The element layout of the vmrgew instruction's destination is:
33527 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
33528 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
33529 vmrgew instruction will be correct. */
33530 if (BYTES_BIG_ENDIAN)
33531 {
33532 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
33533 GEN_INT (0)));
33534 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
33535 GEN_INT (3)));
33536 }
33537 else
33538 {
33539 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
33540 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
33541 }
33542
33543 rtx_tmp2 = gen_reg_rtx (V4SFmode);
33544 rtx_tmp3 = gen_reg_rtx (V4SFmode);
33545
33546 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
33547 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
33548
33549 if (BYTES_BIG_ENDIAN)
33550 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
33551 else
33552 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
33553 }
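
/* [Editor's sketch, big-endian element numbering] With src1 = {a0, a1},
   src2 = {b0, b1} and Ai/Bi the single-precision conversions of ai/bi:

     xxpermdi 0: rtx_tmp0 = {a0, b0}
     xxpermdi 3: rtx_tmp1 = {a1, b1}
     xvcvdpsp:   rtx_tmp2 = {A0, _, B0, _}   rtx_tmp3 = {A1, _, B1, _}
     vmrgew:     dst      = {A0, A1, B0, B1}

   so dst[0..1] hold the converted src1 elements and dst[2..3] the
   converted src2 elements.  */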
33554
33555 void
33556 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
33557 {
33558 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
33559
33560 rtx_tmp0 = gen_reg_rtx (V2DImode);
33561 rtx_tmp1 = gen_reg_rtx (V2DImode);
33562
33563 /* The element layout of the vmrgew instruction's destination is:
33564 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
33565 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
33566 vmrgew instruction will be correct. */
33567 if (BYTES_BIG_ENDIAN)
33568 {
33569 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
33570 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
33571 }
33572 else
33573 {
33574 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
33575 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
33576 }
33577
33578 rtx_tmp2 = gen_reg_rtx (V4SFmode);
33579 rtx_tmp3 = gen_reg_rtx (V4SFmode);
33580
33581 if (signed_convert)
33582 {
33583 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
33584 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
33585 }
33586 else
33587 {
33588 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
33589 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
33590 }
33591
33592 if (BYTES_BIG_ENDIAN)
33593 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
33594 else
33595 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
33596 }
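
/* [Editor's note] The dataflow matches rs6000_generate_float2_double_code
   above, with V2DI inputs and the doubleword-to-single-precision converts
   xvcvsxdsp (signed) or xvcvuxdsp (unsigned) in place of xvcvdpsp.  */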
33597
33598 void
33599 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
33600 rtx src2)
33601 {
33602 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
33603
33604 rtx_tmp0 = gen_reg_rtx (V2DFmode);
33605 rtx_tmp1 = gen_reg_rtx (V2DFmode);
33606
33607 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
33608 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
33609
33610 rtx_tmp2 = gen_reg_rtx (V4SImode);
33611 rtx_tmp3 = gen_reg_rtx (V4SImode);
33612
33613 if (signed_convert)
33614 {
33615 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
33616 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
33617 }
33618 else
33619 {
33620 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
33621 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
33622 }
33623
33624 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
33625 }
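
/* [Editor's sketch, big-endian element numbering] With src1 = {a0, a1},
   src2 = {b0, b1} and Ai/Bi the 32-bit integer conversions of ai/bi:

     xxpermdi 0/3:            {a0, b0} and {a1, b1}
     xvcvdpsxws / xvcvdpuxws: {A0, _, B0, _} and {A1, _, B1, _}
     vmrgew:                  dst = {A0, A1, B0, B1}  (V4SI)  */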
33626
33627 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
33628
33629 static bool
33630 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
33631 optimization_type opt_type)
33632 {
33633 switch (op)
33634 {
33635 case rsqrt_optab:
33636 return (opt_type == OPTIMIZE_FOR_SPEED
33637 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
33638
33639 default:
33640 return true;
33641 }
33642 }
33643
33644 /* Implement TARGET_CONSTANT_ALIGNMENT. */
33645
33646 static HOST_WIDE_INT
33647 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33648 {
33649 if (TREE_CODE (exp) == STRING_CST
33650 && (STRICT_ALIGNMENT || !optimize_size))
33651 return MAX (align, BITS_PER_WORD);
33652 return align;
33653 }
33654
33655 /* Implement TARGET_STARTING_FRAME_OFFSET. */
33656
33657 static HOST_WIDE_INT
33658 rs6000_starting_frame_offset (void)
33659 {
33660 if (FRAME_GROWS_DOWNWARD)
33661 return 0;
33662 return RS6000_STARTING_FRAME_OFFSET;
33663 }
33664 \f
33665
33666 /* Create an alias for a mangled name where we have changed the mangling (in
33667 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
33668 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
33669
33670 #if TARGET_ELF && RS6000_WEAK
33671 static void
33672 rs6000_globalize_decl_name (FILE * stream, tree decl)
33673 {
33674 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
33675
33676 targetm.asm_out.globalize_label (stream, name);
33677
33678 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
33679 {
33680 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
33681 const char *old_name;
33682
33683 ieee128_mangling_gcc_8_1 = true;
33684 lang_hooks.set_decl_assembler_name (decl);
33685 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33686 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
33687 ieee128_mangling_gcc_8_1 = false;
33688
33689 if (strcmp (name, old_name) != 0)
33690 {
33691 fprintf (stream, "\t.weak %s\n", old_name);
33692 fprintf (stream, "\t.set %s,%s\n", old_name, name);
33693 }
33694 }
33695 }
33696 #endif
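
/* [Editor's illustration, hypothetical symbol] For a function mangled
   _Z1fu9__ieee128 under the current scheme, the hook above would emit

       .weak _Z1fU10__float128
       .set _Z1fU10__float128,_Z1fu9__ieee128

   so objects built against the GCC 8.1 mangling still link.  */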
33697
33698 \f
33699 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
33700 function names from <foo>l to <foo>f128 if the default long double type is
33701 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
33702 include file switches the names on systems that support long double as IEEE
33703 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
33704 In the future, glibc will export names like __ieee128_sinf128 and we can
33705 switch to using those instead of using sinf128, which pollutes the user's
33706 namespace.
33707
33708 This also switches the names for the Fortran math functions, since Fortran
33709 does not use math.h. However, Fortran needs other changes to the compiler and
33710 library before you can switch the real*16 type at compile time.
33711
33712 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
33713 only do this if the default is that long double is IBM extended double, and
33714 the user asked for IEEE 128-bit. */
33715
33716 static tree
33717 rs6000_mangle_decl_assembler_name (tree decl, tree id)
33718 {
33719 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
33720 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
33721 {
33722 size_t len = IDENTIFIER_LENGTH (id);
33723 const char *name = IDENTIFIER_POINTER (id);
33724
33725 if (name[len - 1] == 'l')
33726 {
33727 bool uses_ieee128_p = false;
33728 tree type = TREE_TYPE (decl);
33729 machine_mode ret_mode = TYPE_MODE (type);
33730
33731 /* See if the function returns an IEEE 128-bit floating point type or
33732 complex type. */
33733 if (ret_mode == TFmode || ret_mode == TCmode)
33734 uses_ieee128_p = true;
33735 else
33736 {
33737 function_args_iterator args_iter;
33738 tree arg;
33739
33740 /* See if the function passes an IEEE 128-bit floating point type
33741 or complex type. */
33742 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
33743 {
33744 machine_mode arg_mode = TYPE_MODE (arg);
33745 if (arg_mode == TFmode || arg_mode == TCmode)
33746 {
33747 uses_ieee128_p = true;
33748 break;
33749 }
33750 }
33751 }
33752
33753 /* If we passed or returned an IEEE 128-bit floating point type,
33754 change the name. */
33755 if (uses_ieee128_p)
33756 {
33757 char *name2 = (char *) alloca (len + 4);
33758 memcpy (name2, name, len - 1);
33759 strcpy (name2 + len - 1, "f128");
33760 id = get_identifier (name2);
33761 }
33762 }
33763 }
33764
33765 return id;
33766 }
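
/* [Editor's example] With -mabi=ieeelongdouble on a target whose default
   long double is IBM extended double, a call to __builtin_sinl (TFmode
   return) reaches this hook with id "sinl" and is renamed to "sinf128",
   the IEEE 128-bit entry point.  */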
33767
33768 /* Predict whether the given loop in gimple will be transformed in the RTL
33769 doloop_optimize pass. */
33770
33771 static bool
33772 rs6000_predict_doloop_p (struct loop *loop)
33773 {
33774 gcc_assert (loop);
33775
33776 /* On rs6000, targetm.can_use_doloop_p is actually
33777 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
33778 if (loop->inner != NULL)
33779 {
33780 if (dump_file && (dump_flags & TDF_DETAILS))
33781 fprintf (dump_file, "Predict doloop failure due to"
33782 " loop nesting.\n");
33783 return false;
33784 }
33785
33786 return true;
33787 }
33788
33789 struct gcc_target targetm = TARGET_INITIALIZER;
33790
33791 #include "gt-rs6000.h"