/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
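
/* Illustrative note (not in the original source): on a powerpc64le-linux
   target POWERPC_LINUX is defined, so the block above resolves to

     #define TARGET_IEEEQUAD_DEFAULT 0

   i.e. long double defaults to the IBM double-double format there, while
   targets that define neither POWERPC_LINUX nor POWERPC_FREEBSD default
   to IEEE 128-bit (1).  */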

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,   /* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;	/* option name */
  unsigned int mask;	/* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
	       | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
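
/* A sketch of how the table above is consumed (an assumption about the
   option-override code, which is outside this excerpt): each comma-separated
   name in -mrecip=... is looked up in recip_options[] and its mask is ORed
   in, so -mrecip=divf,rsqrtd accumulates

     (RECIP_SF_DIV | RECIP_V4SF_DIV)          from "divf"
     | (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)    from "rsqrtd"

   which then drives what rs6000_recip_bits[] enables per mode.  */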

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,	/* default clone.  */
  CLONE_ISA_2_05,	/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,	/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,	/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,	/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },				  /* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },	  /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },	  /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9).  */
};
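
/* Illustrative use of the clone table above (example code, not original
   text): a function such as

     __attribute__ ((target_clones ("cpu=power9,default")))
     long mod_func (long a, long b) { return a % b; }

   gets one clone per requested variant, and the resolver picks the best
   clone for which __builtin_cpu_supports () is true, using the HWCAP names
   above (e.g. "arch_3_00" for the power9 clone).  */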


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
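
/* For example (illustrative): because the enum values above are ordered,
   a single range check classifies a register type:

     IS_STD_REG_TYPE (ALTIVEC_REG_TYPE)   -> true
     IS_FP_VECT_REG_TYPE (GPR_REG_TYPE)   -> false
     IS_FP_VECT_REG_TYPE (VSX_REG_TYPE)   -> true  */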


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,	/* General purpose registers.  */
  RELOAD_REG_FPR,	/* Traditional floating point regs.  */
  RELOAD_REG_VMX,	/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,	/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },		/* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },		/* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any", -1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
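
/* A hypothetical example of how these bits combine (not from the original
   source): a mode that is valid in a class, supports both reg+offset and
   reg+reg addresses, and allows PRE_INC/PRE_DEC would carry

     (RELOAD_REG_VALID | RELOAD_REG_OFFSET | RELOAD_REG_INDEXED
      | RELOAD_REG_PRE_INCDEC)

   i.e. 0x1d, in the matching reg_addr[mode].addr_mask[] entry.  */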

/* Register type masks based on the type of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	 /* INSN to reload for loading.  */
  enum insn_code reload_store;	 /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		 /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
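
/* Illustrative use of the predicates above (a sketch, not original code):
   a caller deciding whether to form an auto-increment address might test

     if (mode_supports_pre_incdec_p (mode)
	 || mode_supports_pre_modify_p (mode))
       ... a PRE_INC/PRE_DEC or PRE_MODIFY address is legal for MODE ...

   while mode_supports_dq_form (mode) gates the DQ-form (e.g. lxv/stxv)
   offsets that must be a multiple of 16.  */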

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
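
/* For example (illustrative): with IN_INSN whose pattern is

     (parallel [(set (mem:DI (reg:DI 3)) (reg:DI 4))
		(clobber (reg:DI 5))])

   the loops above accept the SET and CLOBBER; any element other than a
   SET, CLOBBER or USE inside either PARALLEL makes this wrapper return
   false before the generic store_data_bypass_p is consulted.  */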

\f
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;
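
/* A note on scale (an assumption about rtl.h, not stated in this file):
   COSTS_N_INSNS (N) is commonly defined as ((N) * 4), a multiple of one
   single-instruction cost, so e.g. COSTS_N_INSNS (18) for divsi on POWER4
   says an SImode divide costs roughly eighteen adds.  */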

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
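
/* Illustrative note (an assumption about code outside this excerpt): with
   -mveclibabi=mass this pointer is set to rs6000_builtin_vectorized_libmass
   below, and the vectorizer calls through it to replace vectorizable math
   builtins with calls into IBM's MASS library.  */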

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
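
/* For example (illustrative): two uses of the same CONST_DOUBLE in DFmode
   hash to the same toc_hash_struct (keyed on the rtx and its mode), so the
   constant is emitted into the TOC once and both uses reference the single
   labelno recorded for that entry.  */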


\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1671
1672 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1673 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1674 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1675 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1676 #undef TARGET_UNWIND_WORD_MODE
1677 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1678
1679 #undef TARGET_OFFLOAD_OPTIONS
1680 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1681
1682 #undef TARGET_C_MODE_FOR_SUFFIX
1683 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1684
1685 #undef TARGET_INVALID_BINARY_OP
1686 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1687
1688 #undef TARGET_OPTAB_SUPPORTED_P
1689 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1690
1691 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1692 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1693
1694 #undef TARGET_COMPARE_VERSION_PRIORITY
1695 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1696
1697 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1698 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1699 rs6000_generate_version_dispatcher_body
1700
1701 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1702 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1703 rs6000_get_function_versions_dispatcher
1704
1705 #undef TARGET_OPTION_FUNCTION_VERSIONS
1706 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1707
1708 #undef TARGET_HARD_REGNO_NREGS
1709 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1710 #undef TARGET_HARD_REGNO_MODE_OK
1711 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1712
1713 #undef TARGET_MODES_TIEABLE_P
1714 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1715
1716 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1717 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1718 rs6000_hard_regno_call_part_clobbered
1719
1720 #undef TARGET_SLOW_UNALIGNED_ACCESS
1721 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1722
1723 #undef TARGET_CAN_CHANGE_MODE_CLASS
1724 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1725
1726 #undef TARGET_CONSTANT_ALIGNMENT
1727 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1728
1729 #undef TARGET_STARTING_FRAME_OFFSET
1730 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1731
1732 #if TARGET_ELF && RS6000_WEAK
1733 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1734 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1735 #endif
1736
1737 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1738 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1739
1740 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1741 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1742
1743 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1744 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1745 rs6000_cannot_substitute_mem_equiv_p
1746 \f
1747
1748 /* Processor table. */
1749 struct rs6000_ptt
1750 {
1751 const char *const name; /* Canonical processor name. */
1752 const enum processor_type processor; /* Processor type enum value. */
1753 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1754 };
1755
1756 static struct rs6000_ptt const processor_target_table[] =
1757 {
1758 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1759 #include "rs6000-cpus.def"
1760 #undef RS6000_CPU
1761 };
1762
1763 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1764 name is invalid. */
1765
1766 static int
1767 rs6000_cpu_name_lookup (const char *name)
1768 {
1769 size_t i;
1770
1771 if (name != NULL)
1772 {
1773 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1774 if (! strcmp (name, processor_target_table[i].name))
1775 return (int)i;
1776 }
1777
1778 return -1;
1779 }
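/* Illustrative usage (a sketch; the actual table entries come from
   rs6000-cpus.def):

     rs6000_cpu_name_lookup ("power9")     index of "power9" in the table
     rs6000_cpu_name_lookup ("no-such")    -1
     rs6000_cpu_name_lookup (NULL)         -1  */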
1780
1781 \f
1782 /* Return number of consecutive hard regs needed starting at reg REGNO
1783 to hold something of mode MODE.
1784 This is ordinarily the length in words of a value of mode MODE
1785 but can be less for certain modes in special long registers.
1786
1787 POWER and PowerPC GPRs hold 32 bits worth;
1788    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1789
1790 static int
1791 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1792 {
1793 unsigned HOST_WIDE_INT reg_size;
1794
1795 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1796 128-bit floating point that can go in vector registers, which has VSX
1797 memory addressing. */
1798 if (FP_REGNO_P (regno))
1799 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1800 ? UNITS_PER_VSX_WORD
1801 : UNITS_PER_FP_WORD);
1802
1803 else if (ALTIVEC_REGNO_P (regno))
1804 reg_size = UNITS_PER_ALTIVEC_WORD;
1805
1806 else
1807 reg_size = UNITS_PER_WORD;
1808
1809 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1810 }
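/* Worked example: V4SImode is 16 bytes.  Assuming UNITS_PER_WORD is 8 on a
   64-bit target and UNITS_PER_ALTIVEC_WORD is 16, a V4SImode value needs
   (16 + 8 - 1) / 8 = 2 consecutive GPRs, but only
   (16 + 16 - 1) / 16 = 1 AltiVec register.  */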
1811
1812 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1813 MODE. */
1814 static int
1815 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1816 {
1817 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1818
1819 if (COMPLEX_MODE_P (mode))
1820 mode = GET_MODE_INNER (mode);
1821
1822   /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
1823      register combinations, and we use PTImode where we need to deal with quad
1824      word memory operations.  Don't allow quad words in the argument or frame
1825      pointer registers, just registers 0..31.  */
1826 if (mode == PTImode)
1827 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1828 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1829 && ((regno & 1) == 0));
1830
1831 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1832 implementations. Don't allow an item to be split between a FP register
1833 and an Altivec register. Allow TImode in all VSX registers if the user
1834 asked for it. */
1835 if (TARGET_VSX && VSX_REGNO_P (regno)
1836 && (VECTOR_MEM_VSX_P (mode)
1837 || FLOAT128_VECTOR_P (mode)
1838 || reg_addr[mode].scalar_in_vmx_p
1839 || mode == TImode
1840 || (TARGET_VADDUQM && mode == V1TImode)))
1841 {
1842 if (FP_REGNO_P (regno))
1843 return FP_REGNO_P (last_regno);
1844
1845 if (ALTIVEC_REGNO_P (regno))
1846 {
1847 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1848 return 0;
1849
1850 return ALTIVEC_REGNO_P (last_regno);
1851 }
1852 }
1853
1854 /* The GPRs can hold any mode, but values bigger than one register
1855 cannot go past R31. */
1856 if (INT_REGNO_P (regno))
1857 return INT_REGNO_P (last_regno);
1858
1859 /* The float registers (except for VSX vector modes) can only hold floating
1860 modes and DImode. */
1861 if (FP_REGNO_P (regno))
1862 {
1863 if (FLOAT128_VECTOR_P (mode))
1864 return false;
1865
1866 if (SCALAR_FLOAT_MODE_P (mode)
1867 && (mode != TDmode || (regno % 2) == 0)
1868 && FP_REGNO_P (last_regno))
1869 return 1;
1870
1871 if (GET_MODE_CLASS (mode) == MODE_INT)
1872 {
1873       if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1874 return 1;
1875
1876 if (TARGET_P8_VECTOR && (mode == SImode))
1877 return 1;
1878
1879 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1880 return 1;
1881 }
1882
1883 return 0;
1884 }
1885
1886 /* The CR register can only hold CC modes. */
1887 if (CR_REGNO_P (regno))
1888 return GET_MODE_CLASS (mode) == MODE_CC;
1889
1890 if (CA_REGNO_P (regno))
1891 return mode == Pmode || mode == SImode;
1892
1893   /* AltiVec modes can only go in AltiVec registers.  */
1894 if (ALTIVEC_REGNO_P (regno))
1895 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1896 || mode == V1TImode);
1897
1898   /* We cannot put non-VSX TImode or PTImode anywhere except the general
1899      registers, and the value must fit within the register set.  */
1900
1901 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1902 }
1903
1904 /* Implement TARGET_HARD_REGNO_NREGS. */
1905
1906 static unsigned int
1907 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1908 {
1909 return rs6000_hard_regno_nregs[mode][regno];
1910 }
1911
1912 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1913
1914 static bool
1915 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1916 {
1917 return rs6000_hard_regno_mode_ok_p[mode][regno];
1918 }
1919
1920 /* Implement TARGET_MODES_TIEABLE_P.
1921
1922 PTImode cannot tie with other modes because PTImode is restricted to even
1923 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1924 57744).
1925
1926 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1927 128-bit floating point on VSX systems ties with other vectors. */
1928
1929 static bool
1930 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1931 {
1932 if (mode1 == PTImode)
1933 return mode2 == PTImode;
1934 if (mode2 == PTImode)
1935 return false;
1936
1937 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1938 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1939 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1940 return false;
1941
1942 if (SCALAR_FLOAT_MODE_P (mode1))
1943 return SCALAR_FLOAT_MODE_P (mode2);
1944 if (SCALAR_FLOAT_MODE_P (mode2))
1945 return false;
1946
1947 if (GET_MODE_CLASS (mode1) == MODE_CC)
1948 return GET_MODE_CLASS (mode2) == MODE_CC;
1949 if (GET_MODE_CLASS (mode2) == MODE_CC)
1950 return false;
1951
1952 return true;
1953 }
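/* For example, under these rules V2DFmode ties with V4SImode (both are
   AltiVec/VSX vector modes) and SFmode ties with DFmode (both scalar
   float), but DFmode does not tie with V2DFmode: with mode1 == DFmode,
   the vector test on mode2 fires and the function returns false.  */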
1954
1955 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1956
1957 static bool
1958 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1959 machine_mode mode)
1960 {
1961 if (TARGET_32BIT
1962 && TARGET_POWERPC64
1963 && GET_MODE_SIZE (mode) > 4
1964 && INT_REGNO_P (regno))
1965 return true;
1966
1967 if (TARGET_VSX
1968 && FP_REGNO_P (regno)
1969 && GET_MODE_SIZE (mode) > 8
1970 && !FLOAT128_2REG_P (mode))
1971 return true;
1972
1973 return false;
1974 }
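/* For instance, when compiling with -m32 for a 64-bit processor
   (TARGET_32BIT && TARGET_POWERPC64), a DImode value occupies one 64-bit
   GPR, but the 32-bit ABI only preserves the low 32 bits of that register
   across calls, so it is partially clobbered.  Similarly, only the lower
   64 bits of a VSX register that overlaps an FPR survive a call, so
   16-byte modes kept there are partially clobbered as well.  */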
1975
1976 /* Print interesting facts about registers. */
1977 static void
1978 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1979 {
1980 int r, m;
1981
1982 for (r = first_regno; r <= last_regno; ++r)
1983 {
1984 const char *comma = "";
1985 int len;
1986
1987 if (first_regno == last_regno)
1988 fprintf (stderr, "%s:\t", reg_name);
1989 else
1990 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1991
1992 len = 8;
1993 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1994 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1995 {
1996 if (len > 70)
1997 {
1998 fprintf (stderr, ",\n\t");
1999 len = 8;
2000 comma = "";
2001 }
2002
2003 if (rs6000_hard_regno_nregs[m][r] > 1)
2004 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2005 rs6000_hard_regno_nregs[m][r]);
2006 else
2007 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2008
2009 comma = ", ";
2010 }
2011
2012 if (call_used_or_fixed_reg_p (r))
2013 {
2014 if (len > 70)
2015 {
2016 fprintf (stderr, ",\n\t");
2017 len = 8;
2018 comma = "";
2019 }
2020
2021 len += fprintf (stderr, "%s%s", comma, "call-used");
2022 comma = ", ";
2023 }
2024
2025 if (fixed_regs[r])
2026 {
2027 if (len > 70)
2028 {
2029 fprintf (stderr, ",\n\t");
2030 len = 8;
2031 comma = "";
2032 }
2033
2034 len += fprintf (stderr, "%s%s", comma, "fixed");
2035 comma = ", ";
2036 }
2037
2038 if (len > 70)
2039 {
2040 fprintf (stderr, ",\n\t");
2041 comma = "";
2042 }
2043
2044 len += fprintf (stderr, "%sreg-class = %s", comma,
2045 reg_class_names[(int)rs6000_regno_regclass[r]]);
2046 comma = ", ";
2047
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 comma = "";
2052 }
2053
2054 fprintf (stderr, "%sregno = %d\n", comma, r);
2055 }
2056 }
2057
2058 static const char *
2059 rs6000_debug_vector_unit (enum rs6000_vector v)
2060 {
2061 const char *ret;
2062
2063 switch (v)
2064 {
2065 case VECTOR_NONE: ret = "none"; break;
2066 case VECTOR_ALTIVEC: ret = "altivec"; break;
2067 case VECTOR_VSX: ret = "vsx"; break;
2068 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2069 default: ret = "unknown"; break;
2070 }
2071
2072 return ret;
2073 }
2074
2075 /* Inner function printing just the address mask for a particular reload
2076 register class. */
2077 DEBUG_FUNCTION char *
2078 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2079 {
2080 static char ret[8];
2081 char *p = ret;
2082
2083 if ((mask & RELOAD_REG_VALID) != 0)
2084 *p++ = 'v';
2085 else if (keep_spaces)
2086 *p++ = ' ';
2087
2088 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2089 *p++ = 'm';
2090 else if (keep_spaces)
2091 *p++ = ' ';
2092
2093 if ((mask & RELOAD_REG_INDEXED) != 0)
2094 *p++ = 'i';
2095 else if (keep_spaces)
2096 *p++ = ' ';
2097
2098 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2099 *p++ = 'O';
2100 else if ((mask & RELOAD_REG_OFFSET) != 0)
2101 *p++ = 'o';
2102 else if (keep_spaces)
2103 *p++ = ' ';
2104
2105 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2106 *p++ = '+';
2107 else if (keep_spaces)
2108 *p++ = ' ';
2109
2110 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2111 *p++ = '+';
2112 else if (keep_spaces)
2113 *p++ = ' ';
2114
2115 if ((mask & RELOAD_REG_AND_M16) != 0)
2116 *p++ = '&';
2117 else if (keep_spaces)
2118 *p++ = ' ';
2119
2120 *p = '\0';
2121
2122 return ret;
2123 }
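/* For example, a single-register mode that is valid and supports indexed,
   offset, and both update forms would print roughly "v io++ " with
   KEEP_SPACES set, or "vio++" without it.  */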
2124
2125 /* Print the address masks in a human-readable fashion.  */
2126 DEBUG_FUNCTION void
2127 rs6000_debug_print_mode (ssize_t m)
2128 {
2129 ssize_t rc;
2130 int spaces = 0;
2131
2132 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2133 for (rc = 0; rc < N_RELOAD_REG; rc++)
2134 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2135 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2136
2137 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2138 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2139 {
2140 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2141 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2142 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2143 spaces = 0;
2144 }
2145 else
2146 spaces += strlen (" Reload=sl");
2147
2148 if (reg_addr[m].scalar_in_vmx_p)
2149 {
2150 fprintf (stderr, "%*s Upper=y", spaces, "");
2151 spaces = 0;
2152 }
2153 else
2154 spaces += strlen (" Upper=y");
2155
2156 if (rs6000_vector_unit[m] != VECTOR_NONE
2157 || rs6000_vector_mem[m] != VECTOR_NONE)
2158 {
2159 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2160 spaces, "",
2161 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2162 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2163 }
2164
2165 fputs ("\n", stderr);
2166 }
2167
2168 #define DEBUG_FMT_ID "%-32s= "
2169 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2170 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2171 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
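
/* As an illustration, fprintf (stderr, DEBUG_FMT_S, "abi", "aix") prints
   the key left-justified in a 32-column field followed by the value:

     abi                             = aix  */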
2172
2173 /* Print various interesting information with -mdebug=reg. */
2174 static void
2175 rs6000_debug_reg_global (void)
2176 {
2177 static const char *const tf[2] = { "false", "true" };
2178 const char *nl = (const char *)0;
2179 int m;
2180 size_t m1, m2, v;
2181 char costly_num[20];
2182 char nop_num[20];
2183 char flags_buffer[40];
2184 const char *costly_str;
2185 const char *nop_str;
2186 const char *trace_str;
2187 const char *abi_str;
2188 const char *cmodel_str;
2189 struct cl_target_option cl_opts;
2190
2191 /* Modes we want tieable information on. */
2192 static const machine_mode print_tieable_modes[] = {
2193 QImode,
2194 HImode,
2195 SImode,
2196 DImode,
2197 TImode,
2198 PTImode,
2199 SFmode,
2200 DFmode,
2201 TFmode,
2202 IFmode,
2203 KFmode,
2204 SDmode,
2205 DDmode,
2206 TDmode,
2207 V16QImode,
2208 V8HImode,
2209 V4SImode,
2210 V2DImode,
2211 V1TImode,
2212 V32QImode,
2213 V16HImode,
2214 V8SImode,
2215 V4DImode,
2216 V2TImode,
2217 V4SFmode,
2218 V2DFmode,
2219 V8SFmode,
2220 V4DFmode,
2221 CCmode,
2222 CCUNSmode,
2223 CCEQmode,
2224 };
2225
2226 /* Virtual regs we are interested in. */
2227 const static struct {
2228 int regno; /* register number. */
2229 const char *name; /* register name. */
2230 } virtual_regs[] = {
2231 { STACK_POINTER_REGNUM, "stack pointer:" },
2232 { TOC_REGNUM, "toc: " },
2233 { STATIC_CHAIN_REGNUM, "static chain: " },
2234 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2235 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2236 { ARG_POINTER_REGNUM, "arg pointer: " },
2237 { FRAME_POINTER_REGNUM, "frame pointer:" },
2238 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2239 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2240 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2241 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2242 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2243 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2244 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2245     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2246 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2247 };
2248
2249 fputs ("\nHard register information:\n", stderr);
2250 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2251 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2252 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2253 LAST_ALTIVEC_REGNO,
2254 "vs");
2255 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2256 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2257 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2258 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2259 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2260 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2261
2262 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2263 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2264 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2265
2266 fprintf (stderr,
2267 "\n"
2268 "d reg_class = %s\n"
2269 "f reg_class = %s\n"
2270 "v reg_class = %s\n"
2271 "wa reg_class = %s\n"
2272 "we reg_class = %s\n"
2273 "wr reg_class = %s\n"
2274 "wx reg_class = %s\n"
2275 "wA reg_class = %s\n"
2276 "\n",
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2279 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2280 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2281 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2282 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2283 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2284 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2285
2286 nl = "\n";
2287 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2288 rs6000_debug_print_mode (m);
2289
2290 fputs ("\n", stderr);
2291
2292 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2293 {
2294 machine_mode mode1 = print_tieable_modes[m1];
2295 bool first_time = true;
2296
2297 nl = (const char *)0;
2298 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2299 {
2300 machine_mode mode2 = print_tieable_modes[m2];
2301 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2302 {
2303 if (first_time)
2304 {
2305 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2306 nl = "\n";
2307 first_time = false;
2308 }
2309
2310 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2311 }
2312 }
2313
2314 if (!first_time)
2315 fputs ("\n", stderr);
2316 }
2317
2318 if (nl)
2319 fputs (nl, stderr);
2320
2321 if (rs6000_recip_control)
2322 {
2323 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2324
2325 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2326 if (rs6000_recip_bits[m])
2327 {
2328 fprintf (stderr,
2329 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2330 GET_MODE_NAME (m),
2331 (RS6000_RECIP_AUTO_RE_P (m)
2332 ? "auto"
2333 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2334 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2335 ? "auto"
2336 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2337 }
2338
2339 fputs ("\n", stderr);
2340 }
2341
2342 if (rs6000_cpu_index >= 0)
2343 {
2344 const char *name = processor_target_table[rs6000_cpu_index].name;
2345 HOST_WIDE_INT flags
2346 = processor_target_table[rs6000_cpu_index].target_enable;
2347
2348 sprintf (flags_buffer, "-mcpu=%s flags", name);
2349 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2350 }
2351 else
2352 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2353
2354 if (rs6000_tune_index >= 0)
2355 {
2356 const char *name = processor_target_table[rs6000_tune_index].name;
2357 HOST_WIDE_INT flags
2358 = processor_target_table[rs6000_tune_index].target_enable;
2359
2360 sprintf (flags_buffer, "-mtune=%s flags", name);
2361 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2362 }
2363 else
2364 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2365
2366 cl_target_option_save (&cl_opts, &global_options);
2367 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2368 rs6000_isa_flags);
2369
2370 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2371 rs6000_isa_flags_explicit);
2372
2373 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2374 rs6000_builtin_mask);
2375
2376 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2377
2378 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2379 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2380
2381 switch (rs6000_sched_costly_dep)
2382 {
2383 case max_dep_latency:
2384 costly_str = "max_dep_latency";
2385 break;
2386
2387 case no_dep_costly:
2388 costly_str = "no_dep_costly";
2389 break;
2390
2391 case all_deps_costly:
2392 costly_str = "all_deps_costly";
2393 break;
2394
2395 case true_store_to_load_dep_costly:
2396 costly_str = "true_store_to_load_dep_costly";
2397 break;
2398
2399 case store_to_load_dep_costly:
2400 costly_str = "store_to_load_dep_costly";
2401 break;
2402
2403 default:
2404 costly_str = costly_num;
2405 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2406 break;
2407 }
2408
2409 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2410
2411 switch (rs6000_sched_insert_nops)
2412 {
2413 case sched_finish_regroup_exact:
2414 nop_str = "sched_finish_regroup_exact";
2415 break;
2416
2417 case sched_finish_pad_groups:
2418 nop_str = "sched_finish_pad_groups";
2419 break;
2420
2421 case sched_finish_none:
2422 nop_str = "sched_finish_none";
2423 break;
2424
2425 default:
2426 nop_str = nop_num;
2427 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2428 break;
2429 }
2430
2431 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2432
2433 switch (rs6000_sdata)
2434 {
2435 default:
2436 case SDATA_NONE:
2437 break;
2438
2439 case SDATA_DATA:
2440 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2441 break;
2442
2443 case SDATA_SYSV:
2444 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2445 break;
2446
2447 case SDATA_EABI:
2448 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2449 break;
2450
2451 }
2452
2453 switch (rs6000_traceback)
2454 {
2455 case traceback_default: trace_str = "default"; break;
2456 case traceback_none: trace_str = "none"; break;
2457 case traceback_part: trace_str = "part"; break;
2458 case traceback_full: trace_str = "full"; break;
2459 default: trace_str = "unknown"; break;
2460 }
2461
2462 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2463
2464 switch (rs6000_current_cmodel)
2465 {
2466 case CMODEL_SMALL: cmodel_str = "small"; break;
2467 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2468 case CMODEL_LARGE: cmodel_str = "large"; break;
2469 default: cmodel_str = "unknown"; break;
2470 }
2471
2472 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2473
2474 switch (rs6000_current_abi)
2475 {
2476 case ABI_NONE: abi_str = "none"; break;
2477 case ABI_AIX: abi_str = "aix"; break;
2478 case ABI_ELFv2: abi_str = "ELFv2"; break;
2479 case ABI_V4: abi_str = "V4"; break;
2480 case ABI_DARWIN: abi_str = "darwin"; break;
2481 default: abi_str = "unknown"; break;
2482 }
2483
2484 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2485
2486 if (rs6000_altivec_abi)
2487 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2488
2489 if (rs6000_darwin64_abi)
2490 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2491
2492 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2493 (TARGET_SOFT_FLOAT ? "true" : "false"));
2494
2495 if (TARGET_LINK_STACK)
2496 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2497
2498 if (TARGET_P8_FUSION)
2499 {
2500 char options[80];
2501
2502 strcpy (options, "power8");
2503 if (TARGET_P8_FUSION_SIGN)
2504 strcat (options, ", sign");
2505
2506 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2507 }
2508
2509 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2510 TARGET_SECURE_PLT ? "secure" : "bss");
2511 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2512 aix_struct_return ? "aix" : "sysv");
2513 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2514 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2515 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2516 tf[!!rs6000_align_branch_targets]);
2517 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2518 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2519 rs6000_long_double_type_size);
2520 if (rs6000_long_double_type_size > 64)
2521 {
2522 fprintf (stderr, DEBUG_FMT_S, "long double type",
2523 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2524 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2525 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2526 }
2527 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2528 (int)rs6000_sched_restricted_insns_priority);
2529 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2530 (int)END_BUILTINS);
2531 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2532 (int)RS6000_BUILTIN_COUNT);
2533
2534 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2535 (int)TARGET_FLOAT128_ENABLE_TYPE);
2536
2537 if (TARGET_VSX)
2538 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2539 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2540
2541 if (TARGET_DIRECT_MOVE_128)
2542 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2543 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2544 }
2545
2546 \f
2547 /* Update the addr mask bits in reg_addr to help secondary reload and the
2548    legitimate address support figure out the appropriate addressing to
2549    use.  */
2550
2551 static void
2552 rs6000_setup_reg_addr_masks (void)
2553 {
2554 ssize_t rc, reg, m, nregs;
2555 addr_mask_type any_addr_mask, addr_mask;
2556
2557 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2558 {
2559 machine_mode m2 = (machine_mode) m;
2560 bool complex_p = false;
2561 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2562 size_t msize;
2563
2564 if (COMPLEX_MODE_P (m2))
2565 {
2566 complex_p = true;
2567 m2 = GET_MODE_INNER (m2);
2568 }
2569
2570 msize = GET_MODE_SIZE (m2);
2571
2572 /* SDmode is special in that we want to access it only via REG+REG
2573	 addressing on power7 and above, since we want to use the LFIWZX and
2574	 STFIWX instructions to load and store it.  */
2575 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2576
2577 any_addr_mask = 0;
2578 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2579 {
2580 addr_mask = 0;
2581 reg = reload_reg_map[rc].reg;
2582
2583 /* Can mode values go in the GPR/FPR/Altivec registers? */
2584 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2585 {
2586 bool small_int_vsx_p = (small_int_p
2587 && (rc == RELOAD_REG_FPR
2588 || rc == RELOAD_REG_VMX));
2589
2590 nregs = rs6000_hard_regno_nregs[m][reg];
2591 addr_mask |= RELOAD_REG_VALID;
2592
2593 /* Indicate if the mode takes more than 1 physical register. If
2594 it takes a single register, indicate it can do REG+REG
2595 addressing. Small integers in VSX registers can only do
2596 REG+REG addressing. */
2597 if (small_int_vsx_p)
2598 addr_mask |= RELOAD_REG_INDEXED;
2599 else if (nregs > 1 || m == BLKmode || complex_p)
2600 addr_mask |= RELOAD_REG_MULTIPLE;
2601 else
2602 addr_mask |= RELOAD_REG_INDEXED;
2603
2604 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2605 addressing. If we allow scalars into Altivec registers,
2606 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2607
2608 For VSX systems, we don't allow update addressing for
2609 DFmode/SFmode if those registers can go in both the
2610 traditional floating point registers and Altivec registers.
2611 The load/store instructions for the Altivec registers do not
2612 have update forms. If we allowed update addressing, it seems
2613 to break IV-OPT code using floating point if the index type is
2614 int instead of long (PR target/81550 and target/84042). */
2615
2616 if (TARGET_UPDATE
2617 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2618 && msize <= 8
2619 && !VECTOR_MODE_P (m2)
2620 && !FLOAT128_VECTOR_P (m2)
2621 && !complex_p
2622 && (m != E_DFmode || !TARGET_VSX)
2623 && (m != E_SFmode || !TARGET_P8_VECTOR)
2624 && !small_int_vsx_p)
2625 {
2626 addr_mask |= RELOAD_REG_PRE_INCDEC;
2627
2628 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2629 we don't allow PRE_MODIFY for some multi-register
2630 operations. */
2631 switch (m)
2632 {
2633 default:
2634 addr_mask |= RELOAD_REG_PRE_MODIFY;
2635 break;
2636
2637 case E_DImode:
2638 if (TARGET_POWERPC64)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2641
2642 case E_DFmode:
2643 case E_DDmode:
2644 if (TARGET_HARD_FLOAT)
2645 addr_mask |= RELOAD_REG_PRE_MODIFY;
2646 break;
2647 }
2648 }
2649 }
2650
2651 /* GPR and FPR registers can do REG+OFFSET addressing, except
2652 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2653 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2654 if ((addr_mask != 0) && !indexed_only_p
2655 && msize <= 8
2656 && (rc == RELOAD_REG_GPR
2657 || ((msize == 8 || m2 == SFmode)
2658 && (rc == RELOAD_REG_FPR
2659 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2660 addr_mask |= RELOAD_REG_OFFSET;
2661
2662	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2663	     instructions are enabled.  The offset for 128-bit VSX registers is
2664	     only 12 bits.  While GPRs can handle the full offset range, VSX
2665	     registers can only handle the restricted range.  */
2666 else if ((addr_mask != 0) && !indexed_only_p
2667 && msize == 16 && TARGET_P9_VECTOR
2668 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2669 || (m2 == TImode && TARGET_VSX)))
2670 {
2671 addr_mask |= RELOAD_REG_OFFSET;
2672 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2673 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2674 }
2675
2676 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2677 addressing on 128-bit types. */
2678 if (rc == RELOAD_REG_VMX && msize == 16
2679 && (addr_mask & RELOAD_REG_VALID) != 0)
2680 addr_mask |= RELOAD_REG_AND_M16;
2681
2682 reg_addr[m].addr_mask[rc] = addr_mask;
2683 any_addr_mask |= addr_mask;
2684 }
2685
2686 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2687 }
2688 }
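/* Taking SImode on a VSX target as a concrete case: in the GPRs it ends up
   with both RELOAD_REG_INDEXED and RELOAD_REG_OFFSET (REG+REG and
   REG+OFFSET addressing), while small_int_vsx_p restricts it to
   RELOAD_REG_INDEXED in the FPR/Altivec reload classes, matching the
   indexed-only scalar integer loads/stores to the vector registers.  */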
2689
2690 \f
2691 /* Initialize the various global tables that are based on register size. */
2692 static void
2693 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2694 {
2695 ssize_t r, m, c;
2696 int align64;
2697 int align32;
2698
2699 /* Precalculate REGNO_REG_CLASS. */
2700 rs6000_regno_regclass[0] = GENERAL_REGS;
2701 for (r = 1; r < 32; ++r)
2702 rs6000_regno_regclass[r] = BASE_REGS;
2703
2704 for (r = 32; r < 64; ++r)
2705 rs6000_regno_regclass[r] = FLOAT_REGS;
2706
2707 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2708 rs6000_regno_regclass[r] = NO_REGS;
2709
2710 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2711 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2712
2713 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2714 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2715 rs6000_regno_regclass[r] = CR_REGS;
2716
2717 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2718 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2719 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2720 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2721 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2722 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2723 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2724
2725 /* Precalculate register class to simpler reload register class. We don't
2726 need all of the register classes that are combinations of different
2727 classes, just the simple ones that have constraint letters. */
2728 for (c = 0; c < N_REG_CLASSES; c++)
2729 reg_class_to_reg_type[c] = NO_REG_TYPE;
2730
2731 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2734 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2735 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2736 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2737 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2738 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2739 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2740 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2741
2742 if (TARGET_VSX)
2743 {
2744 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2745 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2746 }
2747 else
2748 {
2749 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2750 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2751 }
2752
2753   /* Precalculate the valid memory formats as well as the vector information;
2754      this must be set up before the rs6000_hard_regno_nregs_internal calls
2755      below.  */
2756 gcc_assert ((int)VECTOR_NONE == 0);
2757 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2758 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2759
2760 gcc_assert ((int)CODE_FOR_nothing == 0);
2761 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2762
2763 gcc_assert ((int)NO_REGS == 0);
2764 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2765
2766   /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2767      controls whether the compiler uses native alignment or still uses 128-bit alignment.  */
2768 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2769 {
2770 align64 = 64;
2771 align32 = 32;
2772 }
2773 else
2774 {
2775 align64 = 128;
2776 align32 = 128;
2777 }
2778
2779 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2780 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2781 if (TARGET_FLOAT128_TYPE)
2782 {
2783 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2784 rs6000_vector_align[KFmode] = 128;
2785
2786 if (FLOAT128_IEEE_P (TFmode))
2787 {
2788 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2789 rs6000_vector_align[TFmode] = 128;
2790 }
2791 }
2792
2793 /* V2DF mode, VSX only. */
2794 if (TARGET_VSX)
2795 {
2796 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2797 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2798 rs6000_vector_align[V2DFmode] = align64;
2799 }
2800
2801 /* V4SF mode, either VSX or Altivec. */
2802 if (TARGET_VSX)
2803 {
2804 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2806 rs6000_vector_align[V4SFmode] = align32;
2807 }
2808 else if (TARGET_ALTIVEC)
2809 {
2810 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2811 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2812 rs6000_vector_align[V4SFmode] = align32;
2813 }
2814
2815 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2816 and stores. */
2817 if (TARGET_ALTIVEC)
2818 {
2819 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2820 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2821 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2822 rs6000_vector_align[V4SImode] = align32;
2823 rs6000_vector_align[V8HImode] = align32;
2824 rs6000_vector_align[V16QImode] = align32;
2825
2826 if (TARGET_VSX)
2827 {
2828 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2829 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2830 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2831 }
2832 else
2833 {
2834 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2835 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2836 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2837 }
2838 }
2839
2840   /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit.  Allow
2841      it under VSX to do insert/splat/extract; Altivec doesn't have 64-bit integer support.  */
2842 if (TARGET_VSX)
2843 {
2844 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2845 rs6000_vector_unit[V2DImode]
2846 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2847 rs6000_vector_align[V2DImode] = align64;
2848
2849 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2850 rs6000_vector_unit[V1TImode]
2851 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2852 rs6000_vector_align[V1TImode] = 128;
2853 }
2854
2855 /* DFmode, see if we want to use the VSX unit. Memory is handled
2856 differently, so don't set rs6000_vector_mem. */
2857 if (TARGET_VSX)
2858 {
2859 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2860 rs6000_vector_align[DFmode] = 64;
2861 }
2862
2863 /* SFmode, see if we want to use the VSX unit. */
2864 if (TARGET_P8_VECTOR)
2865 {
2866 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2867 rs6000_vector_align[SFmode] = 32;
2868 }
2869
2870 /* Allow TImode in VSX register and set the VSX memory macros. */
2871 if (TARGET_VSX)
2872 {
2873 rs6000_vector_mem[TImode] = VECTOR_VSX;
2874 rs6000_vector_align[TImode] = align64;
2875 }
2876
2877 /* Register class constraints for the constraints that depend on compile
2878 switches. When the VSX code was added, different constraints were added
2879 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2880 of the VSX registers are used. The register classes for scalar floating
2881      point types are set, based on whether we allow that type into the upper
2882 (Altivec) registers. GCC has register classes to target the Altivec
2883 registers for load/store operations, to select using a VSX memory
2884 operation instead of the traditional floating point operation. The
2885 constraints are:
2886
2887 d - Register class to use with traditional DFmode instructions.
2888 f - Register class to use with traditional SFmode instructions.
2889 v - Altivec register.
2890 wa - Any VSX register.
2891 wc - Reserved to represent individual CR bits (used in LLVM).
2892 wn - always NO_REGS.
2893 wr - GPR if 64-bit mode is permitted.
2894 wx - Float register if we can do 32-bit int stores. */
2895
2896 if (TARGET_HARD_FLOAT)
2897 {
2898 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2899 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2900 }
2901
2902 if (TARGET_VSX)
2903 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2904
2905 /* Add conditional constraints based on various options, to allow us to
2906 collapse multiple insn patterns. */
2907 if (TARGET_ALTIVEC)
2908 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2909
2910 if (TARGET_POWERPC64)
2911 {
2912 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2913 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2914 }
2915
2916 if (TARGET_STFIWX)
2917 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2918
2919   /* Support for new direct moves (ISA 3.0 + 64-bit).  */
2920 if (TARGET_DIRECT_MOVE_128)
2921 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2922
2923 /* Set up the reload helper and direct move functions. */
2924 if (TARGET_VSX || TARGET_ALTIVEC)
2925 {
2926 if (TARGET_64BIT)
2927 {
2928 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2929 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2930 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2931 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2932 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2933 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2934 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2935 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2936 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2937 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2938 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2939 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2940 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2941 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2942 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2943 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2944 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2945 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2946 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2947 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2948
2949 if (FLOAT128_VECTOR_P (KFmode))
2950 {
2951 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2952 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2953 }
2954
2955 if (FLOAT128_VECTOR_P (TFmode))
2956 {
2957 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2958 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2959 }
2960
2961 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2962 available. */
2963 if (TARGET_NO_SDMODE_STACK)
2964 {
2965 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2966 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2967 }
2968
2969 if (TARGET_VSX)
2970 {
2971 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2972 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2973 }
2974
2975 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2976 {
2977 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2978 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2979 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2980 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2981 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2982 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2983 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2984 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2985 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2986
2987 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2988 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2989 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2990 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2991 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2992 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2993 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2994 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2995 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2996
2997 if (FLOAT128_VECTOR_P (KFmode))
2998 {
2999 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3000 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3001 }
3002
3003 if (FLOAT128_VECTOR_P (TFmode))
3004 {
3005 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3006 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3007 }
3008 }
3009 }
3010 else
3011 {
3012 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3013 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3014 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3015 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3016 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3017 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3018 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3019 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3020 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3021 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3022 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3023 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3024 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3025 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3026 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3027 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3028 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3029 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3030 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3031 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3032
3033 if (FLOAT128_VECTOR_P (KFmode))
3034 {
3035 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3036 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3037 }
3038
3039 if (FLOAT128_IEEE_P (TFmode))
3040 {
3041 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3042 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3043 }
3044
3045 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3046 available. */
3047 if (TARGET_NO_SDMODE_STACK)
3048 {
3049 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3050 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3051 }
3052
3053 if (TARGET_VSX)
3054 {
3055 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3056 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3057 }
3058
3059 if (TARGET_DIRECT_MOVE)
3060 {
3061 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3062 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3063 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3064 }
3065 }
3066
3067 reg_addr[DFmode].scalar_in_vmx_p = true;
3068 reg_addr[DImode].scalar_in_vmx_p = true;
3069
3070 if (TARGET_P8_VECTOR)
3071 {
3072 reg_addr[SFmode].scalar_in_vmx_p = true;
3073 reg_addr[SImode].scalar_in_vmx_p = true;
3074
3075 if (TARGET_P9_VECTOR)
3076 {
3077 reg_addr[HImode].scalar_in_vmx_p = true;
3078 reg_addr[QImode].scalar_in_vmx_p = true;
3079 }
3080 }
3081 }
3082
3083 /* Precalculate HARD_REGNO_NREGS. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_nregs[m][r]
3087 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3088
3089 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3090 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3091 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3092 rs6000_hard_regno_mode_ok_p[m][r]
3093 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3094
3095 /* Precalculate CLASS_MAX_NREGS sizes. */
3096 for (c = 0; c < LIM_REG_CLASSES; ++c)
3097 {
3098 int reg_size;
3099
3100 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3101 reg_size = UNITS_PER_VSX_WORD;
3102
3103 else if (c == ALTIVEC_REGS)
3104 reg_size = UNITS_PER_ALTIVEC_WORD;
3105
3106 else if (c == FLOAT_REGS)
3107 reg_size = UNITS_PER_FP_WORD;
3108
3109 else
3110 reg_size = UNITS_PER_WORD;
3111
3112 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3113 {
3114 machine_mode m2 = (machine_mode)m;
3115 int reg_size2 = reg_size;
3116
3117 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3118 in VSX. */
3119 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3120 reg_size2 = UNITS_PER_FP_WORD;
3121
3122 rs6000_class_max_nregs[m][c]
3123 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3124 }
3125 }
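  /* Worked example: IFmode (IBM 128-bit floating point) is 16 bytes and is
     FLOAT128_2REG_P, so even in a VSX register class it uses
     reg_size2 = UNITS_PER_FP_WORD (8) and needs (16 + 8 - 1) / 8 = 2
     registers, while V2DFmode in the same class needs only
     (16 + 16 - 1) / 16 = 1 register.  */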
3126
3127   /* Calculate for which modes to automatically generate code that uses the
3128      reciprocal divide and square root instructions.  In the future, possibly
3129      automatically generate the instructions even if the user did not specify
3130      -mrecip.  The older machines' double precision reciprocal sqrt estimate is
3131      not accurate enough.  */
3132 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3133 if (TARGET_FRES)
3134 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3135 if (TARGET_FRE)
3136 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3137 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3138 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3139 if (VECTOR_UNIT_VSX_P (V2DFmode))
3140 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3141
3142 if (TARGET_FRSQRTES)
3143 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3144 if (TARGET_FRSQRTE)
3145 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3146 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3147 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3148 if (VECTOR_UNIT_VSX_P (V2DFmode))
3149 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3150
3151 if (rs6000_recip_control)
3152 {
3153 if (!flag_finite_math_only)
3154 	warning (0, "%qs requires %qs or %qs", "-mrecip",
3155 		 "-ffinite-math-only", "-ffast-math");
3156 if (flag_trapping_math)
3157 warning (0, "%qs requires %qs or %qs", "-mrecip",
3158 "-fno-trapping-math", "-ffast-math");
3159 if (!flag_reciprocal_math)
3160 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3161 "-ffast-math");
3162 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3163 {
3164 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3165 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3166 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3167
3168 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3169 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3170 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3171
3172 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3173 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3174 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3175
3176 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3177 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3178 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3179
3180 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3181 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3182 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3183
3184 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3185 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3186 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3187
3188 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3189 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3190 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3191
3192 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3193 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3194 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3195 }
3196 }
3197
3198   /* Update the addr mask bits in reg_addr to help secondary reload and the
3199      legitimate address support figure out the appropriate addressing to
3200      use.  */
3201 rs6000_setup_reg_addr_masks ();
3202
3203 if (global_init_p || TARGET_DEBUG_TARGET)
3204 {
3205 if (TARGET_DEBUG_REG)
3206 rs6000_debug_reg_global ();
3207
3208 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3209 fprintf (stderr,
3210 "SImode variable mult cost = %d\n"
3211 "SImode constant mult cost = %d\n"
3212 "SImode short constant mult cost = %d\n"
3213 "DImode multipliciation cost = %d\n"
3214 "SImode division cost = %d\n"
3215 "DImode division cost = %d\n"
3216 "Simple fp operation cost = %d\n"
3217 "DFmode multiplication cost = %d\n"
3218 "SFmode division cost = %d\n"
3219 "DFmode division cost = %d\n"
3220 "cache line size = %d\n"
3221 "l1 cache size = %d\n"
3222 "l2 cache size = %d\n"
3223 "simultaneous prefetches = %d\n"
3224 "\n",
3225 rs6000_cost->mulsi,
3226 rs6000_cost->mulsi_const,
3227 rs6000_cost->mulsi_const9,
3228 rs6000_cost->muldi,
3229 rs6000_cost->divsi,
3230 rs6000_cost->divdi,
3231 rs6000_cost->fp,
3232 rs6000_cost->dmul,
3233 rs6000_cost->sdiv,
3234 rs6000_cost->ddiv,
3235 rs6000_cost->cache_line_size,
3236 rs6000_cost->l1_cache_size,
3237 rs6000_cost->l2_cache_size,
3238 rs6000_cost->simultaneous_prefetches);
3239 }
3240 }
3241
3242 #if TARGET_MACHO
3243 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3244
3245 static void
3246 darwin_rs6000_override_options (void)
3247 {
3248   /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3249      off.  */
3250 rs6000_altivec_abi = 1;
3251 TARGET_ALTIVEC_VRSAVE = 1;
3252 rs6000_current_abi = ABI_DARWIN;
3253
3254 if (DEFAULT_ABI == ABI_DARWIN
3255 && TARGET_64BIT)
3256 darwin_one_byte_bool = 1;
3257
3258 if (TARGET_64BIT && ! TARGET_POWERPC64)
3259 {
3260 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3261 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3262 }
3263
3264   /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3265      optimisation, and it will not work with the most generic case (where the
3266      symbol is undefined external, but there is no symbol stub).  */
3267 if (TARGET_64BIT)
3268 rs6000_default_long_calls = 0;
3269
3270 /* ld_classic is (so far) still used for kernel (static) code, and supports
3271 the JBSR longcall / branch islands. */
3272 if (flag_mkernel)
3273 {
3274 rs6000_default_long_calls = 1;
3275
3276 /* Allow a kext author to do -mkernel -mhard-float. */
3277 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3278 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3279 }
3280
3281 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3282 Altivec. */
3283 if (!flag_mkernel && !flag_apple_kext
3284 && TARGET_64BIT
3285 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3286 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3287
3288 /* Unless the user (not the configurer) has explicitly overridden
3289 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3290 G4 unless targeting the kernel. */
3291 if (!flag_mkernel
3292 && !flag_apple_kext
3293 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3294 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3295 && ! global_options_set.x_rs6000_cpu_index)
3296 {
3297 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3298 }
3299 }
3300 #endif
3301
3302 /* If not otherwise specified by a target, make 'long double' equivalent to
3303 'double'. */
3304
3305 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3306 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3307 #endif
3308
3309 /* Return the builtin mask of the various options used that could affect which
3310 builtins were used. In the past we used target_flags, but we've run out of
3311 bits, and some options are no longer in target_flags. */
3312
3313 HOST_WIDE_INT
3314 rs6000_builtin_mask_calculate (void)
3315 {
3316 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3317 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3318 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3319 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3320 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3321 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3322 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3323 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3324 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3325 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3326 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3327 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3328 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3329 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3330 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3331 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3332 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3333 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3334 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3335 | ((TARGET_LONG_DOUBLE_128
3336 && TARGET_HARD_FLOAT
3337 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3338 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3339 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3340 | ((TARGET_FUTURE) ? RS6000_BTM_FUTURE : 0));
3341 }
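/* Illustrative note (a sketch, not code from this file): a builtin that needs
   several features would be gated by testing its required bits against the
   mask computed above, roughly:

     required = RS6000_BTM_ALTIVEC | RS6000_BTM_VSX;
     ok = (rs6000_builtin_mask & required) == required;

   Keeping this gating in a separate HOST_WIDE_INT mask is what avoids the
   exhausted target_flags bits mentioned in the comment above.  */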
3342
3343 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3344 to clobber the XER[CA] bit because clobbering that bit without telling
3345 the compiler worked just fine with versions of GCC before GCC 5, and
3346 breaking a lot of older code in ways that are hard to track down is
3347 not such a great idea. */
3348
3349 static rtx_insn *
3350 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3351 vec<const char *> &/*constraints*/,
3352 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3353 {
3354 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3355 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3356 return NULL;
3357 }
3358
3359 /* Override command line options.
3360
3361 Combine build-specific configuration information with options
3362 specified on the command line to set various state variables which
3363 influence code generation, optimization, and expansion of built-in
3364 functions. Assure that command-line configuration preferences are
3365 compatible with each other and with the build configuration; issue
3366 warnings while adjusting configuration or error messages while
3367 rejecting configuration.
3368
3369 Upon entry to this function:
3370
3371 This function is called once at the beginning of
3372 compilation, and then again at the start and end of compiling
3373 each section of code that has a different configuration, as
3374 indicated, for example, by adding the
3375
3376 __attribute__((__target__("cpu=power9")))
3377
3378 qualifier to a function definition or, for example, by bracketing
3379 code between
3380
3381 #pragma GCC target("altivec")
3382
3383 and
3384
3385 #pragma GCC reset_options
3386
3387 directives. Parameter global_init_p is true for the initial
3388 invocation, which initializes global variables, and false for all
3389 subsequent invocations.
3390
3391
3392 Various global state information is assumed to be valid. This
3393 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3394 default CPU specified at build configure time, TARGET_DEFAULT,
3395 representing the default set of option flags for the default
3396 target, and global_options_set.x_rs6000_isa_flags, representing
3397 which options were requested on the command line.
3398
3399 Upon return from this function:
3400
3401 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3402 was set by name on the command line. Additionally, if certain
3403 attributes are automatically enabled or disabled by this function
3404 in order to assure compatibility between options and
3405 configuration, the flags associated with those attributes are
3406 also set. By setting these "explicit bits", we avoid the risk
3407 that other code might accidentally overwrite these particular
3408 attributes with "default values".
3409
3410 The various bits of rs6000_isa_flags are set to indicate the
3411 target options that have been selected for the most current
3412 compilation efforts. This has the effect of also turning on the
3413 associated TARGET_XXX values since these are macros which are
3414 generally defined to test the corresponding bit of the
3415 rs6000_isa_flags variable.
3416
3417 The variable rs6000_builtin_mask is set to represent the target
3418 options for the most current compilation efforts, consistent with
3419 the current contents of rs6000_isa_flags. This variable controls
3420 expansion of built-in functions.
3421
3422 Various other global variables and fields of global structures
3423 (over 50 in all) are initialized to reflect the desired options
3424 for the most current compilation efforts. */
3425
3426 static bool
3427 rs6000_option_override_internal (bool global_init_p)
3428 {
3429 bool ret = true;
3430
3431 HOST_WIDE_INT set_masks;
3432 HOST_WIDE_INT ignore_masks;
3433 int cpu_index = -1;
3434 int tune_index;
3435 struct cl_target_option *main_target_opt
3436 = ((global_init_p || target_option_default_node == NULL)
3437 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3438
3439 /* Print defaults. */
3440 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3441 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3442
3443 /* Remember the explicit arguments. */
3444 if (global_init_p)
3445 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3446
3447 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3448 library functions, so warn about it. The flag may be useful for
3449 performance studies from time to time though, so don't disable it
3450 entirely. */
3451 if (global_options_set.x_rs6000_alignment_flags
3452 && rs6000_alignment_flags == MASK_ALIGN_POWER
3453 && DEFAULT_ABI == ABI_DARWIN
3454 && TARGET_64BIT)
3455 warning (0, "%qs is not supported for 64-bit Darwin;"
3456 " it is incompatible with the installed C and C++ libraries",
3457 "-malign-power");
3458
3459 /* Numerous experiments show that IRA-based loop pressure
3460 calculation works better for RTL loop invariant motion on targets
3461 with enough (>= 32) registers. It is an expensive optimization,
3462 so it is enabled only for peak performance. */
3463 if (optimize >= 3 && global_init_p
3464 && !global_options_set.x_flag_ira_loop_pressure)
3465 flag_ira_loop_pressure = 1;
3466
3467 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3468 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3469 options were already specified. */
3470 if (flag_sanitize & SANITIZE_USER_ADDRESS
3471 && !global_options_set.x_flag_asynchronous_unwind_tables)
3472 flag_asynchronous_unwind_tables = 1;
3473
3474 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3475 loop unroller is active. It is only checked during unrolling, so
3476 we can just set it on by default. */
3477 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3478 flag_variable_expansion_in_unroller = 1;
3479
3480 /* Set the pointer size. */
3481 if (TARGET_64BIT)
3482 {
3483 rs6000_pmode = DImode;
3484 rs6000_pointer_size = 64;
3485 }
3486 else
3487 {
3488 rs6000_pmode = SImode;
3489 rs6000_pointer_size = 32;
3490 }
3491
3492 /* Some OSs don't support saving the high part of 64-bit registers on context
3493 switch. Other OSs don't support saving Altivec registers. On those OSs,
3494 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3495 if the user wants either, the user must explicitly specify them and we
3496 won't interfere with the user's specification. */
3497
3498 set_masks = POWERPC_MASKS;
3499 #ifdef OS_MISSING_POWERPC64
3500 if (OS_MISSING_POWERPC64)
3501 set_masks &= ~OPTION_MASK_POWERPC64;
3502 #endif
3503 #ifdef OS_MISSING_ALTIVEC
3504 if (OS_MISSING_ALTIVEC)
3505 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3506 | OTHER_VSX_VECTOR_MASKS);
3507 #endif
3508
3509 /* Don't let the processor default override options given explicitly. */
3510 set_masks &= ~rs6000_isa_flags_explicit;
3511
3512 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3513 the cpu in a target attribute or pragma, but did not specify a tuning
3514 option, use the cpu for the tuning option rather than the option specified
3515 with -mtune on the command line. Process a '--with-cpu' configuration
3516 request as an implicit --cpu. */
3517 if (rs6000_cpu_index >= 0)
3518 cpu_index = rs6000_cpu_index;
3519 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3520 cpu_index = main_target_opt->x_rs6000_cpu_index;
3521 else if (OPTION_TARGET_CPU_DEFAULT)
3522 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3523
3524 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3525 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3526 with those from the cpu, except for options that were explicitly set. If
3527 we don't have a cpu, do not override the target bits set in
3528 TARGET_DEFAULT. */
3529 if (cpu_index >= 0)
3530 {
3531 rs6000_cpu_index = cpu_index;
3532 rs6000_isa_flags &= ~set_masks;
3533 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3534 & set_masks);
3535 }
3536 else
3537 {
3538 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3539 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3540 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3541 to using rs6000_isa_flags, we need to do the initialization here.
3542
3543 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3544 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3545 HOST_WIDE_INT flags;
3546 if (TARGET_DEFAULT)
3547 flags = TARGET_DEFAULT;
3548 else
3549 {
3550 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3551 const char *default_cpu = (!TARGET_POWERPC64
3552 ? "powerpc"
3553 : (BYTES_BIG_ENDIAN
3554 ? "powerpc64"
3555 : "powerpc64le"));
3556 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3557 flags = processor_target_table[default_cpu_index].target_enable;
3558 }
3559 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3560 }
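/* Summary of the cpu selection precedence implemented above: an explicit
   -mcpu=<xxx> on the command line wins, then a cpu from a target
   attribute/pragma (main_target_opt), then the configure-time --with-cpu
   default (OPTION_TARGET_CPU_DEFAULT), and finally the TARGET_DEFAULT or
   generic powerpc/powerpc64/powerpc64le fallback.  */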
3561
3562 if (rs6000_tune_index >= 0)
3563 tune_index = rs6000_tune_index;
3564 else if (cpu_index >= 0)
3565 rs6000_tune_index = tune_index = cpu_index;
3566 else
3567 {
3568 size_t i;
3569 enum processor_type tune_proc
3570 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3571
3572 tune_index = -1;
3573 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3574 if (processor_target_table[i].processor == tune_proc)
3575 {
3576 tune_index = i;
3577 break;
3578 }
3579 }
3580
3581 if (cpu_index >= 0)
3582 rs6000_cpu = processor_target_table[cpu_index].processor;
3583 else
3584 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3585
3586 gcc_assert (tune_index >= 0);
3587 rs6000_tune = processor_target_table[tune_index].processor;
3588
3589 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3590 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3591 || rs6000_cpu == PROCESSOR_PPCE5500)
3592 {
3593 if (TARGET_ALTIVEC)
3594 error ("AltiVec not supported in this target");
3595 }
3596
3597 /* If we are optimizing big endian systems for space, use the load/store
3598 multiple instructions. */
3599 if (BYTES_BIG_ENDIAN && optimize_size)
3600 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
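/* Note the idiom above: ANDing an option mask with ~rs6000_isa_flags_explicit
   turns the option on by default while leaving any explicit -m<opt> or
   -mno-<opt> from the user untouched.  The same pattern is used for
   -mstrict-align below.  */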
3601
3602 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3603 because the hardware doesn't support the instructions used in little
3604 endian mode, and using them causes an alignment trap. The 750 does not
3605 cause an alignment trap (except when the target is unaligned). */
3606
3607 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3608 {
3609 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3610 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3611 warning (0, "%qs is not supported on little endian systems",
3612 "-mmultiple");
3613 }
3614
3615 /* If little-endian, default to -mstrict-align on older processors.
3616 Testing for htm matches power8 and later. */
3617 if (!BYTES_BIG_ENDIAN
3618 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3619 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3620
3621 if (!rs6000_fold_gimple)
3622 fprintf (stderr,
3623 "gimple folding of rs6000 builtins has been disabled.\n");
3624
3625 /* Add some warnings for VSX. */
3626 if (TARGET_VSX)
3627 {
3628 const char *msg = NULL;
3629 if (!TARGET_HARD_FLOAT)
3630 {
3631 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3632 msg = N_("%<-mvsx%> requires hardware floating point");
3633 else
3634 {
3635 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3636 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3637 }
3638 }
3639 else if (TARGET_AVOID_XFORM > 0)
3640 msg = N_("%<-mvsx%> needs indexed addressing");
3641 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3642 & OPTION_MASK_ALTIVEC))
3643 {
3644 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3645 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3646 else
3647 msg = N_("%<-mno-altivec%> disables vsx");
3648 }
3649
3650 if (msg)
3651 {
3652 warning (0, msg);
3653 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3654 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3655 }
3656 }
3657
3658 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3659 the -mcpu setting to enable options that conflict. */
3660 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3661 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3662 | OPTION_MASK_ALTIVEC
3663 | OPTION_MASK_VSX)) != 0)
3664 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3665 | OPTION_MASK_DIRECT_MOVE)
3666 & ~rs6000_isa_flags_explicit);
3667
3668 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3669 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3670
3671 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3672 off all of the options that depend on those flags. */
3673 ignore_masks = rs6000_disable_incompatible_switches ();
3674
3675 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3676 unless the user explicitly used the -mno-<option> to disable the code. */
3677 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3678 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3679 else if (TARGET_P9_MINMAX)
3680 {
3681 if (cpu_index >= 0)
3682 {
3683 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3684 {
3685 /* legacy behavior: allow -mcpu=power9 with certain
3686 capabilities explicitly disabled. */
3687 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3688 }
3689 else
3690 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3691 "for <xxx> less than power9", "-mcpu");
3692 }
3693 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3694 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3695 & rs6000_isa_flags_explicit))
3696 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3697 were explicitly cleared. */
3698 error ("%qs incompatible with explicitly disabled options",
3699 "-mpower9-minmax");
3700 else
3701 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3702 }
3703 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3704 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3705 else if (TARGET_VSX)
3706 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3707 else if (TARGET_POPCNTD)
3708 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3709 else if (TARGET_DFP)
3710 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3711 else if (TARGET_CMPB)
3712 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3713 else if (TARGET_FPRND)
3714 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3715 else if (TARGET_POPCNTB)
3716 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3717 else if (TARGET_ALTIVEC)
3718 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3719
3720 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3721 {
3722 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3723 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3724 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3725 }
3726
3727 if (!TARGET_FPRND && TARGET_VSX)
3728 {
3729 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3730 /* TARGET_VSX = 1 implies Power7 and newer. */
3731 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3732 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3733 }
3734
3735 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3736 {
3737 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3738 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3739 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3740 }
3741
3742 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3743 {
3744 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3745 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3746 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3747 }
3748
3749 if (TARGET_P8_VECTOR && !TARGET_VSX)
3750 {
3751 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3752 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3753 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3754 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3755 {
3756 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3757 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3758 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3759 }
3760 else
3761 {
3762 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3763 not explicit. */
3764 rs6000_isa_flags |= OPTION_MASK_VSX;
3765 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3766 }
3767 }
3768
3769 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3770 {
3771 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3772 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3773 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3774 }
3775
3776 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3777 silently turn off quad memory mode. */
3778 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3779 {
3780 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3781 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3782
3783 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3784 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3785
3786 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3787 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3788 }
3789
3790 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3791 the words are reversed, but atomic operations can still be done by
3792 swapping the words. */
3793 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3794 {
3795 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3796 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3797 "mode"));
3798
3799 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3800 }
3801
3802 /* Assume that if the user asked for normal quad memory instructions, they
3803 want the atomic versions as well, unless they explicitly told us not to
3804 use quad word atomic instructions. */
3805 if (TARGET_QUAD_MEMORY
3806 && !TARGET_QUAD_MEMORY_ATOMIC
3807 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3808 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3809
3810 /* If we can shrink-wrap the TOC register save separately, then use
3811 -msave-toc-indirect unless explicitly disabled. */
3812 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3813 && flag_shrink_wrap_separate
3814 && optimize_function_for_speed_p (cfun))
3815 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3816
3817 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3818 generating power8 instructions. Power9 does not optimize power8 fusion
3819 cases. */
3820 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3821 {
3822 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3823 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3824 else
3825 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3826 }
3827
3828 /* Setting additional fusion flags turns on base fusion. */
3829 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3830 {
3831 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3832 {
3833 if (TARGET_P8_FUSION_SIGN)
3834 error ("%qs requires %qs", "-mpower8-fusion-sign",
3835 "-mpower8-fusion");
3836
3837 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3838 }
3839 else
3840 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3841 }
3842
3843 /* Power8 does not fuse sign extended loads with the addis. If we are
3844 optimizing at high levels for speed, convert a sign extended load into a
3845 zero extending load, and an explicit sign extension. */
3846 if (TARGET_P8_FUSION
3847 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3848 && optimize_function_for_speed_p (cfun)
3849 && optimize >= 3)
3850 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3851
3852 /* ISA 3.0 vector instructions include ISA 2.07. */
3853 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3854 {
3855 /* We prefer to not mention undocumented options in
3856 error messages. However, if users have managed to select
3857 power9-vector without selecting power8-vector, they
3858 already know about undocumented flags. */
3859 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3860 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3861 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3862 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3863 {
3864 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3865 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3866 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3867 }
3868 else
3869 {
3870 /* OPTION_MASK_P9_VECTOR is explicit and
3871 OPTION_MASK_P8_VECTOR is not explicit. */
3872 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3873 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3874 }
3875 }
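/* The block above follows the same three-way scheme used earlier for the
   -mpower8-vector/-mvsx pair: both options explicit and in conflict is an
   error; if only the prerequisite is explicit, silently drop the dependent
   option; if only the dependent option is explicit, silently enable the
   prerequisite.  */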
3876
3877 /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
3878 support. If we only have ISA 2.06 support, and the user did not specify
3879 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3880 but we don't enable the full vectorization support. */
3881 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3882 TARGET_ALLOW_MOVMISALIGN = 1;
3883
3884 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3885 {
3886 if (TARGET_ALLOW_MOVMISALIGN > 0
3887 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3888 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3889
3890 TARGET_ALLOW_MOVMISALIGN = 0;
3891 }
3892
3893 /* Determine when unaligned vector accesses are permitted, and when
3894 they are preferred over masked Altivec loads. Note that if
3895 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3896 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3897 not true. */
3898 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3899 {
3900 if (!TARGET_VSX)
3901 {
3902 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3903 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3904
3905 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3906 }
3907
3908 else if (!TARGET_ALLOW_MOVMISALIGN)
3909 {
3910 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3911 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3912 "-mallow-movmisalign");
3913
3914 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3915 }
3916 }
3917
3918 /* Use long double size to select the appropriate long double. We use
3919 TYPE_PRECISION to differentiate the 3 different long double types. We map
3920 128 into the precision used for TFmode. */
3921 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3922 ? 64
3923 : FLOAT_PRECISION_TFmode);
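/* Background note (restating the intent above): the three possible long
   double formats -- 64-bit double, IBM extended double-double, and IEEE
   128-bit -- carry distinct TYPE_PRECISION values, which is why a user
   request of 128 is mapped to FLOAT_PRECISION_TFmode here rather than
   being stored directly.  */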
3924
3925 /* Set long double size before the IEEE 128-bit tests. */
3926 if (!global_options_set.x_rs6000_long_double_type_size)
3927 {
3928 if (main_target_opt != NULL
3929 && (main_target_opt->x_rs6000_long_double_type_size
3930 != default_long_double_size))
3931 error ("target attribute or pragma changes %<long double%> size");
3932 else
3933 rs6000_long_double_type_size = default_long_double_size;
3934 }
3935 else if (rs6000_long_double_type_size == 128)
3936 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3937 else if (global_options_set.x_rs6000_ieeequad)
3938 {
3939 if (global_options.x_rs6000_ieeequad)
3940 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3941 else
3942 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3943 }
3944
3945 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3946 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3947 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3948 those systems will not pick up this default. Warn if the user changes the
3949 default unless -Wno-psabi. */
3950 if (!global_options_set.x_rs6000_ieeequad)
3951 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3952
3953 else
3954 {
3955 if (global_options.x_rs6000_ieeequad
3956 && (!TARGET_POPCNTD || !TARGET_VSX))
3957 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3958
3959 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3960 {
3961 static bool warned_change_long_double;
3962 if (!warned_change_long_double)
3963 {
3964 warned_change_long_double = true;
3965 if (TARGET_IEEEQUAD)
3966 warning (OPT_Wpsabi, "Using IEEE extended precision "
3967 "%<long double%>");
3968 else
3969 warning (OPT_Wpsabi, "Using IBM extended precision "
3970 "%<long double%>");
3971 }
3972 }
3973 }
3974
3975 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3976 systems. In GCC 7, we would enable the IEEE 128-bit floating point
3977 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3978 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3979 the keyword as well as the type. */
3980 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3981
3982 /* IEEE 128-bit floating point requires VSX support. */
3983 if (TARGET_FLOAT128_KEYWORD)
3984 {
3985 if (!TARGET_VSX)
3986 {
3987 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3988 error ("%qs requires VSX support", "-mfloat128");
3989
3990 TARGET_FLOAT128_TYPE = 0;
3991 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3992 | OPTION_MASK_FLOAT128_HW);
3993 }
3994 else if (!TARGET_FLOAT128_TYPE)
3995 {
3996 TARGET_FLOAT128_TYPE = 1;
3997 warning (0, "The %<-mfloat128%> option may not be fully supported");
3998 }
3999 }
4000
4001 /* Enable the __float128 keyword under Linux by default. */
4002 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4003 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4004 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4005
4006 /* If we are supporting the float128 type and have full ISA 3.0 support,
4007 enable -mfloat128-hardware by default. However, don't enable the
4008 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4009 because sometimes the compiler wants to put things in an integer
4010 container, and if we don't have __int128 support, it is impossible. */
4011 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4012 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4013 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4014 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4015
4016 if (TARGET_FLOAT128_HW
4017 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4018 {
4019 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4020 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4021
4022 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4023 }
4024
4025 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4026 {
4027 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4028 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4029
4030 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4031 }
4032
4033 /* Enable -mprefixed by default on 'future' systems. */
4034 if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4035 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4036
4037 /* -mprefixed requires -mcpu=future. */
4038 else if (TARGET_PREFIXED && !TARGET_FUTURE)
4039 {
4040 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4041 error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
4042
4043 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4044 }
4045
4046 /* -mpcrel requires prefixed load/store addressing. */
4047 if (TARGET_PCREL && !TARGET_PREFIXED)
4048 {
4049 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4050 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4051
4052 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4053 }
4054
4055 /* Print the options after updating the defaults. */
4056 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4057 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4058
4059 /* E500mc does "better" if we inline more aggressively. Respect the
4060 user's opinion, though. */
4061 if (rs6000_block_move_inline_limit == 0
4062 && (rs6000_tune == PROCESSOR_PPCE500MC
4063 || rs6000_tune == PROCESSOR_PPCE500MC64
4064 || rs6000_tune == PROCESSOR_PPCE5500
4065 || rs6000_tune == PROCESSOR_PPCE6500))
4066 rs6000_block_move_inline_limit = 128;
4067
4068 /* store_one_arg depends on expand_block_move to handle at least the
4069 size of reg_parm_stack_space. */
4070 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4071 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
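/* Illustrative arithmetic (not in the original comment): on 64-bit targets
   the register parameter save area is 8 GPRs x 8 bytes = 64 bytes, so the
   clamp above guarantees expand_block_move can inline at least that much;
   the 32-bit case is 8 x 4 = 32 bytes.  */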
4072
4073 if (global_init_p)
4074 {
4075 /* If the appropriate debug option is enabled, replace the target hooks
4076 with debug versions that call the real version and then print
4077 debugging information. */
4078 if (TARGET_DEBUG_COST)
4079 {
4080 targetm.rtx_costs = rs6000_debug_rtx_costs;
4081 targetm.address_cost = rs6000_debug_address_cost;
4082 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4083 }
4084
4085 if (TARGET_DEBUG_ADDR)
4086 {
4087 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4088 targetm.legitimize_address = rs6000_debug_legitimize_address;
4089 rs6000_secondary_reload_class_ptr
4090 = rs6000_debug_secondary_reload_class;
4091 targetm.secondary_memory_needed
4092 = rs6000_debug_secondary_memory_needed;
4093 targetm.can_change_mode_class
4094 = rs6000_debug_can_change_mode_class;
4095 rs6000_preferred_reload_class_ptr
4096 = rs6000_debug_preferred_reload_class;
4097 rs6000_mode_dependent_address_ptr
4098 = rs6000_debug_mode_dependent_address;
4099 }
4100
4101 if (rs6000_veclibabi_name)
4102 {
4103 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4104 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4105 else
4106 {
4107 error ("unknown vectorization library ABI type (%qs) for "
4108 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4109 ret = false;
4110 }
4111 }
4112 }
4113
4114 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4115 target attribute or pragma which automatically enables both options,
4116 unless the altivec ABI was set. This is set by default for 64-bit, but
4117 not for 32-bit. */
4118 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4119 {
4120 TARGET_FLOAT128_TYPE = 0;
4121 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4122 | OPTION_MASK_FLOAT128_KEYWORD)
4123 & ~rs6000_isa_flags_explicit);
4124 }
4125
4126 /* Enable Altivec ABI for AIX -maltivec. */
4127 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4128 {
4129 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4130 error ("target attribute or pragma changes AltiVec ABI");
4131 else
4132 rs6000_altivec_abi = 1;
4133 }
4134
4135 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4136 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4137 be explicitly overridden in either case. */
4138 if (TARGET_ELF)
4139 {
4140 if (!global_options_set.x_rs6000_altivec_abi
4141 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4142 {
4143 if (main_target_opt != NULL &&
4144 !main_target_opt->x_rs6000_altivec_abi)
4145 error ("target attribute or pragma changes AltiVec ABI");
4146 else
4147 rs6000_altivec_abi = 1;
4148 }
4149 }
4150
4151 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4152 So far, the only darwin64 targets are also MACH-O. */
4153 if (TARGET_MACHO
4154 && DEFAULT_ABI == ABI_DARWIN
4155 && TARGET_64BIT)
4156 {
4157 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4158 error ("target attribute or pragma changes darwin64 ABI");
4159 else
4160 {
4161 rs6000_darwin64_abi = 1;
4162 /* Default to natural alignment, for better performance. */
4163 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4164 }
4165 }
4166
4167 /* Place FP constants in the constant pool instead of the TOC
4168 if section anchors are enabled. */
4169 if (flag_section_anchors
4170 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4171 TARGET_NO_FP_IN_TOC = 1;
4172
4173 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4174 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4175
4176 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4177 SUBTARGET_OVERRIDE_OPTIONS;
4178 #endif
4179 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4180 SUBSUBTARGET_OVERRIDE_OPTIONS;
4181 #endif
4182 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4183 SUB3TARGET_OVERRIDE_OPTIONS;
4184 #endif
4185
4186 /* If the ABI has support for PC-relative relocations, enable it by default.
4187 This test depends on the sub-target tests above setting the code model to
4188 medium for ELF v2 systems. */
4189 if (PCREL_SUPPORTED_BY_OS
4190 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4191 rs6000_isa_flags |= OPTION_MASK_PCREL;
4192
4193 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4194 after the subtarget override options are done. */
4195 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4196 {
4197 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4198 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4199
4200 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4201 }
4202
4203 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4204 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4205
4206 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4207 && rs6000_tune != PROCESSOR_POWER5
4208 && rs6000_tune != PROCESSOR_POWER6
4209 && rs6000_tune != PROCESSOR_POWER7
4210 && rs6000_tune != PROCESSOR_POWER8
4211 && rs6000_tune != PROCESSOR_POWER9
4212 && rs6000_tune != PROCESSOR_FUTURE
4213 && rs6000_tune != PROCESSOR_PPCA2
4214 && rs6000_tune != PROCESSOR_CELL
4215 && rs6000_tune != PROCESSOR_PPC476);
4216 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4217 || rs6000_tune == PROCESSOR_POWER5
4218 || rs6000_tune == PROCESSOR_POWER7
4219 || rs6000_tune == PROCESSOR_POWER8);
4220 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4221 || rs6000_tune == PROCESSOR_POWER5
4222 || rs6000_tune == PROCESSOR_POWER6
4223 || rs6000_tune == PROCESSOR_POWER7
4224 || rs6000_tune == PROCESSOR_POWER8
4225 || rs6000_tune == PROCESSOR_POWER9
4226 || rs6000_tune == PROCESSOR_FUTURE
4227 || rs6000_tune == PROCESSOR_PPCE500MC
4228 || rs6000_tune == PROCESSOR_PPCE500MC64
4229 || rs6000_tune == PROCESSOR_PPCE5500
4230 || rs6000_tune == PROCESSOR_PPCE6500);
4231
4232 /* Allow debug switches to override the above settings. These are set to -1
4233 in rs6000.opt to indicate the user hasn't directly set the switch. */
4234 if (TARGET_ALWAYS_HINT >= 0)
4235 rs6000_always_hint = TARGET_ALWAYS_HINT;
4236
4237 if (TARGET_SCHED_GROUPS >= 0)
4238 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4239
4240 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4241 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4242
4243 rs6000_sched_restricted_insns_priority
4244 = (rs6000_sched_groups ? 1 : 0);
4245
4246 /* Handle -msched-costly-dep option. */
4247 rs6000_sched_costly_dep
4248 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4249
4250 if (rs6000_sched_costly_dep_str)
4251 {
4252 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4253 rs6000_sched_costly_dep = no_dep_costly;
4254 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4255 rs6000_sched_costly_dep = all_deps_costly;
4256 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4257 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4258 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4259 rs6000_sched_costly_dep = store_to_load_dep_costly;
4260 else
4261 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4262 atoi (rs6000_sched_costly_dep_str));
4263 }
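/* Besides the keywords handled above, the option string may also be a bare
   number handled by the atoi fallback; e.g. -msched-costly-dep=20 stores 20
   in rs6000_sched_costly_dep, which the scheduler hooks can then treat as a
   latency threshold for classifying a dependence as costly.  That reading
   of the numeric form is an interpretation of the fallback, not something
   stated here.  */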
4264
4265 /* Handle -minsert-sched-nops option. */
4266 rs6000_sched_insert_nops
4267 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4268
4269 if (rs6000_sched_insert_nops_str)
4270 {
4271 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4272 rs6000_sched_insert_nops = sched_finish_none;
4273 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4274 rs6000_sched_insert_nops = sched_finish_pad_groups;
4275 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4276 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4277 else
4278 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4279 atoi (rs6000_sched_insert_nops_str));
4280 }
4281
4282 /* Handle the stack protector options. */
4283 if (!global_options_set.x_rs6000_stack_protector_guard)
4284 #ifdef TARGET_THREAD_SSP_OFFSET
4285 rs6000_stack_protector_guard = SSP_TLS;
4286 #else
4287 rs6000_stack_protector_guard = SSP_GLOBAL;
4288 #endif
4289
4290 #ifdef TARGET_THREAD_SSP_OFFSET
4291 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4292 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4293 #endif
4294
4295 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4296 {
4297 char *endp;
4298 const char *str = rs6000_stack_protector_guard_offset_str;
4299
4300 errno = 0;
4301 long offset = strtol (str, &endp, 0);
4302 if (!*str || *endp || errno)
4303 error ("%qs is not a valid number in %qs", str,
4304 "-mstack-protector-guard-offset=");
4305
4306 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4307 || (TARGET_64BIT && (offset & 3)))
4308 error ("%qs is not a valid offset in %qs", str,
4309 "-mstack-protector-guard-offset=");
4310
4311 rs6000_stack_protector_guard_offset = offset;
4312 }
4313
4314 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4315 {
4316 const char *str = rs6000_stack_protector_guard_reg_str;
4317 int reg = decode_reg_name (str);
4318
4319 if (!IN_RANGE (reg, 1, 31))
4320 error ("%qs is not a valid base register in %qs", str,
4321 "-mstack-protector-guard-reg=");
4322
4323 rs6000_stack_protector_guard_reg = reg;
4324 }
4325
4326 if (rs6000_stack_protector_guard == SSP_TLS
4327 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4328 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4329
4330 if (global_init_p)
4331 {
4332 #ifdef TARGET_REGNAMES
4333 /* If the user desires alternate register names, copy in the
4334 alternate names now. */
4335 if (TARGET_REGNAMES)
4336 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4337 #endif
4338
4339 /* Set aix_struct_return last, after the ABI is determined.
4340 If -maix-struct-return or -msvr4-struct-return was explicitly
4341 used, don't override with the ABI default. */
4342 if (!global_options_set.x_aix_struct_return)
4343 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4344
4345 #if 0
4346 /* IBM XL compiler defaults to unsigned bitfields. */
4347 if (TARGET_XL_COMPAT)
4348 flag_signed_bitfields = 0;
4349 #endif
4350
4351 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4352 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4353
4354 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4355
4356 /* We can only guarantee the availability of DI pseudo-ops when
4357 assembling for 64-bit targets. */
4358 if (!TARGET_64BIT)
4359 {
4360 targetm.asm_out.aligned_op.di = NULL;
4361 targetm.asm_out.unaligned_op.di = NULL;
4362 }
4363
4364
4365 /* Set branch target alignment, if not optimizing for size. */
4366 if (!optimize_size)
4367 {
4368 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4369 aligned 8-byte to avoid misprediction by the branch predictor. */
4370 if (rs6000_tune == PROCESSOR_TITAN
4371 || rs6000_tune == PROCESSOR_CELL)
4372 {
4373 if (flag_align_functions && !str_align_functions)
4374 str_align_functions = "8";
4375 if (flag_align_jumps && !str_align_jumps)
4376 str_align_jumps = "8";
4377 if (flag_align_loops && !str_align_loops)
4378 str_align_loops = "8";
4379 }
4380 if (rs6000_align_branch_targets)
4381 {
4382 if (flag_align_functions && !str_align_functions)
4383 str_align_functions = "16";
4384 if (flag_align_jumps && !str_align_jumps)
4385 str_align_jumps = "16";
4386 if (flag_align_loops && !str_align_loops)
4387 {
4388 can_override_loop_align = 1;
4389 str_align_loops = "16";
4390 }
4391 }
4392 }
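/* For example, following the settings above: when tuning for power8 and not
   optimizing for size, functions, jumps and loops default to 16-byte
   alignment unless the user gave an explicit -falign-* value, and loop
   alignment may later be raised to 32 bytes by rs6000_loop_align via
   can_override_loop_align.  */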
4393
4394 /* Arrange to save and restore machine status around nested functions. */
4395 init_machine_status = rs6000_init_machine_status;
4396
4397 /* We should always be splitting complex arguments, but we can't break
4398 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4399 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4400 targetm.calls.split_complex_arg = NULL;
4401
4402 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4403 if (DEFAULT_ABI == ABI_AIX)
4404 targetm.calls.custom_function_descriptors = 0;
4405 }
4406
4407 /* Initialize rs6000_cost with the appropriate target costs. */
4408 if (optimize_size)
4409 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4410 else
4411 switch (rs6000_tune)
4412 {
4413 case PROCESSOR_RS64A:
4414 rs6000_cost = &rs64a_cost;
4415 break;
4416
4417 case PROCESSOR_MPCCORE:
4418 rs6000_cost = &mpccore_cost;
4419 break;
4420
4421 case PROCESSOR_PPC403:
4422 rs6000_cost = &ppc403_cost;
4423 break;
4424
4425 case PROCESSOR_PPC405:
4426 rs6000_cost = &ppc405_cost;
4427 break;
4428
4429 case PROCESSOR_PPC440:
4430 rs6000_cost = &ppc440_cost;
4431 break;
4432
4433 case PROCESSOR_PPC476:
4434 rs6000_cost = &ppc476_cost;
4435 break;
4436
4437 case PROCESSOR_PPC601:
4438 rs6000_cost = &ppc601_cost;
4439 break;
4440
4441 case PROCESSOR_PPC603:
4442 rs6000_cost = &ppc603_cost;
4443 break;
4444
4445 case PROCESSOR_PPC604:
4446 rs6000_cost = &ppc604_cost;
4447 break;
4448
4449 case PROCESSOR_PPC604e:
4450 rs6000_cost = &ppc604e_cost;
4451 break;
4452
4453 case PROCESSOR_PPC620:
4454 rs6000_cost = &ppc620_cost;
4455 break;
4456
4457 case PROCESSOR_PPC630:
4458 rs6000_cost = &ppc630_cost;
4459 break;
4460
4461 case PROCESSOR_CELL:
4462 rs6000_cost = &ppccell_cost;
4463 break;
4464
4465 case PROCESSOR_PPC750:
4466 case PROCESSOR_PPC7400:
4467 rs6000_cost = &ppc750_cost;
4468 break;
4469
4470 case PROCESSOR_PPC7450:
4471 rs6000_cost = &ppc7450_cost;
4472 break;
4473
4474 case PROCESSOR_PPC8540:
4475 case PROCESSOR_PPC8548:
4476 rs6000_cost = &ppc8540_cost;
4477 break;
4478
4479 case PROCESSOR_PPCE300C2:
4480 case PROCESSOR_PPCE300C3:
4481 rs6000_cost = &ppce300c2c3_cost;
4482 break;
4483
4484 case PROCESSOR_PPCE500MC:
4485 rs6000_cost = &ppce500mc_cost;
4486 break;
4487
4488 case PROCESSOR_PPCE500MC64:
4489 rs6000_cost = &ppce500mc64_cost;
4490 break;
4491
4492 case PROCESSOR_PPCE5500:
4493 rs6000_cost = &ppce5500_cost;
4494 break;
4495
4496 case PROCESSOR_PPCE6500:
4497 rs6000_cost = &ppce6500_cost;
4498 break;
4499
4500 case PROCESSOR_TITAN:
4501 rs6000_cost = &titan_cost;
4502 break;
4503
4504 case PROCESSOR_POWER4:
4505 case PROCESSOR_POWER5:
4506 rs6000_cost = &power4_cost;
4507 break;
4508
4509 case PROCESSOR_POWER6:
4510 rs6000_cost = &power6_cost;
4511 break;
4512
4513 case PROCESSOR_POWER7:
4514 rs6000_cost = &power7_cost;
4515 break;
4516
4517 case PROCESSOR_POWER8:
4518 rs6000_cost = &power8_cost;
4519 break;
4520
4521 case PROCESSOR_POWER9:
4522 case PROCESSOR_FUTURE:
4523 rs6000_cost = &power9_cost;
4524 break;
4525
4526 case PROCESSOR_PPCA2:
4527 rs6000_cost = &ppca2_cost;
4528 break;
4529
4530 default:
4531 gcc_unreachable ();
4532 }
4533
4534 if (global_init_p)
4535 {
4536 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4537 param_simultaneous_prefetches,
4538 rs6000_cost->simultaneous_prefetches);
4539 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4540 param_l1_cache_size,
4541 rs6000_cost->l1_cache_size);
4542 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4543 param_l1_cache_line_size,
4544 rs6000_cost->cache_line_size);
4545 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4546 param_l2_cache_size,
4547 rs6000_cost->l2_cache_size);
4548
4549 /* Increase loop peeling limits based on performance analysis. */
4550 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4551 param_max_peeled_insns, 400);
4552 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4553 param_max_completely_peeled_insns, 400);
4554
4555 /* Use the 'model' -fsched-pressure algorithm by default. */
4556 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4557 param_sched_pressure_algorithm,
4558 SCHED_PRESSURE_MODEL);
4559
4560 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4561 turns -frename-registers on. */
4562 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4563 || (global_options_set.x_flag_unroll_all_loops
4564 && flag_unroll_all_loops))
4565 {
4566 if (!global_options_set.x_unroll_only_small_loops)
4567 unroll_only_small_loops = 0;
4568 if (!global_options_set.x_flag_rename_registers)
4569 flag_rename_registers = 1;
4570 if (!global_options_set.x_flag_cunroll_grow_size)
4571 flag_cunroll_grow_size = 1;
4572 }
4573 else
4574 if (!global_options_set.x_flag_cunroll_grow_size)
4575 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
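/* Net effect of the block above: plain -O2 keeps -munroll-only-small-loops
   and lets cunroll grow code size only when -fpeel-loops or -O3 is in
   effect, while an explicit -funroll-loops (or -funroll-all-loops) drops
   the small-loops restriction and turns on -frename-registers and
   -fcunroll-grow-size, in each case unless the user set the flag
   explicitly.  */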
4576
4577 /* If using typedef char *va_list, signal that
4578 __builtin_va_start (&ap, 0) can be optimized to
4579 ap = __builtin_next_arg (0). */
4580 if (DEFAULT_ABI != ABI_V4)
4581 targetm.expand_builtin_va_start = NULL;
4582 }
4583
4584 /* If not explicitly specified via option, decide whether to generate indexed
4585 load/store instructions. A value of -1 indicates that the
4586 initial value of this variable has not been overwritten. During
4587 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4588 if (TARGET_AVOID_XFORM == -1)
4589 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4590 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4591 need indexed accesses and the type used is the scalar type of the element
4592 being loaded or stored. */
4593 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4594 && !TARGET_ALTIVEC);
4595
4596 /* Set the -mrecip options. */
4597 if (rs6000_recip_name)
4598 {
4599 char *p = ASTRDUP (rs6000_recip_name);
4600 char *q;
4601 unsigned int mask, i;
4602 bool invert;
4603
4604 while ((q = strtok (p, ",")) != NULL)
4605 {
4606 p = NULL;
4607 if (*q == '!')
4608 {
4609 invert = true;
4610 q++;
4611 }
4612 else
4613 invert = false;
4614
4615 if (!strcmp (q, "default"))
4616 mask = ((TARGET_RECIP_PRECISION)
4617 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4618 else
4619 {
4620 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4621 if (!strcmp (q, recip_options[i].string))
4622 {
4623 mask = recip_options[i].mask;
4624 break;
4625 }
4626
4627 if (i == ARRAY_SIZE (recip_options))
4628 {
4629 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4630 invert = false;
4631 mask = 0;
4632 ret = false;
4633 }
4634 }
4635
4636 if (invert)
4637 rs6000_recip_control &= ~mask;
4638 else
4639 rs6000_recip_control |= mask;
4640 }
4641 }
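/* Usage sketch (option names come from the recip_options table defined
   elsewhere in this file): -mrecip=default enables the precision-based
   default set, while something like -mrecip=all,!rsqrtd enables every
   estimate except the one masked off by the '!'-inverted entry.  */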
4642
4643 /* Set the builtin mask of the various options used that could affect which
4644 builtins were used. In the past we used target_flags, but we've run out
4645 of bits, and some options are no longer in target_flags. */
4646 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4647 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4648 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4649 rs6000_builtin_mask);
4650
4651 /* Initialize all of the registers. */
4652 rs6000_init_hard_regno_mode_ok (global_init_p);
4653
4654 /* Save the initial options in case the user does function-specific options. */
4655 if (global_init_p)
4656 target_option_default_node = target_option_current_node
4657 = build_target_option_node (&global_options);
4658
4659 /* If not explicitly specified via option, decide whether to generate the
4660 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4661 if (TARGET_LINK_STACK == -1)
4662 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4663
4664 /* Deprecate use of -mno-speculate-indirect-jumps. */
4665 if (!rs6000_speculate_indirect_jumps)
4666 warning (0, "%qs is deprecated and not recommended in any circumstances",
4667 "-mno-speculate-indirect-jumps");
4668
4669 return ret;
4670 }
4671
4672 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4673 define the target cpu type. */
4674
4675 static void
4676 rs6000_option_override (void)
4677 {
4678 (void) rs6000_option_override_internal (true);
4679 }
4680
4681 \f
4682 /* Implement targetm.vectorize.builtin_mask_for_load. */
4683 static tree
4684 rs6000_builtin_mask_for_load (void)
4685 {
4686 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4687 if ((TARGET_ALTIVEC && !TARGET_VSX)
4688 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4689 return altivec_builtin_mask_for_load;
4690 else
4691 return 0;
4692 }
4693
4694 /* Implement LOOP_ALIGN. */
4695 align_flags
4696 rs6000_loop_align (rtx label)
4697 {
4698 basic_block bb;
4699 int ninsns;
4700
4701 /* Don't override loop alignment if -falign-loops was specified. */
4702 if (!can_override_loop_align)
4703 return align_loops;
4704
4705 bb = BLOCK_FOR_INSN (label);
4706 ninsns = num_loop_insns (bb->loop_father);
4707
4708 /* Align small loops to 32 bytes to fit in an icache sector; otherwise use the default alignment. */
4709 if (ninsns > 4 && ninsns <= 8
4710 && (rs6000_tune == PROCESSOR_POWER4
4711 || rs6000_tune == PROCESSOR_POWER5
4712 || rs6000_tune == PROCESSOR_POWER6
4713 || rs6000_tune == PROCESSOR_POWER7
4714 || rs6000_tune == PROCESSOR_POWER8))
4715 return align_flags (5);
4716 else
4717 return align_loops;
4718 }
4719
4720 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4721 after applying N iterations. This routine does not determine
4722 how many iterations are required to reach the desired alignment. */
4723
4724 static bool
4725 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4726 {
4727 if (is_packed)
4728 return false;
4729
4730 if (TARGET_32BIT)
4731 {
4732 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4733 return true;
4734
4735 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4736 return true;
4737
4738 return false;
4739 }
4740 else
4741 {
4742 if (TARGET_MACHO)
4743 return false;
4744
4745 /* Assume that all other types are naturally aligned. CHECKME! */
4746 return true;
4747 }
4748 }
4749
4750 /* Return true if the vector misalignment factor is supported by the
4751 target. */
4752 static bool
4753 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4754 const_tree type,
4755 int misalignment,
4756 bool is_packed)
4757 {
4758 if (TARGET_VSX)
4759 {
4760 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4761 return true;
4762
4763 /* Return false if the movmisalign pattern is not supported for this mode. */
4764 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4765 return false;
4766
4767 if (misalignment == -1)
4768 {
4769 /* Misalignment factor is unknown at compile time but we know
4770 it's word aligned. */
4771 if (rs6000_vector_alignment_reachable (type, is_packed))
4772 {
4773 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4774
4775 if (element_size == 64 || element_size == 32)
4776 return true;
4777 }
4778
4779 return false;
4780 }
4781
4782 /* VSX supports word-aligned vectors. */
4783 if (misalignment % 4 == 0)
4784 return true;
4785 }
4786 return false;
4787 }
4788
4789 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4790 static int
4791 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4792 tree vectype, int misalign)
4793 {
4794 unsigned elements;
4795 tree elem_type;
4796
4797 switch (type_of_cost)
4798 {
4799 case scalar_stmt:
4800 case scalar_store:
4801 case vector_stmt:
4802 case vector_store:
4803 case vec_to_scalar:
4804 case scalar_to_vec:
4805 case cond_branch_not_taken:
4806 return 1;
4807 case scalar_load:
4808 case vector_load:
4809 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4810 return 2;
4811
4812 case vec_perm:
4813 /* Power7 has only one permute unit, make it a bit expensive. */
4814 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4815 return 3;
4816 else
4817 return 1;
4818
4819 case vec_promote_demote:
4820 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4821 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4822 return 4;
4823 else
4824 return 1;
4825
4826 case cond_branch_taken:
4827 return 3;
4828
4829 case unaligned_load:
4830 case vector_gather_load:
4831 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4832 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4833 return 2;
4834
4835 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4836 {
4837 elements = TYPE_VECTOR_SUBPARTS (vectype);
4838 if (elements == 2)
4839 /* Double word aligned. */
4840 return 4;
4841
4842 if (elements == 4)
4843 {
4844 switch (misalign)
4845 {
4846 case 8:
4847 /* Double word aligned. */
4848 return 4;
4849
4850 case -1:
4851 /* Unknown misalignment. */
4852 case 4:
4853 case 12:
4854 /* Word aligned. */
4855 return 33;
4856
4857 default:
4858 gcc_unreachable ();
4859 }
4860 }
4861 }
4862
4863 if (TARGET_ALTIVEC)
4864 /* Misaligned loads are not supported. */
4865 gcc_unreachable ();
4866
4867 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4868 return 4;
4869
4870 case unaligned_store:
4871 case vector_scatter_store:
4872 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4873 return 1;
4874
4875 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4876 {
4877 elements = TYPE_VECTOR_SUBPARTS (vectype);
4878 if (elements == 2)
4879 /* Double word aligned. */
4880 return 2;
4881
4882 if (elements == 4)
4883 {
4884 switch (misalign)
4885 {
4886 case 8:
4887 /* Double word aligned. */
4888 return 2;
4889
4890 case -1:
4891 /* Unknown misalignment. */
4892 case 4:
4893 case 12:
4894 /* Word aligned. */
4895 return 23;
4896
4897 default:
4898 gcc_unreachable ();
4899 }
4900 }
4901 }
4902
4903 if (TARGET_ALTIVEC)
4904 /* Misaligned stores are not supported. */
4905 gcc_unreachable ();
4906
4907 return 2;
4908
4909 case vec_construct:
4910 /* This is a rough approximation assuming non-constant elements
4911 constructed into a vector via element insertion. FIXME:
4912 vec_construct is not granular enough for uniformly good
4913 decisions. If the initialization is a splat, this is
4914 cheaper than we estimate. Improve this someday. */
4915 elem_type = TREE_TYPE (vectype);
4916 /* 32-bit scalar floats loaded into registers are stored as double
4917 precision, so we need 2 permutes, 2 converts, and 1 merge
4918 to construct a vector of short floats from them. */
4919 if (SCALAR_FLOAT_TYPE_P (elem_type)
4920 && TYPE_PRECISION (elem_type) == 32)
4921 return 5;
4922 /* On POWER9, integer vector types are built up in GPRs and then
4923 use a direct move (2 cycles). For POWER8 this is even worse,
4924 as we need two direct moves and a merge, and the direct moves
4925 are five cycles. */
4926 else if (INTEGRAL_TYPE_P (elem_type))
4927 {
4928 if (TARGET_P9_VECTOR)
4929 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4930 else
4931 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4932 }
4933 else
4934 /* V2DFmode doesn't need a direct move. */
4935 return 2;
4936
4937 default:
4938 gcc_unreachable ();
4939 }
4940 }
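
/* Worked example of the unaligned_load costs above (an illustration derived
   from the cases in this function, not additional target data): on a VSX
   target where TARGET_EFFICIENT_UNALIGNED_VSX is false but misaligned moves
   are allowed, a 4-element vector load that is double-word aligned
   (misalign == 8) costs 4, while one that is only word aligned (misalign ==
   4 or 12) or of unknown alignment (-1) costs 33, reflecting the
   load/permute realignment sequences required.  A 2-element vector is
   always at least double-word aligned and costs 4.  */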
4941
4942 /* Implement targetm.vectorize.preferred_simd_mode. */
4943
4944 static machine_mode
4945 rs6000_preferred_simd_mode (scalar_mode mode)
4946 {
4947 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4948
4949 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4950 return vmode.require ();
4951
4952 return word_mode;
4953 }
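
/* A sketch of the resulting mapping, assuming the VSX/Altivec vector units
   are enabled (illustrative):

     SFmode -> V4SFmode   (16 / 4 = 4 lanes)
     DFmode -> V2DFmode   (16 / 8 = 2 lanes)
     QImode -> V16QImode  (16 / 1 = 16 lanes)

   If no vector unit handles the 16-byte mode, word_mode is returned.  */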
4954
4955 typedef struct _rs6000_cost_data
4956 {
4957 struct loop *loop_info;
4958 unsigned cost[3];
4959 } rs6000_cost_data;
4960
4961 /* Test for likely overcommitment of vector hardware resources. If a
4962 loop iteration is relatively large, and too large a percentage of
4963 instructions in the loop are vectorized, the cost model may not
4964 adequately reflect delays from unavailable vector resources.
4965 Penalize the loop body cost for this case. */
4966
4967 static void
4968 rs6000_density_test (rs6000_cost_data *data)
4969 {
4970 const int DENSITY_PCT_THRESHOLD = 85;
4971 const int DENSITY_SIZE_THRESHOLD = 70;
4972 const int DENSITY_PENALTY = 10;
4973 struct loop *loop = data->loop_info;
4974 basic_block *bbs = get_loop_body (loop);
4975 int nbbs = loop->num_nodes;
4976 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4977 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4978 int i, density_pct;
4979
4980 for (i = 0; i < nbbs; i++)
4981 {
4982 basic_block bb = bbs[i];
4983 gimple_stmt_iterator gsi;
4984
4985 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4986 {
4987 gimple *stmt = gsi_stmt (gsi);
4988 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4989
4990 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4991 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4992 not_vec_cost++;
4993 }
4994 }
4995
4996 free (bbs);
4997 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4998
4999 if (density_pct > DENSITY_PCT_THRESHOLD
5000 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5001 {
5002 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5003 if (dump_enabled_p ())
5004 dump_printf_loc (MSG_NOTE, vect_location,
5005 "density %d%%, cost %d exceeds threshold, penalizing "
5006 "loop body cost by %d%%", density_pct,
5007 vec_cost + not_vec_cost, DENSITY_PENALTY);
5008 }
5009 }
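
/* Worked example of the density heuristic (numbers invented for
   illustration): with vec_cost == 90 and not_vec_cost == 10, density_pct
   is 90 > 85 and the total size 100 > 70, so the body cost is raised from
   90 to 90 * 110 / 100 == 99.  A loop with the same density but a total
   cost below 70 is left unpenalized.  */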
5010
5011 /* Implement targetm.vectorize.init_cost. */
5012
5013 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5014 instruction is needed by the vectorization. */
5015 static bool rs6000_vect_nonmem;
5016
5017 static void *
5018 rs6000_init_cost (struct loop *loop_info)
5019 {
5020 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5021 data->loop_info = loop_info;
5022 data->cost[vect_prologue] = 0;
5023 data->cost[vect_body] = 0;
5024 data->cost[vect_epilogue] = 0;
5025 rs6000_vect_nonmem = false;
5026 return data;
5027 }
5028
5029 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5030 For some statements, we would like to further fine-tune the cost on top of
5031 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5032 information on statement operation codes etc.  One typical case here is
5033 COND_EXPR: it takes the same cost as a simple FXU instruction when evaluated
5034 for scalar cost, but it should be priced higher since it is transformed into
5035 either compare + branch or compare + isel instructions. */
5036
5037 static unsigned
5038 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5039 struct _stmt_vec_info *stmt_info)
5040 {
5041 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5042 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5043 {
5044 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5045 if (subcode == COND_EXPR)
5046 return 2;
5047 }
5048
5049 return 0;
5050 }
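
/* For instance (illustrative): a scalar GIMPLE_ASSIGN whose rhs code is
   COND_EXPR is charged the base scalar_stmt cost of 1 plus the 2 added
   here, so rs6000_add_stmt_cost prices it at 3, approximating the
   compare + branch or compare + isel pair it becomes.  */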
5051
5052 /* Implement targetm.vectorize.add_stmt_cost. */
5053
5054 static unsigned
5055 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5056 enum vect_cost_for_stmt kind,
5057 struct _stmt_vec_info *stmt_info, tree vectype,
5058 int misalign, enum vect_cost_model_location where)
5059 {
5060 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5061 unsigned retval = 0;
5062
5063 if (flag_vect_cost_model)
5064 {
5065 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5066 misalign);
5067 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5068 /* Statements in an inner loop relative to the loop being
5069 vectorized are weighted more heavily. The value here is
5070 arbitrary and could potentially be improved with analysis. */
5071 if (where == vect_body && stmt_info
5072 && stmt_in_inner_loop_p (vinfo, stmt_info))
5073 count *= 50; /* FIXME. */
5074
5075 retval = (unsigned) (count * stmt_cost);
5076 cost_data->cost[where] += retval;
5077
5078 /* Check whether we're doing something other than just a copy loop.
5079 Not all such loops may be profitably vectorized; see
5080 rs6000_finish_cost. */
5081 if ((kind == vec_to_scalar || kind == vec_perm
5082 || kind == vec_promote_demote || kind == vec_construct
5083 || kind == scalar_to_vec)
5084 || (where == vect_body && kind == vector_stmt))
5085 rs6000_vect_nonmem = true;
5086 }
5087
5088 return retval;
5089 }
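
/* Worked example (values invented for illustration): a vector_load with
   stmt_cost 2 sitting in an inner loop relative to the loop being
   vectorized, with count == 1, contributes 1 * 50 * 2 == 100 to
   cost_data->cost[vect_body].  */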
5090
5091 /* Implement targetm.vectorize.finish_cost. */
5092
5093 static void
5094 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5095 unsigned *body_cost, unsigned *epilogue_cost)
5096 {
5097 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5098
5099 if (cost_data->loop_info)
5100 rs6000_density_test (cost_data);
5101
5102 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5103 that require versioning for any reason. The vectorization is at
5104 best a wash inside the loop, and the versioning checks make
5105 profitability highly unlikely and potentially quite harmful. */
5106 if (cost_data->loop_info)
5107 {
5108 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5109 if (!rs6000_vect_nonmem
5110 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5111 && LOOP_REQUIRES_VERSIONING (vec_info))
5112 cost_data->cost[vect_body] += 10000;
5113 }
5114
5115 *prologue_cost = cost_data->cost[vect_prologue];
5116 *body_cost = cost_data->cost[vect_body];
5117 *epilogue_cost = cost_data->cost[vect_epilogue];
5118 }
5119
5120 /* Implement targetm.vectorize.destroy_cost_data. */
5121
5122 static void
5123 rs6000_destroy_cost_data (void *data)
5124 {
5125 free (data);
5126 }
5127
5128 /* Implement targetm.loop_unroll_adjust. */
5129
5130 static unsigned
5131 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5132 {
5133 if (unroll_only_small_loops)
5134 {
5135 /* TODO: This is hardcoded to 10 right now.  It can be refined; for
5136 example, we may want to unroll very small loops more times (4 perhaps).
5137 We should also use a PARAM for this. */
5138 if (loop->ninsns <= 10)
5139 return MIN (2, nunroll);
5140 else
5141 return 0;
5142 }
5143
5144 return nunroll;
5145 }
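
/* For example (illustrative): with unroll_only_small_loops set, a loop of
   8 insns requested to unroll 4 times is trimmed to MIN (2, 4) == 2,
   while a loop of 12 insns is not unrolled at all.  Without the flag the
   requested factor passes through unchanged.  */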
5146
5147 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5148 library with vectorized intrinsics. */
5149
5150 static tree
5151 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5152 tree type_in)
5153 {
5154 char name[32];
5155 const char *suffix = NULL;
5156 tree fntype, new_fndecl, bdecl = NULL_TREE;
5157 int n_args = 1;
5158 const char *bname;
5159 machine_mode el_mode, in_mode;
5160 int n, in_n;
5161
5162 /* Libmass is suitable for unsafe math only, as it does not correctly
5163 support parts of IEEE with the required precision, such as denormals.
5164 Only support it if we have VSX, so we can use the simd d2 or f4 functions.
5165 XXX: Add variable length support. */
5166 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5167 return NULL_TREE;
5168
5169 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5170 n = TYPE_VECTOR_SUBPARTS (type_out);
5171 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5172 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5173 if (el_mode != in_mode
5174 || n != in_n)
5175 return NULL_TREE;
5176
5177 switch (fn)
5178 {
5179 CASE_CFN_ATAN2:
5180 CASE_CFN_HYPOT:
5181 CASE_CFN_POW:
5182 n_args = 2;
5183 gcc_fallthrough ();
5184
5185 CASE_CFN_ACOS:
5186 CASE_CFN_ACOSH:
5187 CASE_CFN_ASIN:
5188 CASE_CFN_ASINH:
5189 CASE_CFN_ATAN:
5190 CASE_CFN_ATANH:
5191 CASE_CFN_CBRT:
5192 CASE_CFN_COS:
5193 CASE_CFN_COSH:
5194 CASE_CFN_ERF:
5195 CASE_CFN_ERFC:
5196 CASE_CFN_EXP2:
5197 CASE_CFN_EXP:
5198 CASE_CFN_EXPM1:
5199 CASE_CFN_LGAMMA:
5200 CASE_CFN_LOG10:
5201 CASE_CFN_LOG1P:
5202 CASE_CFN_LOG2:
5203 CASE_CFN_LOG:
5204 CASE_CFN_SIN:
5205 CASE_CFN_SINH:
5206 CASE_CFN_SQRT:
5207 CASE_CFN_TAN:
5208 CASE_CFN_TANH:
5209 if (el_mode == DFmode && n == 2)
5210 {
5211 bdecl = mathfn_built_in (double_type_node, fn);
5212 suffix = "d2"; /* pow -> powd2 */
5213 }
5214 else if (el_mode == SFmode && n == 4)
5215 {
5216 bdecl = mathfn_built_in (float_type_node, fn);
5217 suffix = "4"; /* powf -> powf4 */
5218 }
5219 else
5220 return NULL_TREE;
5221 if (!bdecl)
5222 return NULL_TREE;
5223 break;
5224
5225 default:
5226 return NULL_TREE;
5227 }
5228
5229 gcc_assert (suffix != NULL);
5230 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5231 if (!bname)
5232 return NULL_TREE;
5233
5234 strcpy (name, bname + strlen ("__builtin_"));
5235 strcat (name, suffix);
5236
5237 if (n_args == 1)
5238 fntype = build_function_type_list (type_out, type_in, NULL);
5239 else if (n_args == 2)
5240 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5241 else
5242 gcc_unreachable ();
5243
5244 /* Build a function declaration for the vectorized function. */
5245 new_fndecl = build_decl (BUILTINS_LOCATION,
5246 FUNCTION_DECL, get_identifier (name), fntype);
5247 TREE_PUBLIC (new_fndecl) = 1;
5248 DECL_EXTERNAL (new_fndecl) = 1;
5249 DECL_IS_NOVOPS (new_fndecl) = 1;
5250 TREE_READONLY (new_fndecl) = 1;
5251
5252 return new_fndecl;
5253 }
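
/* A sketch of the name mangling above (illustrative): vectorizing pow over
   V2DFmode strips "__builtin_" from "__builtin_pow" and appends "d2",
   declaring

     vector double powd2 (vector double, vector double);

   while powf over V4SFmode becomes powf4.  Both require
   -funsafe-math-optimizations and VSX, per the guard at the top.  */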
5254
5255 /* Returns a function decl for a vectorized version of the builtin function
5256 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5257 if it is not available. */
5258
5259 static tree
5260 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5261 tree type_in)
5262 {
5263 machine_mode in_mode, out_mode;
5264 int in_n, out_n;
5265
5266 if (TARGET_DEBUG_BUILTIN)
5267 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5268 combined_fn_name (combined_fn (fn)),
5269 GET_MODE_NAME (TYPE_MODE (type_out)),
5270 GET_MODE_NAME (TYPE_MODE (type_in)));
5271
5272 if (TREE_CODE (type_out) != VECTOR_TYPE
5273 || TREE_CODE (type_in) != VECTOR_TYPE)
5274 return NULL_TREE;
5275
5276 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5277 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5278 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5279 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5280
5281 switch (fn)
5282 {
5283 CASE_CFN_COPYSIGN:
5284 if (VECTOR_UNIT_VSX_P (V2DFmode)
5285 && out_mode == DFmode && out_n == 2
5286 && in_mode == DFmode && in_n == 2)
5287 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5288 if (VECTOR_UNIT_VSX_P (V4SFmode)
5289 && out_mode == SFmode && out_n == 4
5290 && in_mode == SFmode && in_n == 4)
5291 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5292 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5293 && out_mode == SFmode && out_n == 4
5294 && in_mode == SFmode && in_n == 4)
5295 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5296 break;
5297 CASE_CFN_CEIL:
5298 if (VECTOR_UNIT_VSX_P (V2DFmode)
5299 && out_mode == DFmode && out_n == 2
5300 && in_mode == DFmode && in_n == 2)
5301 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5302 if (VECTOR_UNIT_VSX_P (V4SFmode)
5303 && out_mode == SFmode && out_n == 4
5304 && in_mode == SFmode && in_n == 4)
5305 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5306 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5307 && out_mode == SFmode && out_n == 4
5308 && in_mode == SFmode && in_n == 4)
5309 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5310 break;
5311 CASE_CFN_FLOOR:
5312 if (VECTOR_UNIT_VSX_P (V2DFmode)
5313 && out_mode == DFmode && out_n == 2
5314 && in_mode == DFmode && in_n == 2)
5315 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5316 if (VECTOR_UNIT_VSX_P (V4SFmode)
5317 && out_mode == SFmode && out_n == 4
5318 && in_mode == SFmode && in_n == 4)
5319 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5320 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5321 && out_mode == SFmode && out_n == 4
5322 && in_mode == SFmode && in_n == 4)
5323 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5324 break;
5325 CASE_CFN_FMA:
5326 if (VECTOR_UNIT_VSX_P (V2DFmode)
5327 && out_mode == DFmode && out_n == 2
5328 && in_mode == DFmode && in_n == 2)
5329 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5330 if (VECTOR_UNIT_VSX_P (V4SFmode)
5331 && out_mode == SFmode && out_n == 4
5332 && in_mode == SFmode && in_n == 4)
5333 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5334 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5335 && out_mode == SFmode && out_n == 4
5336 && in_mode == SFmode && in_n == 4)
5337 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5338 break;
5339 CASE_CFN_TRUNC:
5340 if (VECTOR_UNIT_VSX_P (V2DFmode)
5341 && out_mode == DFmode && out_n == 2
5342 && in_mode == DFmode && in_n == 2)
5343 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5344 if (VECTOR_UNIT_VSX_P (V4SFmode)
5345 && out_mode == SFmode && out_n == 4
5346 && in_mode == SFmode && in_n == 4)
5347 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5348 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5349 && out_mode == SFmode && out_n == 4
5350 && in_mode == SFmode && in_n == 4)
5351 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5352 break;
5353 CASE_CFN_NEARBYINT:
5354 if (VECTOR_UNIT_VSX_P (V2DFmode)
5355 && flag_unsafe_math_optimizations
5356 && out_mode == DFmode && out_n == 2
5357 && in_mode == DFmode && in_n == 2)
5358 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5359 if (VECTOR_UNIT_VSX_P (V4SFmode)
5360 && flag_unsafe_math_optimizations
5361 && out_mode == SFmode && out_n == 4
5362 && in_mode == SFmode && in_n == 4)
5363 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5364 break;
5365 CASE_CFN_RINT:
5366 if (VECTOR_UNIT_VSX_P (V2DFmode)
5367 && !flag_trapping_math
5368 && out_mode == DFmode && out_n == 2
5369 && in_mode == DFmode && in_n == 2)
5370 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5371 if (VECTOR_UNIT_VSX_P (V4SFmode)
5372 && !flag_trapping_math
5373 && out_mode == SFmode && out_n == 4
5374 && in_mode == SFmode && in_n == 4)
5375 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5376 break;
5377 default:
5378 break;
5379 }
5380
5381 /* Generate calls to libmass if appropriate. */
5382 if (rs6000_veclib_handler)
5383 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5384
5385 return NULL_TREE;
5386 }
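
/* As a concrete instance of the table above (illustrative): a ceil over a
   V2DFmode vector resolves to the VSX_BUILTIN_XVRDPIP decl, i.e. the
   xvrdpip instruction; the V4SFmode form maps to xvrspip, or to vrfip
   when only Altivec is available.  */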
5387
5388 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5389
5390 static tree
5391 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5392 tree type_in)
5393 {
5394 machine_mode in_mode, out_mode;
5395 int in_n, out_n;
5396
5397 if (TARGET_DEBUG_BUILTIN)
5398 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5399 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5400 GET_MODE_NAME (TYPE_MODE (type_out)),
5401 GET_MODE_NAME (TYPE_MODE (type_in)));
5402
5403 if (TREE_CODE (type_out) != VECTOR_TYPE
5404 || TREE_CODE (type_in) != VECTOR_TYPE)
5405 return NULL_TREE;
5406
5407 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5408 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5409 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5410 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5411
5412 enum rs6000_builtins fn
5413 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5414 switch (fn)
5415 {
5416 case RS6000_BUILTIN_RSQRTF:
5417 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5418 && out_mode == SFmode && out_n == 4
5419 && in_mode == SFmode && in_n == 4)
5420 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5421 break;
5422 case RS6000_BUILTIN_RSQRT:
5423 if (VECTOR_UNIT_VSX_P (V2DFmode)
5424 && out_mode == DFmode && out_n == 2
5425 && in_mode == DFmode && in_n == 2)
5426 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5427 break;
5428 case RS6000_BUILTIN_RECIPF:
5429 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5430 && out_mode == SFmode && out_n == 4
5431 && in_mode == SFmode && in_n == 4)
5432 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5433 break;
5434 case RS6000_BUILTIN_RECIP:
5435 if (VECTOR_UNIT_VSX_P (V2DFmode)
5436 && out_mode == DFmode && out_n == 2
5437 && in_mode == DFmode && in_n == 2)
5438 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5439 break;
5440 default:
5441 break;
5442 }
5443 return NULL_TREE;
5444 }
5445 \f
5446 /* Default CPU string for rs6000*_file_start functions. */
5447 static const char *rs6000_default_cpu;
5448
5449 #ifdef USING_ELFOS_H
5450 const char *rs6000_machine;
5451
5452 const char *
5453 rs6000_machine_from_flags (void)
5454 {
5455 HOST_WIDE_INT flags = rs6000_isa_flags;
5456
5457 /* Disable the flags that should never influence the .machine selection. */
5458 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5459
5460 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5461 return "future";
5462 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5463 return "power9";
5464 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5465 return "power8";
5466 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5467 return "power7";
5468 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5469 return "power6";
5470 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5471 return "power5";
5472 if ((flags & ISA_2_1_MASKS) != 0)
5473 return "power4";
5474 if ((flags & OPTION_MASK_POWERPC64) != 0)
5475 return "ppc64";
5476 return "ppc";
5477 }
5478
5479 void
5480 emit_asm_machine (void)
5481 {
5482 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5483 }
5484 #endif
5485
5486 /* Do anything needed at the start of the asm file. */
5487
5488 static void
5489 rs6000_file_start (void)
5490 {
5491 char buffer[80];
5492 const char *start = buffer;
5493 FILE *file = asm_out_file;
5494
5495 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5496
5497 default_file_start ();
5498
5499 if (flag_verbose_asm)
5500 {
5501 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5502
5503 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5504 {
5505 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5506 start = "";
5507 }
5508
5509 if (global_options_set.x_rs6000_cpu_index)
5510 {
5511 fprintf (file, "%s -mcpu=%s", start,
5512 processor_target_table[rs6000_cpu_index].name);
5513 start = "";
5514 }
5515
5516 if (global_options_set.x_rs6000_tune_index)
5517 {
5518 fprintf (file, "%s -mtune=%s", start,
5519 processor_target_table[rs6000_tune_index].name);
5520 start = "";
5521 }
5522
5523 if (PPC405_ERRATUM77)
5524 {
5525 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5526 start = "";
5527 }
5528
5529 #ifdef USING_ELFOS_H
5530 switch (rs6000_sdata)
5531 {
5532 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5533 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5534 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5535 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5536 }
5537
5538 if (rs6000_sdata && g_switch_value)
5539 {
5540 fprintf (file, "%s -G %d", start,
5541 g_switch_value);
5542 start = "";
5543 }
5544 #endif
5545
5546 if (*start == '\0')
5547 putc ('\n', file);
5548 }
5549
5550 #ifdef USING_ELFOS_H
5551 rs6000_machine = rs6000_machine_from_flags ();
5552 emit_asm_machine ();
5553 #endif
5554
5555 if (DEFAULT_ABI == ABI_ELFv2)
5556 fprintf (file, "\t.abiversion 2\n");
5557 }
5558
5559 \f
5560 /* Return nonzero if this function is known to have a null epilogue. */
5561
5562 int
5563 direct_return (void)
5564 {
5565 if (reload_completed)
5566 {
5567 rs6000_stack_t *info = rs6000_stack_info ();
5568
5569 if (info->first_gp_reg_save == 32
5570 && info->first_fp_reg_save == 64
5571 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5572 && ! info->lr_save_p
5573 && ! info->cr_save_p
5574 && info->vrsave_size == 0
5575 && ! info->push_p)
5576 return 1;
5577 }
5578
5579 return 0;
5580 }
5581
5582 /* Helper for num_insns_constant.  Calculate the number of instructions
5583 needed to load VALUE into a single gpr using combinations of addi, addis,
5584 ori, oris and sldi instructions. */
5585
5586 static int
5587 num_insns_constant_gpr (HOST_WIDE_INT value)
5588 {
5589 /* signed constant loadable with addi */
5590 if (SIGNED_INTEGER_16BIT_P (value))
5591 return 1;
5592
5593 /* constant loadable with addis */
5594 else if ((value & 0xffff) == 0
5595 && (value >> 31 == -1 || value >> 31 == 0))
5596 return 1;
5597
5598 /* PADDI can support up to 34-bit signed integers. */
5599 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5600 return 1;
5601
5602 else if (TARGET_POWERPC64)
5603 {
5604 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5605 HOST_WIDE_INT high = value >> 31;
5606
5607 if (high == 0 || high == -1)
5608 return 2;
5609
5610 high >>= 1;
5611
5612 if (low == 0)
5613 return num_insns_constant_gpr (high) + 1;
5614 else if (high == 0)
5615 return num_insns_constant_gpr (low) + 1;
5616 else
5617 return (num_insns_constant_gpr (high)
5618 + num_insns_constant_gpr (low) + 1);
5619 }
5620
5621 else
5622 return 2;
5623 }
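
/* Worked examples for a 64-bit target without prefixed instructions
   (mnemonics shown for illustration):

     0x0000000000007fff -> 1  (li)
     0x0000000012340000 -> 1  (lis)
     0x0000000012345678 -> 2  (lis + ori)
     0x1234567800000000 -> 3  (lis + ori forming 0x12345678 in the high
                               word, then sldi 32, since the low word
                               is 0)  */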
5624
5625 /* Helper for num_insns_constant. Allow constants formed by the
5626 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5627 and handle modes that require multiple gprs. */
5628
5629 static int
5630 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5631 {
5632 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5633 int total = 0;
5634 while (nregs-- > 0)
5635 {
5636 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5637 int insns = num_insns_constant_gpr (low);
5638 if (insns > 2
5639 /* We won't get more than 2 from num_insns_constant_gpr
5640 except when TARGET_POWERPC64 and mode is DImode or
5641 wider, so the register mode must be DImode. */
5642 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5643 insns = 2;
5644 total += insns;
5645 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5646 it all at once would be UB. */
5647 value >>= (BITS_PER_WORD - 1);
5648 value >>= 1;
5649 }
5650 return total;
5651 }
5652
5653 /* Return the number of instructions it takes to form a constant in as
5654 many gprs as are needed for MODE. */
5655
5656 int
5657 num_insns_constant (rtx op, machine_mode mode)
5658 {
5659 HOST_WIDE_INT val;
5660
5661 switch (GET_CODE (op))
5662 {
5663 case CONST_INT:
5664 val = INTVAL (op);
5665 break;
5666
5667 case CONST_WIDE_INT:
5668 {
5669 int insns = 0;
5670 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5671 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5672 DImode);
5673 return insns;
5674 }
5675
5676 case CONST_DOUBLE:
5677 {
5678 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5679
5680 if (mode == SFmode || mode == SDmode)
5681 {
5682 long l;
5683
5684 if (mode == SDmode)
5685 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5686 else
5687 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5688 /* See the first define_split in rs6000.md handling a
5689 const_double_operand. */
5690 val = l;
5691 mode = SImode;
5692 }
5693 else if (mode == DFmode || mode == DDmode)
5694 {
5695 long l[2];
5696
5697 if (mode == DDmode)
5698 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5699 else
5700 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5701
5702 /* See the second (32-bit) and third (64-bit) define_split
5703 in rs6000.md handling a const_double_operand. */
5704 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5705 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5706 mode = DImode;
5707 }
5708 else if (mode == TFmode || mode == TDmode
5709 || mode == KFmode || mode == IFmode)
5710 {
5711 long l[4];
5712 int insns;
5713
5714 if (mode == TDmode)
5715 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5716 else
5717 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5718
5719 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5720 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5721 insns = num_insns_constant_multi (val, DImode);
5722 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5723 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5724 insns += num_insns_constant_multi (val, DImode);
5725 return insns;
5726 }
5727 else
5728 gcc_unreachable ();
5729 }
5730 break;
5731
5732 default:
5733 gcc_unreachable ();
5734 }
5735
5736 return num_insns_constant_multi (val, mode);
5737 }
5738
5739 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5740 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5741 corresponding element of the vector, but for V4SFmode, the
5742 corresponding "float" is interpreted as an SImode integer. */
5743
5744 HOST_WIDE_INT
5745 const_vector_elt_as_int (rtx op, unsigned int elt)
5746 {
5747 rtx tmp;
5748
5749 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5750 gcc_assert (GET_MODE (op) != V2DImode
5751 && GET_MODE (op) != V2DFmode);
5752
5753 tmp = CONST_VECTOR_ELT (op, elt);
5754 if (GET_MODE (op) == V4SFmode)
5755 tmp = gen_lowpart (SImode, tmp);
5756 return INTVAL (tmp);
5757 }
5758
5759 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5760 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5761 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5762 all items are set to the same value and contain COPIES replicas of the
5763 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5764 operand and the others are set to the value of the operand's msb. */
5765
5766 static bool
5767 vspltis_constant (rtx op, unsigned step, unsigned copies)
5768 {
5769 machine_mode mode = GET_MODE (op);
5770 machine_mode inner = GET_MODE_INNER (mode);
5771
5772 unsigned i;
5773 unsigned nunits;
5774 unsigned bitsize;
5775 unsigned mask;
5776
5777 HOST_WIDE_INT val;
5778 HOST_WIDE_INT splat_val;
5779 HOST_WIDE_INT msb_val;
5780
5781 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5782 return false;
5783
5784 nunits = GET_MODE_NUNITS (mode);
5785 bitsize = GET_MODE_BITSIZE (inner);
5786 mask = GET_MODE_MASK (inner);
5787
5788 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5789 splat_val = val;
5790 msb_val = val >= 0 ? 0 : -1;
5791
5792 /* Construct the value to be splatted, if possible. If not, return 0. */
5793 for (i = 2; i <= copies; i *= 2)
5794 {
5795 HOST_WIDE_INT small_val;
5796 bitsize /= 2;
5797 small_val = splat_val >> bitsize;
5798 mask >>= bitsize;
5799 if (splat_val != ((HOST_WIDE_INT)
5800 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5801 | (small_val & mask)))
5802 return false;
5803 splat_val = small_val;
5804 }
5805
5806 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5807 if (EASY_VECTOR_15 (splat_val))
5808 ;
5809
5810 /* Also check if we can splat, and then add the result to itself. Do so if
5811 the value is positive, or if the splat instruction is using OP's mode;
5812 for splat_val < 0, the splat and the add should use the same mode. */
5813 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5814 && (splat_val >= 0 || (step == 1 && copies == 1)))
5815 ;
5816
5817 /* Also check if we are loading up the most significant bit, which can be
5818 done by loading up -1 and shifting the value left by -1. */
5819 else if (EASY_VECTOR_MSB (splat_val, inner))
5820 ;
5821
5822 else
5823 return false;
5824
5825 /* Check if VAL is present in every STEP-th element, and the
5826 other elements are filled with its most significant bit. */
5827 for (i = 1; i < nunits; ++i)
5828 {
5829 HOST_WIDE_INT desired_val;
5830 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5831 if ((i & (step - 1)) == 0)
5832 desired_val = val;
5833 else
5834 desired_val = msb_val;
5835
5836 if (desired_val != const_vector_elt_as_int (op, elt))
5837 return false;
5838 }
5839
5840 return true;
5841 }
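
/* Worked examples (illustrative): a V4SImode constant with every element
   equal to 0x00030003 succeeds with step == 1, copies == 2; the folding
   loop reduces splat_val from 0x00030003 to 3, so the constant is really
   vspltish 3 viewed as V4SI.  A V8HImode constant {5, 0, 5, 0, 5, 0, 5, 0}
   (in little-endian element order) matches step == 2, copies == 1, since
   every other element holds the splat value and the rest hold its msb
   extension (0) -- i.e. it is vspltisw 5 viewed as V8HI.  */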
5842
5843 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5844 instruction, filling in the bottom elements with 0 or -1.
5845
5846 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5847 for the number of zeroes to shift in, or negative for the number of 0xff
5848 bytes to shift in.
5849
5850 OP is a CONST_VECTOR. */
5851
5852 int
5853 vspltis_shifted (rtx op)
5854 {
5855 machine_mode mode = GET_MODE (op);
5856 machine_mode inner = GET_MODE_INNER (mode);
5857
5858 unsigned i, j;
5859 unsigned nunits;
5860 unsigned mask;
5861
5862 HOST_WIDE_INT val;
5863
5864 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5865 return 0;
5866
5867 /* We need to create pseudo registers to do the shift, so don't recognize
5868 shift vector constants after reload. */
5869 if (!can_create_pseudo_p ())
5870 return 0;
5871
5872 nunits = GET_MODE_NUNITS (mode);
5873 mask = GET_MODE_MASK (inner);
5874
5875 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5876
5877 /* Check if the value can really be the operand of a vspltis[bhw]. */
5878 if (EASY_VECTOR_15 (val))
5879 ;
5880
5881 /* Also check if we are loading up the most significant bit, which can be
5882 done by loading up -1 and shifting the value left by -1. */
5883 else if (EASY_VECTOR_MSB (val, inner))
5884 ;
5885
5886 else
5887 return 0;
5888
5889 /* Check if VAL is present in every element until we find elements
5890 that are 0 or all 1 bits. */
5891 for (i = 1; i < nunits; ++i)
5892 {
5893 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5894 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5895
5896 /* If the value isn't the splat value, check for the remaining elements
5897 being 0/-1. */
5898 if (val != elt_val)
5899 {
5900 if (elt_val == 0)
5901 {
5902 for (j = i+1; j < nunits; ++j)
5903 {
5904 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5905 if (const_vector_elt_as_int (op, elt2) != 0)
5906 return 0;
5907 }
5908
5909 return (nunits - i) * GET_MODE_SIZE (inner);
5910 }
5911
5912 else if ((elt_val & mask) == mask)
5913 {
5914 for (j = i+1; j < nunits; ++j)
5915 {
5916 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5917 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5918 return 0;
5919 }
5920
5921 return -((nunits - i) * GET_MODE_SIZE (inner));
5922 }
5923
5924 else
5925 return 0;
5926 }
5927 }
5928
5929 /* If all elements are equal, we don't need to do VSLDOI. */
5930 return 0;
5931 }
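
/* For example (illustrative, big-endian element order): the V4SImode
   constant {5, 0, 0, 0} has the splat value 5 followed only by zeros, so
   this returns (4 - 1) * 4 == 12: vspltisw 5, then a VSLDOI shifting in
   12 zero bytes.  {5, -1, -1, -1} likewise returns -12, shifting in 0xff
   bytes instead.  */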
5932
5933
5934 /* Return true if OP is of the given MODE and can be synthesized
5935 with a vspltisb, vspltish or vspltisw. */
5936
5937 bool
5938 easy_altivec_constant (rtx op, machine_mode mode)
5939 {
5940 unsigned step, copies;
5941
5942 if (mode == VOIDmode)
5943 mode = GET_MODE (op);
5944 else if (mode != GET_MODE (op))
5945 return false;
5946
5947 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5948 constants. */
5949 if (mode == V2DFmode)
5950 return zero_constant (op, mode);
5951
5952 else if (mode == V2DImode)
5953 {
5954 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5955 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5956 return false;
5957
5958 if (zero_constant (op, mode))
5959 return true;
5960
5961 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5962 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5963 return true;
5964
5965 return false;
5966 }
5967
5968 /* V1TImode is a special container for TImode. Ignore for now. */
5969 else if (mode == V1TImode)
5970 return false;
5971
5972 /* Start with a vspltisw. */
5973 step = GET_MODE_NUNITS (mode) / 4;
5974 copies = 1;
5975
5976 if (vspltis_constant (op, step, copies))
5977 return true;
5978
5979 /* Then try with a vspltish. */
5980 if (step == 1)
5981 copies <<= 1;
5982 else
5983 step >>= 1;
5984
5985 if (vspltis_constant (op, step, copies))
5986 return true;
5987
5988 /* And finally a vspltisb. */
5989 if (step == 1)
5990 copies <<= 1;
5991 else
5992 step >>= 1;
5993
5994 if (vspltis_constant (op, step, copies))
5995 return true;
5996
5997 if (vspltis_shifted (op) != 0)
5998 return true;
5999
6000 return false;
6001 }
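
/* The step/copies progression above simply retries vspltis_constant at
   each element width.  For V16QImode, for instance, the attempts are
   step == 4, copies == 1 (the vspltisw view), then step == 2 (vspltish),
   then step == 1 (vspltisb); an all-3s byte vector fails the first two
   and succeeds on the last as vspltisb 3.  */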
6002
6003 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6004 result is OP. Abort if it is not possible. */
6005
6006 rtx
6007 gen_easy_altivec_constant (rtx op)
6008 {
6009 machine_mode mode = GET_MODE (op);
6010 int nunits = GET_MODE_NUNITS (mode);
6011 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6012 unsigned step = nunits / 4;
6013 unsigned copies = 1;
6014
6015 /* Start with a vspltisw. */
6016 if (vspltis_constant (op, step, copies))
6017 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6018
6019 /* Then try with a vspltish. */
6020 if (step == 1)
6021 copies <<= 1;
6022 else
6023 step >>= 1;
6024
6025 if (vspltis_constant (op, step, copies))
6026 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6027
6028 /* And finally a vspltisb. */
6029 if (step == 1)
6030 copies <<= 1;
6031 else
6032 step >>= 1;
6033
6034 if (vspltis_constant (op, step, copies))
6035 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6036
6037 gcc_unreachable ();
6038 }
6039
6040 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6041 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6042
6043 Return the number of instructions needed (1 or 2) via
6044 NUM_INSNS_PTR.
6045
6046 Return the constant that is being splatted via CONSTANT_PTR. */
6047
6048 bool
6049 xxspltib_constant_p (rtx op,
6050 machine_mode mode,
6051 int *num_insns_ptr,
6052 int *constant_ptr)
6053 {
6054 size_t nunits = GET_MODE_NUNITS (mode);
6055 size_t i;
6056 HOST_WIDE_INT value;
6057 rtx element;
6058
6059 /* Set the returned values to out of bound values. */
6060 *num_insns_ptr = -1;
6061 *constant_ptr = 256;
6062
6063 if (!TARGET_P9_VECTOR)
6064 return false;
6065
6066 if (mode == VOIDmode)
6067 mode = GET_MODE (op);
6068
6069 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6070 return false;
6071
6072 /* Handle (vec_duplicate <constant>). */
6073 if (GET_CODE (op) == VEC_DUPLICATE)
6074 {
6075 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6076 && mode != V2DImode)
6077 return false;
6078
6079 element = XEXP (op, 0);
6080 if (!CONST_INT_P (element))
6081 return false;
6082
6083 value = INTVAL (element);
6084 if (!IN_RANGE (value, -128, 127))
6085 return false;
6086 }
6087
6088 /* Handle (const_vector [...]). */
6089 else if (GET_CODE (op) == CONST_VECTOR)
6090 {
6091 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6092 && mode != V2DImode)
6093 return false;
6094
6095 element = CONST_VECTOR_ELT (op, 0);
6096 if (!CONST_INT_P (element))
6097 return false;
6098
6099 value = INTVAL (element);
6100 if (!IN_RANGE (value, -128, 127))
6101 return false;
6102
6103 for (i = 1; i < nunits; i++)
6104 {
6105 element = CONST_VECTOR_ELT (op, i);
6106 if (!CONST_INT_P (element))
6107 return false;
6108
6109 if (value != INTVAL (element))
6110 return false;
6111 }
6112 }
6113
6114 /* Handle integer constants being loaded into the upper part of the VSX
6115 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6116 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6117 else if (CONST_INT_P (op))
6118 {
6119 if (!SCALAR_INT_MODE_P (mode))
6120 return false;
6121
6122 value = INTVAL (op);
6123 if (!IN_RANGE (value, -128, 127))
6124 return false;
6125
6126 if (!IN_RANGE (value, -1, 0))
6127 {
6128 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6129 return false;
6130
6131 if (EASY_VECTOR_15 (value))
6132 return false;
6133 }
6134 }
6135
6136 else
6137 return false;
6138
6139 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6140 sign extend. Special case 0/-1 to allow getting any VSX register instead
6141 of an Altivec register. */
6142 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6143 && EASY_VECTOR_15 (value))
6144 return false;
6145
6146 /* Return # of instructions and the constant byte for XXSPLTIB. */
6147 if (mode == V16QImode)
6148 *num_insns_ptr = 1;
6149
6150 else if (IN_RANGE (value, -1, 0))
6151 *num_insns_ptr = 1;
6152
6153 else
6154 *num_insns_ptr = 2;
6155
6156 *constant_ptr = (int) value;
6157 return true;
6158 }
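
/* Worked examples (illustrative): splatting 100 across V16QImode needs a
   single xxspltib; splatting 100 across V4SImode needs 2 insns (xxspltib
   of the byte, then a sign extension such as vextsb2w).  Splatting 5
   across V4SImode returns false, since vspltisw 5 does it in one
   instruction.  */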
6159
6160 const char *
6161 output_vec_const_move (rtx *operands)
6162 {
6163 int shift;
6164 machine_mode mode;
6165 rtx dest, vec;
6166
6167 dest = operands[0];
6168 vec = operands[1];
6169 mode = GET_MODE (dest);
6170
6171 if (TARGET_VSX)
6172 {
6173 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6174 int xxspltib_value = 256;
6175 int num_insns = -1;
6176
6177 if (zero_constant (vec, mode))
6178 {
6179 if (TARGET_P9_VECTOR)
6180 return "xxspltib %x0,0";
6181
6182 else if (dest_vmx_p)
6183 return "vspltisw %0,0";
6184
6185 else
6186 return "xxlxor %x0,%x0,%x0";
6187 }
6188
6189 if (all_ones_constant (vec, mode))
6190 {
6191 if (TARGET_P9_VECTOR)
6192 return "xxspltib %x0,255";
6193
6194 else if (dest_vmx_p)
6195 return "vspltisw %0,-1";
6196
6197 else if (TARGET_P8_VECTOR)
6198 return "xxlorc %x0,%x0,%x0";
6199
6200 else
6201 gcc_unreachable ();
6202 }
6203
6204 if (TARGET_P9_VECTOR
6205 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6206 {
6207 if (num_insns == 1)
6208 {
6209 operands[2] = GEN_INT (xxspltib_value & 0xff);
6210 return "xxspltib %x0,%2";
6211 }
6212
6213 return "#";
6214 }
6215 }
6216
6217 if (TARGET_ALTIVEC)
6218 {
6219 rtx splat_vec;
6220
6221 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6222 if (zero_constant (vec, mode))
6223 return "vspltisw %0,0";
6224
6225 if (all_ones_constant (vec, mode))
6226 return "vspltisw %0,-1";
6227
6228 /* Do we need to construct a value using VSLDOI? */
6229 shift = vspltis_shifted (vec);
6230 if (shift != 0)
6231 return "#";
6232
6233 splat_vec = gen_easy_altivec_constant (vec);
6234 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6235 operands[1] = XEXP (splat_vec, 0);
6236 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6237 return "#";
6238
6239 switch (GET_MODE (splat_vec))
6240 {
6241 case E_V4SImode:
6242 return "vspltisw %0,%1";
6243
6244 case E_V8HImode:
6245 return "vspltish %0,%1";
6246
6247 case E_V16QImode:
6248 return "vspltisb %0,%1";
6249
6250 default:
6251 gcc_unreachable ();
6252 }
6253 }
6254
6255 gcc_unreachable ();
6256 }
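
/* For example (illustrative): a zero vector prints as "xxspltib %x0,0" on
   ISA 3.0, as "vspltisw %0,0" for an Altivec destination on older VSX,
   and as "xxlxor %x0,%x0,%x0" otherwise; a constant that needs a VSLDOI
   or a two-insn xxspltib sequence prints "#" so the insn is split
   later.  */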
6257
6258 /* Initialize vector TARGET to VALS. */
6259
6260 void
6261 rs6000_expand_vector_init (rtx target, rtx vals)
6262 {
6263 machine_mode mode = GET_MODE (target);
6264 machine_mode inner_mode = GET_MODE_INNER (mode);
6265 int n_elts = GET_MODE_NUNITS (mode);
6266 int n_var = 0, one_var = -1;
6267 bool all_same = true, all_const_zero = true;
6268 rtx x, mem;
6269 int i;
6270
6271 for (i = 0; i < n_elts; ++i)
6272 {
6273 x = XVECEXP (vals, 0, i);
6274 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6275 ++n_var, one_var = i;
6276 else if (x != CONST0_RTX (inner_mode))
6277 all_const_zero = false;
6278
6279 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6280 all_same = false;
6281 }
6282
6283 if (n_var == 0)
6284 {
6285 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6286 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6287 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6288 {
6289 /* Zero register. */
6290 emit_move_insn (target, CONST0_RTX (mode));
6291 return;
6292 }
6293 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6294 {
6295 /* Splat immediate. */
6296 emit_insn (gen_rtx_SET (target, const_vec));
6297 return;
6298 }
6299 else
6300 {
6301 /* Load from constant pool. */
6302 emit_move_insn (target, const_vec);
6303 return;
6304 }
6305 }
6306
6307 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6308 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6309 {
6310 rtx op[2];
6311 size_t i;
6312 size_t num_elements = all_same ? 1 : 2;
6313 for (i = 0; i < num_elements; i++)
6314 {
6315 op[i] = XVECEXP (vals, 0, i);
6316 /* Just in case there is a SUBREG with a smaller mode, do a
6317 conversion. */
6318 if (GET_MODE (op[i]) != inner_mode)
6319 {
6320 rtx tmp = gen_reg_rtx (inner_mode);
6321 convert_move (tmp, op[i], 0);
6322 op[i] = tmp;
6323 }
6324 /* Allow load with splat double word. */
6325 else if (MEM_P (op[i]))
6326 {
6327 if (!all_same)
6328 op[i] = force_reg (inner_mode, op[i]);
6329 }
6330 else if (!REG_P (op[i]))
6331 op[i] = force_reg (inner_mode, op[i]);
6332 }
6333
6334 if (all_same)
6335 {
6336 if (mode == V2DFmode)
6337 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6338 else
6339 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6340 }
6341 else
6342 {
6343 if (mode == V2DFmode)
6344 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6345 else
6346 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6347 }
6348 return;
6349 }
6350
6351 /* Special case initializing vector int if we are on 64-bit systems with
6352 direct move or we have the ISA 3.0 instructions. */
6353 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6354 && TARGET_DIRECT_MOVE_64BIT)
6355 {
6356 if (all_same)
6357 {
6358 rtx element0 = XVECEXP (vals, 0, 0);
6359 if (MEM_P (element0))
6360 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6361 else
6362 element0 = force_reg (SImode, element0);
6363
6364 if (TARGET_P9_VECTOR)
6365 emit_insn (gen_vsx_splat_v4si (target, element0));
6366 else
6367 {
6368 rtx tmp = gen_reg_rtx (DImode);
6369 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6370 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6371 }
6372 return;
6373 }
6374 else
6375 {
6376 rtx elements[4];
6377 size_t i;
6378
6379 for (i = 0; i < 4; i++)
6380 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6381
6382 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6383 elements[2], elements[3]));
6384 return;
6385 }
6386 }
6387
6388 /* With single precision floating point on VSX, we know that internally
6389 single precision is actually represented as a double, so either make 2
6390 V2DF vectors and convert these vectors to single precision, or do one
6391 conversion and splat the result to the other elements. */
6392 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6393 {
6394 if (all_same)
6395 {
6396 rtx element0 = XVECEXP (vals, 0, 0);
6397
6398 if (TARGET_P9_VECTOR)
6399 {
6400 if (MEM_P (element0))
6401 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6402
6403 emit_insn (gen_vsx_splat_v4sf (target, element0));
6404 }
6405
6406 else
6407 {
6408 rtx freg = gen_reg_rtx (V4SFmode);
6409 rtx sreg = force_reg (SFmode, element0);
6410 rtx cvt = (TARGET_XSCVDPSPN
6411 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6412 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6413
6414 emit_insn (cvt);
6415 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6416 const0_rtx));
6417 }
6418 }
6419 else
6420 {
6421 rtx dbl_even = gen_reg_rtx (V2DFmode);
6422 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6423 rtx flt_even = gen_reg_rtx (V4SFmode);
6424 rtx flt_odd = gen_reg_rtx (V4SFmode);
6425 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6426 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6427 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6428 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6429
6430 /* Use VMRGEW if we can instead of doing a permute. */
6431 if (TARGET_P8_VECTOR)
6432 {
6433 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6434 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6435 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6436 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6437 if (BYTES_BIG_ENDIAN)
6438 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6439 else
6440 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6441 }
6442 else
6443 {
6444 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6445 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6446 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6447 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6448 rs6000_expand_extract_even (target, flt_even, flt_odd);
6449 }
6450 }
6451 return;
6452 }
6453
6454 /* Special case initializing vector short/char that are splats if we are on
6455 64-bit systems with direct move. */
6456 if (all_same && TARGET_DIRECT_MOVE_64BIT
6457 && (mode == V16QImode || mode == V8HImode))
6458 {
6459 rtx op0 = XVECEXP (vals, 0, 0);
6460 rtx di_tmp = gen_reg_rtx (DImode);
6461
6462 if (!REG_P (op0))
6463 op0 = force_reg (GET_MODE_INNER (mode), op0);
6464
6465 if (mode == V16QImode)
6466 {
6467 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6468 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6469 return;
6470 }
6471
6472 if (mode == V8HImode)
6473 {
6474 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6475 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6476 return;
6477 }
6478 }
6479
6480 /* Store value to stack temp. Load vector element. Splat. However, splat
6481 of 64-bit items is not supported on Altivec. */
6482 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6483 {
6484 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6485 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6486 XVECEXP (vals, 0, 0));
6487 x = gen_rtx_UNSPEC (VOIDmode,
6488 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6489 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6490 gen_rtvec (2,
6491 gen_rtx_SET (target, mem),
6492 x)));
6493 x = gen_rtx_VEC_SELECT (inner_mode, target,
6494 gen_rtx_PARALLEL (VOIDmode,
6495 gen_rtvec (1, const0_rtx)));
6496 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6497 return;
6498 }
6499
6500 /* One field is non-constant. Load constant then overwrite
6501 varying field. */
6502 if (n_var == 1)
6503 {
6504 rtx copy = copy_rtx (vals);
6505
6506 /* Load constant part of vector, substitute neighboring value for
6507 varying element. */
6508 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6509 rs6000_expand_vector_init (target, copy);
6510
6511 /* Insert variable. */
6512 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6513 return;
6514 }
6515
6516 /* Construct the vector in memory one field at a time
6517 and load the whole vector. */
6518 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6519 for (i = 0; i < n_elts; i++)
6520 emit_move_insn (adjust_address_nv (mem, inner_mode,
6521 i * GET_MODE_SIZE (inner_mode)),
6522 XVECEXP (vals, 0, i));
6523 emit_move_insn (target, mem);
6524 }
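
/* Example of the n_var == 1 path above (illustrative): initializing
   V4SImode from {x, 1, 2, 3} with x in a register first materializes the
   constant vector {1, 1, 2, 3} -- the varying slot borrows its neighbor's
   value -- and then overwrites element 0 with x via
   rs6000_expand_vector_set.  */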
6525
6526 /* Set field ELT of TARGET to VAL. */
6527
6528 void
6529 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6530 {
6531 machine_mode mode = GET_MODE (target);
6532 machine_mode inner_mode = GET_MODE_INNER (mode);
6533 rtx reg = gen_reg_rtx (mode);
6534 rtx mask, mem, x;
6535 int width = GET_MODE_SIZE (inner_mode);
6536 int i;
6537
6538 val = force_reg (GET_MODE (val), val);
6539
6540 if (VECTOR_MEM_VSX_P (mode))
6541 {
6542 rtx insn = NULL_RTX;
6543 rtx elt_rtx = GEN_INT (elt);
6544
6545 if (mode == V2DFmode)
6546 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6547
6548 else if (mode == V2DImode)
6549 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6550
6551 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6552 {
6553 if (mode == V4SImode)
6554 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6555 else if (mode == V8HImode)
6556 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6557 else if (mode == V16QImode)
6558 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6559 else if (mode == V4SFmode)
6560 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6561 }
6562
6563 if (insn)
6564 {
6565 emit_insn (insn);
6566 return;
6567 }
6568 }
6569
6570 /* Simplify setting single element vectors like V1TImode. */
6571 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6572 {
6573 emit_move_insn (target, gen_lowpart (mode, val));
6574 return;
6575 }
6576
6577 /* Load single variable value. */
6578 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6579 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6580 x = gen_rtx_UNSPEC (VOIDmode,
6581 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6582 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6583 gen_rtvec (2,
6584 gen_rtx_SET (reg, mem),
6585 x)));
6586
6587 /* Linear sequence. */
6588 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6589 for (i = 0; i < 16; ++i)
6590 XVECEXP (mask, 0, i) = GEN_INT (i);
6591
6592 /* Set permute mask to insert element into target. */
6593 for (i = 0; i < width; ++i)
6594 XVECEXP (mask, 0, elt*width + i)
6595 = GEN_INT (i + 0x10);
6596 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6597
6598 if (BYTES_BIG_ENDIAN)
6599 x = gen_rtx_UNSPEC (mode,
6600 gen_rtvec (3, target, reg,
6601 force_reg (V16QImode, x)),
6602 UNSPEC_VPERM);
6603 else
6604 {
6605 if (TARGET_P9_VECTOR)
6606 x = gen_rtx_UNSPEC (mode,
6607 gen_rtvec (3, reg, target,
6608 force_reg (V16QImode, x)),
6609 UNSPEC_VPERMR);
6610 else
6611 {
6612 /* Invert selector. We prefer to generate VNAND on P8 so
6613 that future fusion opportunities can kick in, but must
6614 generate VNOR elsewhere. */
6615 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6616 rtx iorx = (TARGET_P8_VECTOR
6617 ? gen_rtx_IOR (V16QImode, notx, notx)
6618 : gen_rtx_AND (V16QImode, notx, notx));
6619 rtx tmp = gen_reg_rtx (V16QImode);
6620 emit_insn (gen_rtx_SET (tmp, iorx));
6621
6622 /* Permute with operands reversed and adjusted selector. */
6623 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6624 UNSPEC_VPERM);
6625 }
6626 }
6627
6628 emit_insn (gen_rtx_SET (target, x));
6629 }
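
/* Example of the permute mask built above (illustrative): to insert into
   element 1 of a V4SImode vector (width == 4), selector bytes 4..7 become
   0x10..0x13, picking the new value from the second vperm input, while
   the remaining bytes keep 0..15 so every other byte is copied from
   TARGET unchanged.  */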
6630
6631 /* Extract field ELT from VEC into TARGET. */
6632
6633 void
6634 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6635 {
6636 machine_mode mode = GET_MODE (vec);
6637 machine_mode inner_mode = GET_MODE_INNER (mode);
6638 rtx mem;
6639
6640 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6641 {
6642 switch (mode)
6643 {
6644 default:
6645 break;
6646 case E_V1TImode:
6647 emit_move_insn (target, gen_lowpart (TImode, vec));
6648 return;
6649 case E_V2DFmode:
6650 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6651 return;
6652 case E_V2DImode:
6653 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6654 return;
6655 case E_V4SFmode:
6656 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6657 return;
6658 case E_V16QImode:
6659 if (TARGET_DIRECT_MOVE_64BIT)
6660 {
6661 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6662 return;
6663 }
6664 else
6665 break;
6666 case E_V8HImode:
6667 if (TARGET_DIRECT_MOVE_64BIT)
6668 {
6669 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6670 return;
6671 }
6672 else
6673 break;
6674 case E_V4SImode:
6675 if (TARGET_DIRECT_MOVE_64BIT)
6676 {
6677 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6678 return;
6679 }
6680 break;
6681 }
6682 }
6683 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6684 && TARGET_DIRECT_MOVE_64BIT)
6685 {
6686 if (GET_MODE (elt) != DImode)
6687 {
6688 rtx tmp = gen_reg_rtx (DImode);
6689 convert_move (tmp, elt, 0);
6690 elt = tmp;
6691 }
6692 else if (!REG_P (elt))
6693 elt = force_reg (DImode, elt);
6694
6695 switch (mode)
6696 {
6697 case E_V1TImode:
6698 emit_move_insn (target, gen_lowpart (TImode, vec));
6699 return;
6700
6701 case E_V2DFmode:
6702 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6703 return;
6704
6705 case E_V2DImode:
6706 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6707 return;
6708
6709 case E_V4SFmode:
6710 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6711 return;
6712
6713 case E_V4SImode:
6714 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6715 return;
6716
6717 case E_V8HImode:
6718 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6719 return;
6720
6721 case E_V16QImode:
6722 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6723 return;
6724
6725 default:
6726 gcc_unreachable ();
6727 }
6728 }
6729
6730 /* Allocate mode-sized buffer. */
6731 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6732
6733 emit_move_insn (mem, vec);
6734 if (CONST_INT_P (elt))
6735 {
6736 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6737
6738 /* Add offset to field within buffer matching vector element. */
6739 mem = adjust_address_nv (mem, inner_mode,
6740 modulo_elt * GET_MODE_SIZE (inner_mode));
6741 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6742 }
6743 else
6744 {
6745 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6746 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6747 rtx new_addr = gen_reg_rtx (Pmode);
6748
6749 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6750 if (ele_size > 1)
6751 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6752 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6753 new_addr = change_address (mem, inner_mode, new_addr);
6754 emit_move_insn (target, new_addr);
6755 }
6756 }
6757
6758 /* Return the offset within a memory object (MEM) of a vector type to a given
6759 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6760 the element is constant, we return a constant integer.
6761
6762 Otherwise, we use a base register temporary to calculate the offset after
6763 masking it to fit within the bounds of the vector and scaling it. The
6764 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6765 built-in function. */
6766
6767 static rtx
6768 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6769 {
6770 if (CONST_INT_P (element))
6771 return GEN_INT (INTVAL (element) * scalar_size);
6772
6773 /* All insns should use the 'Q' constraint (address is a single register) if
6774 the element number is not a constant. */
6775 gcc_assert (satisfies_constraint_Q (mem));
6776
6777 /* Mask the element to make sure the element number is between 0 and the
6778 maximum number of elements - 1 so that we don't generate an address
6779 outside the vector. */
6780 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6781 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6782 emit_insn (gen_rtx_SET (base_tmp, and_op));
6783
6784 /* Shift the element to get the byte offset from the element number. */
6785 int shift = exact_log2 (scalar_size);
6786 gcc_assert (shift >= 0);
6787
6788 if (shift > 0)
6789 {
6790 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6791 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6792 }
6793
6794 return base_tmp;
6795 }
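
/* For a variable ELEMENT into a V4SImode vector (scalar_size == 4), the
   code above leaves in BASE_TMP (sketched, not a literal RTL dump):

     base_tmp = element & 3;    // clamp to 0 .. nunits - 1
     base_tmp <<= 2;            // scale by the element size

   which is the byte offset of the selected element.  */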
6796
6797 /* Helper function to update PC-relative addresses when adjusting a memory
6798 address (ADDR) of a vector to point to a scalar field within the vector
6799 with a constant offset (ELEMENT_OFFSET).  If the address is not valid, we
6800 can use the base register temporary (BASE_TMP) to form the address. */
6801
6802 static rtx
6803 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6804 {
6805 rtx new_addr = NULL;
6806
6807 gcc_assert (CONST_INT_P (element_offset));
6808
6809 if (GET_CODE (addr) == CONST)
6810 addr = XEXP (addr, 0);
6811
6812 if (GET_CODE (addr) == PLUS)
6813 {
6814 rtx op0 = XEXP (addr, 0);
6815 rtx op1 = XEXP (addr, 1);
6816
6817 if (CONST_INT_P (op1))
6818 {
6819 HOST_WIDE_INT offset
6820 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6821
6822 if (offset == 0)
6823 new_addr = op0;
6824
6825 else
6826 {
6827 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6828 new_addr = gen_rtx_CONST (Pmode, plus);
6829 }
6830 }
6831
6832 else
6833 {
6834 emit_move_insn (base_tmp, addr);
6835 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6836 }
6837 }
6838
6839 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
6840 {
6841 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
6842 new_addr = gen_rtx_CONST (Pmode, plus);
6843 }
6844
6845 else
6846 gcc_unreachable ();
6847
6848 return new_addr;
6849 }
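
/* Editor's example (a sketch of the folding above, not part of GCC): if
ADDR is (const (plus (symbol_ref "x") (const_int 16))) and ELEMENT_OFFSET
is 8, the result is (const (plus (symbol_ref "x") (const_int 24))); a bare
(symbol_ref "x") with ELEMENT_OFFSET 8 instead yields
(const (plus (symbol_ref "x") (const_int 8))). */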
6850
6851 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6852 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6853 temporary (BASE_TMP) to fix up the address. Return the new memory address
6854 that is valid for reads or writes to a given register (SCALAR_REG).
6855
6856 This function is expected to be called after reload is completed when we are
6857 splitting insns. The temporary BASE_TMP might be set multiple times with
6858 this code. */
6859
6860 rtx
6861 rs6000_adjust_vec_address (rtx scalar_reg,
6862 rtx mem,
6863 rtx element,
6864 rtx base_tmp,
6865 machine_mode scalar_mode)
6866 {
6867 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6868 rtx addr = XEXP (mem, 0);
6869 rtx new_addr;
6870
6871 gcc_assert (!reg_mentioned_p (base_tmp, addr));
6872 gcc_assert (!reg_mentioned_p (base_tmp, element));
6873
6874 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6875 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6876
6877 /* Calculate what we need to add to the address to get the element
6878 address. */
6879 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
6880
6881 /* Create the new address pointing to the element within the vector. If we
6882 are adding 0, we don't have to change the address. */
6883 if (element_offset == const0_rtx)
6884 new_addr = addr;
6885
6886 /* A simple indirect address can be converted into a reg + offset
6887 address. */
6888 else if (REG_P (addr) || SUBREG_P (addr))
6889 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6890
6891 /* For references to local static variables, fold a constant offset into the
6892 address. */
6893 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
6894 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
6895
6896 /* Optimize D-FORM addresses with a constant offset and a constant element
6897 number, to include the element offset in the address directly. */
6898 else if (GET_CODE (addr) == PLUS)
6899 {
6900 rtx op0 = XEXP (addr, 0);
6901 rtx op1 = XEXP (addr, 1);
6902
6903 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6904 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6905 {
6906 /* op0 should never be r0, because r0+offset is not valid. But it
6907 doesn't hurt to make sure it is not r0. */
6908 gcc_assert (reg_or_subregno (op0) != 0);
6909
6910 /* D-FORM address with constant element number. */
6911 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6912 rtx offset_rtx = GEN_INT (offset);
6913 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6914 }
6915 else
6916 {
6917 /* If we don't have a D-FORM address with a constant element number,
6918 add the two terms of the current address. Then add the offset.
6919
6920 Previously, we tried to add the offset to OP1 and change the
6921 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6922 complicated because we had to verify that op1 was not GPR0 and we
6923 had a constant element offset (due to the way ADDI is defined).
6924 By doing the add of OP0 and OP1 first, and then adding in the
6925 offset, it has the benefit that if D-FORM instructions are
6926 allowed, the offset is part of the memory access to the vector
6927 element. */
6928 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
6929 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6930 }
6931 }
6932
6933 else
6934 {
6935 emit_move_insn (base_tmp, addr);
6936 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6937 }
6938
6939 /* If the address isn't valid, move the address into the temporary base
6940 register. Some reasons it could not be valid include:
6941
6942 The address offset overflowed the 16- or 34-bit offset size;
6943 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6944 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
6945 Only X-FORM loads can be done, and the address is D-FORM. */
6946
6947 enum insn_form iform
6948 = address_to_insn_form (new_addr, scalar_mode,
6949 reg_to_non_prefixed (scalar_reg, scalar_mode));
6950
6951 if (iform == INSN_FORM_BAD)
6952 {
6953 emit_move_insn (base_tmp, new_addr);
6954 new_addr = base_tmp;
6955 }
6956
6957 return change_address (mem, scalar_mode, new_addr);
6958 }
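
/* Editor's example (a sketch, assuming the folded address is valid):
extracting constant element 2 of a V4SImode vector at
(mem (plus (reg 9) (const_int 16))) takes the D-FORM path above with
element_offset = 2 * 4 = 8, so the returned memory is simply an SImode
reference to (plus (reg 9) (const_int 24)) and no extra insns are
emitted. */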
6959
6960 /* Split a variable vec_extract operation into the component instructions. */
6961
6962 void
6963 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6964 rtx tmp_altivec)
6965 {
6966 machine_mode mode = GET_MODE (src);
6967 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6968 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6969 int byte_shift = exact_log2 (scalar_size);
6970
6971 gcc_assert (byte_shift >= 0);
6972
6973 /* If we are given a memory address, optimize to load just the element. We
6974 don't have to adjust the vector element number on little endian
6975 systems. */
6976 if (MEM_P (src))
6977 {
6978 emit_move_insn (dest,
6979 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
6980 scalar_mode));
6981 return;
6982 }
6983
6984 else if (REG_P (src) || SUBREG_P (src))
6985 {
6986 int num_elements = GET_MODE_NUNITS (mode);
6987 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6988 int bit_shift = 7 - exact_log2 (num_elements);
6989 rtx element2;
6990 unsigned int dest_regno = reg_or_subregno (dest);
6991 unsigned int src_regno = reg_or_subregno (src);
6992 unsigned int element_regno = reg_or_subregno (element);
6993
6994 gcc_assert (REG_P (tmp_gpr));
6995
6996 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6997 a general purpose register. */
6998 if (TARGET_P9_VECTOR
6999 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7000 && INT_REGNO_P (dest_regno)
7001 && ALTIVEC_REGNO_P (src_regno)
7002 && INT_REGNO_P (element_regno))
7003 {
7004 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7005 rtx element_si = gen_rtx_REG (SImode, element_regno);
7006
7007 if (mode == V16QImode)
7008 emit_insn (BYTES_BIG_ENDIAN
7009 ? gen_vextublx (dest_si, element_si, src)
7010 : gen_vextubrx (dest_si, element_si, src));
7011
7012 else if (mode == V8HImode)
7013 {
7014 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7015 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7016 emit_insn (BYTES_BIG_ENDIAN
7017 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7018 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7019 }
7020
7021
7022 else
7023 {
7024 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7025 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7026 emit_insn (BYTES_BIG_ENDIAN
7027 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7028 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7029 }
7030
7031 return;
7032 }
7033
7034
7035 gcc_assert (REG_P (tmp_altivec));
7036
7037 /* For little endian, adjust the element ordering. For V2DI/V2DF, we can
7038 use an XOR; otherwise we need to subtract. The shift amount is chosen
7039 so that VSLO will shift the element into the upper position (adding 3
7040 converts a byte shift into a bit shift). */
7041 if (scalar_size == 8)
7042 {
7043 if (!BYTES_BIG_ENDIAN)
7044 {
7045 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7046 element2 = tmp_gpr;
7047 }
7048 else
7049 element2 = element;
7050
7051 /* Generate RLDIC directly to shift left by 6 bits and keep just the
7052 single bit we need. */
7053 emit_insn (gen_rtx_SET (tmp_gpr,
7054 gen_rtx_AND (DImode,
7055 gen_rtx_ASHIFT (DImode,
7056 element2,
7057 GEN_INT (6)),
7058 GEN_INT (64))));
7059 }
7060 else
7061 {
7062 if (!BYTES_BIG_ENDIAN)
7063 {
7064 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7065
7066 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7067 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7068 element2 = tmp_gpr;
7069 }
7070 else
7071 element2 = element;
7072
7073 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7074 }
7075
7076 /* Get the shift amount into the low byte of the Altivec register, where
7077 VSLO expects it. */
7078 if (TARGET_P9_VECTOR)
7079 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7080 else if (can_create_pseudo_p ())
7081 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7082 else
7083 {
7084 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7085 emit_move_insn (tmp_di, tmp_gpr);
7086 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7087 }
7088
7089 /* Do the VSLO to get the value into the final location. */
7090 switch (mode)
7091 {
7092 case E_V2DFmode:
7093 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7094 return;
7095
7096 case E_V2DImode:
7097 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7098 return;
7099
7100 case E_V4SFmode:
7101 {
7102 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7103 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7104 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7105 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7106 tmp_altivec));
7107
7108 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7109 return;
7110 }
7111
7112 case E_V4SImode:
7113 case E_V8HImode:
7114 case E_V16QImode:
7115 {
7116 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7117 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7118 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7119 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7120 tmp_altivec));
7121 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7122 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7123 GEN_INT (64 - bits_in_element)));
7124 return;
7125 }
7126
7127 default:
7128 gcc_unreachable ();
7129 }
7130
7131 return;
7132 }
7133 else
7134 gcc_unreachable ();
7135 }
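
/* Editor's note: on a little-endian power9 target, a variable vec_extract
from a V16QImode vector register into a GPR therefore splits to a single
VEXTUBRX via the path above, while older targets fall back to the
mask/shift plus VSLO sequence. */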
7136
7137 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
7138 selects whether the returned alignment is ABI-mandated, optional, or
7139 both ABI-mandated and optional alignment. */
7140
7141 unsigned int
7142 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7143 {
7144 if (how != align_opt)
7145 {
7146 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7147 align = 128;
7148 }
7149
7150 if (how != align_abi)
7151 {
7152 if (TREE_CODE (type) == ARRAY_TYPE
7153 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7154 {
7155 if (align < BITS_PER_WORD)
7156 align = BITS_PER_WORD;
7157 }
7158 }
7159
7160 return align;
7161 }
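
/* Editor's example (a sketch; a HOW value that is neither align_opt nor
align_abi enables both tests above): a vector type is raised to at least
128-bit alignment by the first test, and an array of QImode elements such
as char buf[32] is raised to at least BITS_PER_WORD by the second. */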
7162
7163 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7164 instructions simply ignore the low bits; VSX memory instructions
7165 are aligned to 4 or 8 bytes. */
7166
7167 static bool
7168 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7169 {
7170 return (STRICT_ALIGNMENT
7171 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7172 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7173 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7174 && (int) align < VECTOR_ALIGN (mode)))));
7175 }
7176
7177 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7178
7179 bool
7180 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7181 {
7182 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7183 {
7184 if (computed != 128)
7185 {
7186 static bool warned;
7187 if (!warned && warn_psabi)
7188 {
7189 warned = true;
7190 inform (input_location,
7191 "the layout of aggregates containing vectors with"
7192 " %d-byte alignment has changed in GCC 5",
7193 computed / BITS_PER_UNIT);
7194 }
7195 }
7196 /* In current GCC there is no special case. */
7197 return false;
7198 }
7199
7200 return false;
7201 }
7202
7203 /* AIX increases the natural record alignment to doubleword if the first
7204 field is an FP double, while the FP fields themselves remain word aligned. */
7205
7206 unsigned int
7207 rs6000_special_round_type_align (tree type, unsigned int computed,
7208 unsigned int specified)
7209 {
7210 unsigned int align = MAX (computed, specified);
7211 tree field = TYPE_FIELDS (type);
7212
7213 /* Skip all non-field decls. */
7214 while (field != NULL
7215 && (TREE_CODE (field) != FIELD_DECL
7216 || DECL_FIELD_ABI_IGNORED (field)))
7217 field = DECL_CHAIN (field);
7218
7219 if (field != NULL && field != type)
7220 {
7221 type = TREE_TYPE (field);
7222 while (TREE_CODE (type) == ARRAY_TYPE)
7223 type = TREE_TYPE (type);
7224
7225 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7226 align = MAX (align, 64);
7227 }
7228
7229 return align;
7230 }
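
/* Editor's example (a sketch, not part of GCC):

struct { double d; int i; }; // raised to 64-bit alignment on AIX
struct { int i; double d; }; // keeps its natural alignment

Only the first non-ignored field is examined, looking through arrays. */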
7231
7232 /* Darwin increases record alignment to the natural alignment of
7233 the first field. */
7234
7235 unsigned int
7236 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7237 unsigned int specified)
7238 {
7239 unsigned int align = MAX (computed, specified);
7240
7241 if (TYPE_PACKED (type))
7242 return align;
7243
7244 /* Find the first field, looking down into aggregates. */
7245 do {
7246 tree field = TYPE_FIELDS (type);
7247 /* Skip all non-field decls. */
7248 while (field != NULL
7249 && (TREE_CODE (field) != FIELD_DECL
7250 || DECL_FIELD_ABI_IGNORED (field)))
7251 field = DECL_CHAIN (field);
7252 if (! field)
7253 break;
7254 /* A packed field does not contribute any extra alignment. */
7255 if (DECL_PACKED (field))
7256 return align;
7257 type = TREE_TYPE (field);
7258 while (TREE_CODE (type) == ARRAY_TYPE)
7259 type = TREE_TYPE (type);
7260 } while (AGGREGATE_TYPE_P (type));
7261
7262 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7263 align = MAX (align, TYPE_ALIGN (type));
7264
7265 return align;
7266 }
7267
7268 /* Return 1 for an operand in small memory on V.4/eabi. */
7269
7270 int
7271 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7272 machine_mode mode ATTRIBUTE_UNUSED)
7273 {
7274 #if TARGET_ELF
7275 rtx sym_ref;
7276
7277 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7278 return 0;
7279
7280 if (DEFAULT_ABI != ABI_V4)
7281 return 0;
7282
7283 if (SYMBOL_REF_P (op))
7284 sym_ref = op;
7285
7286 else if (GET_CODE (op) != CONST
7287 || GET_CODE (XEXP (op, 0)) != PLUS
7288 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7289 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7290 return 0;
7291
7292 else
7293 {
7294 rtx sum = XEXP (op, 0);
7295 HOST_WIDE_INT summand;
7296
7297 /* We have to be careful here, because it is the referenced address
7298 that must be within 32k of _SDA_BASE_, not just the symbol. */
7299 summand = INTVAL (XEXP (sum, 1));
7300 if (summand < 0 || summand > g_switch_value)
7301 return 0;
7302
7303 sym_ref = XEXP (sum, 0);
7304 }
7305
7306 return SYMBOL_REF_SMALL_P (sym_ref);
7307 #else
7308 return 0;
7309 #endif
7310 }
7311
7312 /* Return true if either operand is a general purpose register. */
7313
7314 bool
7315 gpr_or_gpr_p (rtx op0, rtx op1)
7316 {
7317 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7318 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7319 }
7320
7321 /* Return true if this is a direct move operation between GPR registers and
7322 floating point/VSX registers. */
7323
7324 bool
7325 direct_move_p (rtx op0, rtx op1)
7326 {
7327 if (!REG_P (op0) || !REG_P (op1))
7328 return false;
7329
7330 if (!TARGET_DIRECT_MOVE)
7331 return false;
7332
7333 int regno0 = REGNO (op0);
7334 int regno1 = REGNO (op1);
7335 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7336 return false;
7337
7338 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7339 return true;
7340
7341 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7342 return true;
7343
7344 return false;
7345 }
7346
7347 /* Return true if ADDR is an acceptable address for a quad memory
7348 operation of mode MODE (either LQ/STQ for general purpose registers, or
7349 LXV/STXV for vector registers under ISA 3.0). STRICT indicates whether
7350 strict address checking (as done after register allocation) is
7351 required. */
7352
7353 bool
7354 quad_address_p (rtx addr, machine_mode mode, bool strict)
7355 {
7356 rtx op0, op1;
7357
7358 if (GET_MODE_SIZE (mode) != 16)
7359 return false;
7360
7361 if (legitimate_indirect_address_p (addr, strict))
7362 return true;
7363
7364 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7365 return false;
7366
7367 /* Is this a valid prefixed address? If the bottom four bits of the offset
7368 are non-zero, we could use a prefixed instruction (which does not have the
7369 DQ-form constraint that the traditional instruction had) instead of
7370 forcing the unaligned offset to a GPR. */
7371 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7372 return true;
7373
7374 if (GET_CODE (addr) != PLUS)
7375 return false;
7376
7377 op0 = XEXP (addr, 0);
7378 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7379 return false;
7380
7381 op1 = XEXP (addr, 1);
7382 if (!CONST_INT_P (op1))
7383 return false;
7384
7385 return quad_address_offset_p (INTVAL (op1));
7386 }
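
/* Editor's note: quad_address_offset_p above accepts only offsets whose
bottom four bits are zero and that fit in a signed 16-bit field, i.e.
multiples of 16 in [-32768, 32752]; an unaligned offset such as r9+8 can
still be handled by the prefixed-address test earlier in the function when
prefixed instructions are available. */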
7387
7388 /* Return true if this is a load or store quad operation. This function does
7389 not handle the atomic quad memory instructions. */
7390
7391 bool
7392 quad_load_store_p (rtx op0, rtx op1)
7393 {
7394 bool ret;
7395
7396 if (!TARGET_QUAD_MEMORY)
7397 ret = false;
7398
7399 else if (REG_P (op0) && MEM_P (op1))
7400 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7401 && quad_memory_operand (op1, GET_MODE (op1))
7402 && !reg_overlap_mentioned_p (op0, op1));
7403
7404 else if (MEM_P (op0) && REG_P (op1))
7405 ret = (quad_memory_operand (op0, GET_MODE (op0))
7406 && quad_int_reg_operand (op1, GET_MODE (op1)));
7407
7408 else
7409 ret = false;
7410
7411 if (TARGET_DEBUG_ADDR)
7412 {
7413 fprintf (stderr, "\n========== quad_load_store_p, return %s\n",
7414 ret ? "true" : "false");
7415 debug_rtx (gen_rtx_SET (op0, op1));
7416 }
7417
7418 return ret;
7419 }
7420
7421 /* Given an address, return a constant offset term if one exists. */
7422
7423 static rtx
7424 address_offset (rtx op)
7425 {
7426 if (GET_CODE (op) == PRE_INC
7427 || GET_CODE (op) == PRE_DEC)
7428 op = XEXP (op, 0);
7429 else if (GET_CODE (op) == PRE_MODIFY
7430 || GET_CODE (op) == LO_SUM)
7431 op = XEXP (op, 1);
7432
7433 if (GET_CODE (op) == CONST)
7434 op = XEXP (op, 0);
7435
7436 if (GET_CODE (op) == PLUS)
7437 op = XEXP (op, 1);
7438
7439 if (CONST_INT_P (op))
7440 return op;
7441
7442 return NULL_RTX;
7443 }
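
/* Editor's examples (sketches of the traversal above):

address_offset ((plus (reg 9) (const_int 16))) => (const_int 16)
address_offset ((lo_sum (reg 9) (symbol_ref))) => NULL_RTX
address_offset ((pre_modify (reg 1) (plus (reg 1) (const_int -64))))
=> (const_int -64)

Callers treat NULL_RTX as "no constant offset term". */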
7444
7445 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7446 the mode. If we can't find (or don't know) the alignment of the symbol
7447 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7448 should be pessimistic]. Offsets are validated in the same way as for
7449 reg + offset. */
7450 static bool
7451 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7452 {
7453 /* We should not get here with this. */
7454 gcc_checking_assert (! mode_supports_dq_form (mode));
7455
7456 if (GET_CODE (x) == CONST)
7457 x = XEXP (x, 0);
7458
7459 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7460 x = XVECEXP (x, 0, 0);
7461
7462 rtx sym = NULL_RTX;
7463 unsigned HOST_WIDE_INT offset = 0;
7464
7465 if (GET_CODE (x) == PLUS)
7466 {
7467 sym = XEXP (x, 0);
7468 if (! SYMBOL_REF_P (sym))
7469 return false;
7470 if (!CONST_INT_P (XEXP (x, 1)))
7471 return false;
7472 offset = INTVAL (XEXP (x, 1));
7473 }
7474 else if (SYMBOL_REF_P (x))
7475 sym = x;
7476 else if (CONST_INT_P (x))
7477 offset = INTVAL (x);
7478 else if (GET_CODE (x) == LABEL_REF)
7479 offset = 0; // We assume code labels are Pmode aligned
7480 else
7481 return false; // Not sure what we have here.
7482
7483 /* If we don't know the alignment of the thing to which the symbol refers,
7484 we assume optimistically it is "enough".
7485 ??? maybe we should be pessimistic instead. */
7486 unsigned align = 0;
7487
7488 if (sym)
7489 {
7490 tree decl = SYMBOL_REF_DECL (sym);
7491 #if TARGET_MACHO
7492 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7493 /* The decl in an indirection symbol is the original one, which might
7494 be less aligned than the indirection. Our indirections are always
7495 pointer-aligned. */
7496 ;
7497 else
7498 #endif
7499 if (decl && DECL_ALIGN (decl))
7500 align = DECL_ALIGN_UNIT (decl);
7501 }
7502
7503 unsigned int extra = 0;
7504 switch (mode)
7505 {
7506 case E_DFmode:
7507 case E_DDmode:
7508 case E_DImode:
7509 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7510 addressing. */
7511 if (VECTOR_MEM_VSX_P (mode))
7512 return false;
7513
7514 if (!TARGET_POWERPC64)
7515 extra = 4;
7516 else if ((offset & 3) || (align & 3))
7517 return false;
7518 break;
7519
7520 case E_TFmode:
7521 case E_IFmode:
7522 case E_KFmode:
7523 case E_TDmode:
7524 case E_TImode:
7525 case E_PTImode:
7526 extra = 8;
7527 if (!TARGET_POWERPC64)
7528 extra = 12;
7529 else if ((offset & 3) || (align & 3))
7530 return false;
7531 break;
7532
7533 default:
7534 break;
7535 }
7536
7537 /* We only care if the access(es) would cause a change to the high part. */
7538 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7539 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7540 }
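
/* Editor's note: ((offset & 0xffff) ^ 0x8000) - 0x8000 sign-extends the
low 16 bits of OFFSET; e.g. 0x1fffc becomes -4 and 0x17ffc becomes 0x7ffc.
Only that low part can push an access across a 64K boundary that the high
part of the lo_sum has not already accounted for. */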
7541
7542 /* Return true if the MEM operand is a memory operand suitable for use
7543 with a (full width, possibly multiple) gpr load/store. On
7544 powerpc64 this means the offset must be divisible by 4.
7545 Implements 'Y' constraint.
7546
7547 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7548 a constraint function we know the operand has satisfied a suitable
7549 memory predicate.
7550
7551 Offsetting a lo_sum should not be allowed, except where we know by
7552 alignment that a 32k boundary is not crossed. Note that by
7553 "offsetting" here we mean a further offset to access parts of the
7554 MEM. It's fine to have a lo_sum where the inner address is offset
7555 from a sym, since the same sym+offset will appear in the high part
7556 of the address calculation. */
7557
7558 bool
7559 mem_operand_gpr (rtx op, machine_mode mode)
7560 {
7561 unsigned HOST_WIDE_INT offset;
7562 int extra;
7563 rtx addr = XEXP (op, 0);
7564
7565 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7566 if (TARGET_UPDATE
7567 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7568 && mode_supports_pre_incdec_p (mode)
7569 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7570 return true;
7571
7572 /* Allow prefixed instructions if supported. If the bottom two bits of the
7573 offset are non-zero, we could use a prefixed instruction (which does not
7574 have the DS-form constraint that the traditional instruction had) instead
7575 of forcing the unaligned offset to a GPR. */
7576 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7577 return true;
7578
7579 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7580 really OK. Doing this early avoids teaching all the other machinery
7581 about them. */
7582 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7583 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7584
7585 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7586 if (!rs6000_offsettable_memref_p (op, mode, false))
7587 return false;
7588
7589 op = address_offset (addr);
7590 if (op == NULL_RTX)
7591 return true;
7592
7593 offset = INTVAL (op);
7594 if (TARGET_POWERPC64 && (offset & 3) != 0)
7595 return false;
7596
7597 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7598 if (extra < 0)
7599 extra = 0;
7600
7601 if (GET_CODE (addr) == LO_SUM)
7602 /* For lo_sum addresses, we must allow any offset except one that
7603 causes a wrap, so test only the low 16 bits. */
7604 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7605
7606 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7607 }
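
/* Editor's example (a sketch of the EXTRA computation above): a DImode
access on a 32-bit target is performed as two word accesses at OFFSET and
OFFSET+4, so EXTRA is 4 and OFFSET == 32764 is rejected because the second
access at 32768 would no longer fit in a signed 16-bit displacement. */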
7608
7609 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7610 enforce an offset divisible by 4 even for 32-bit. */
7611
7612 bool
7613 mem_operand_ds_form (rtx op, machine_mode mode)
7614 {
7615 unsigned HOST_WIDE_INT offset;
7616 int extra;
7617 rtx addr = XEXP (op, 0);
7618
7619 /* Allow prefixed instructions if supported. If the bottom two bits of the
7620 offset are non-zero, we could use a prefixed instruction (which does not
7621 have the DS-form constraint that the traditional instruction had) instead
7622 of forcing the unaligned offset to a GPR. */
7623 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7624 return true;
7625
7626 if (!offsettable_address_p (false, mode, addr))
7627 return false;
7628
7629 op = address_offset (addr);
7630 if (op == NULL_RTX)
7631 return true;
7632
7633 offset = INTVAL (op);
7634 if ((offset & 3) != 0)
7635 return false;
7636
7637 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7638 if (extra < 0)
7639 extra = 0;
7640
7641 if (GET_CODE (addr) == LO_SUM)
7642 /* For lo_sum addresses, we must allow any offset except one that
7643 causes a wrap, so test only the low 16 bits. */
7644 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7645
7646 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7647 }
7648 \f
7649 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7650
7651 static bool
7652 reg_offset_addressing_ok_p (machine_mode mode)
7653 {
7654 switch (mode)
7655 {
7656 case E_V16QImode:
7657 case E_V8HImode:
7658 case E_V4SFmode:
7659 case E_V4SImode:
7660 case E_V2DFmode:
7661 case E_V2DImode:
7662 case E_V1TImode:
7663 case E_TImode:
7664 case E_TFmode:
7665 case E_KFmode:
7666 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7667 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7668 a vector mode, if we want to use the VSX registers to move it around,
7669 we need to restrict ourselves to reg+reg addressing. Similarly for
7670 IEEE 128-bit floating point that is passed in a single vector
7671 register. */
7672 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7673 return mode_supports_dq_form (mode);
7674 break;
7675
7676 case E_SDmode:
7677 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7678 addressing for the LFIWZX and STFIWX instructions. */
7679 if (TARGET_NO_SDMODE_STACK)
7680 return false;
7681 break;
7682
7683 default:
7684 break;
7685 }
7686
7687 return true;
7688 }
7689
7690 static bool
7691 virtual_stack_registers_memory_p (rtx op)
7692 {
7693 int regnum;
7694
7695 if (REG_P (op))
7696 regnum = REGNO (op);
7697
7698 else if (GET_CODE (op) == PLUS
7699 && REG_P (XEXP (op, 0))
7700 && CONST_INT_P (XEXP (op, 1)))
7701 regnum = REGNO (XEXP (op, 0));
7702
7703 else
7704 return false;
7705
7706 return (regnum >= FIRST_VIRTUAL_REGISTER
7707 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7708 }
7709
7710 /* Return true if a MODE-sized memory access to OP plus OFFSET
7711 is known not to straddle a 32k boundary. This function is used
7712 to determine whether -mcmodel=medium code can use TOC pointer
7713 relative addressing for OP. This means the alignment of the TOC
7714 pointer must also be taken into account, and unfortunately that is
7715 only 8 bytes. */
7716
7717 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7718 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7719 #endif
7720
7721 static bool
7722 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7723 machine_mode mode)
7724 {
7725 tree decl;
7726 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7727
7728 if (!SYMBOL_REF_P (op))
7729 return false;
7730
7731 /* ISA 3.0 vector d-form addressing is restricted; don't allow
7732 SYMBOL_REF. */
7733 if (mode_supports_dq_form (mode))
7734 return false;
7735
7736 dsize = GET_MODE_SIZE (mode);
7737 decl = SYMBOL_REF_DECL (op);
7738 if (!decl)
7739 {
7740 if (dsize == 0)
7741 return false;
7742
7743 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7744 replacing memory addresses with an anchor plus offset. We
7745 could find the decl by rummaging around in the block->objects
7746 VEC for the given offset but that seems like too much work. */
7747 dalign = BITS_PER_UNIT;
7748 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7749 && SYMBOL_REF_ANCHOR_P (op)
7750 && SYMBOL_REF_BLOCK (op) != NULL)
7751 {
7752 struct object_block *block = SYMBOL_REF_BLOCK (op);
7753
7754 dalign = block->alignment;
7755 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7756 }
7757 else if (CONSTANT_POOL_ADDRESS_P (op))
7758 {
7759 /* It would be nice to have get_pool_align ()... */
7760 machine_mode cmode = get_pool_mode (op);
7761
7762 dalign = GET_MODE_ALIGNMENT (cmode);
7763 }
7764 }
7765 else if (DECL_P (decl))
7766 {
7767 dalign = DECL_ALIGN (decl);
7768
7769 if (dsize == 0)
7770 {
7771 /* Allow BLKmode when the entire object is known to not
7772 cross a 32k boundary. */
7773 if (!DECL_SIZE_UNIT (decl))
7774 return false;
7775
7776 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7777 return false;
7778
7779 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7780 if (dsize > 32768)
7781 return false;
7782
7783 dalign /= BITS_PER_UNIT;
7784 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7785 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7786 return dalign >= dsize;
7787 }
7788 }
7789 else
7790 gcc_unreachable ();
7791
7792 /* Find how many bits of the alignment we know for this access. */
7793 dalign /= BITS_PER_UNIT;
7794 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7795 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7796 mask = dalign - 1;
7797 lsb = offset & -offset;
7798 mask &= lsb - 1;
7799 dalign = mask + 1;
7800
7801 return dalign >= dsize;
7802 }
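
/* Editor's example (a sketch of the mask arithmetic above): with a decl
aligned to 16 bytes, DALIGN is first capped at the 8-byte TOC pointer
alignment; for OFFSET == 12, lsb = 12 & -12 = 4, so the known alignment of
the access drops to 4 and only accesses with DSIZE <= 4 are considered
safe. */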
7803
7804 static bool
7805 constant_pool_expr_p (rtx op)
7806 {
7807 rtx base, offset;
7808
7809 split_const (op, &base, &offset);
7810 return (SYMBOL_REF_P (base)
7811 && CONSTANT_POOL_ADDRESS_P (base)
7812 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7813 }
7814
7815 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7816 use that as the register to put the HIGH value into when register
7817 allocation is already done. */
7818
7819 rtx
7820 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7821 {
7822 rtx tocrel, tocreg, hi;
7823
7824 gcc_assert (TARGET_TOC);
7825
7826 if (TARGET_DEBUG_ADDR)
7827 {
7828 if (SYMBOL_REF_P (symbol))
7829 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7830 XSTR (symbol, 0));
7831 else
7832 {
7833 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7834 GET_RTX_NAME (GET_CODE (symbol)));
7835 debug_rtx (symbol);
7836 }
7837 }
7838
7839 if (!can_create_pseudo_p ())
7840 df_set_regs_ever_live (TOC_REGISTER, true);
7841
7842 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7843 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7844 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7845 return tocrel;
7846
7847 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7848 if (largetoc_reg != NULL)
7849 {
7850 emit_move_insn (largetoc_reg, hi);
7851 hi = largetoc_reg;
7852 }
7853 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7854 }
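
/* Editor's note: for -mcmodel=small (or before register allocation) the
reference above stays as (unspec [SYMBOL (reg 2)] UNSPEC_TOCREL); for
medium/large after reload it is split into the HIGH/LO_SUM pair, with the
HIGH part optionally landing in LARGETOC_REG. */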
7855
7856 /* These are only used to pass through from print_operand/print_operand_address
7857 to rs6000_output_addr_const_extra over the intervening function
7858 output_addr_const, which is not target code. */
7859 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7860
7861 /* Return true if OP is a toc pointer relative address (the output
7862 of create_TOC_reference). If STRICT, do not match non-split
7863 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7864 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7865 TOCREL_OFFSET_RET respectively. */
7866
7867 bool
7868 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7869 const_rtx *tocrel_offset_ret)
7870 {
7871 if (!TARGET_TOC)
7872 return false;
7873
7874 if (TARGET_CMODEL != CMODEL_SMALL)
7875 {
7876 /* When strict, ensure we have everything tidy. */
7877 if (strict
7878 && !(GET_CODE (op) == LO_SUM
7879 && REG_P (XEXP (op, 0))
7880 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7881 return false;
7882
7883 /* When not strict, allow non-split TOC addresses and also allow
7884 (lo_sum (high ..)) TOC addresses created during reload. */
7885 if (GET_CODE (op) == LO_SUM)
7886 op = XEXP (op, 1);
7887 }
7888
7889 const_rtx tocrel_base = op;
7890 const_rtx tocrel_offset = const0_rtx;
7891
7892 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7893 {
7894 tocrel_base = XEXP (op, 0);
7895 tocrel_offset = XEXP (op, 1);
7896 }
7897
7898 if (tocrel_base_ret)
7899 *tocrel_base_ret = tocrel_base;
7900 if (tocrel_offset_ret)
7901 *tocrel_offset_ret = tocrel_offset;
7902
7903 return (GET_CODE (tocrel_base) == UNSPEC
7904 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7905 && REG_P (XVECEXP (tocrel_base, 0, 1))
7906 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7907 }
7908
7909 /* Return true if X is a constant pool address, and also for cmodel=medium
7910 if X is a toc-relative address known to be offsettable within MODE. */
7911
7912 bool
7913 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7914 bool strict)
7915 {
7916 const_rtx tocrel_base, tocrel_offset;
7917 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7918 && (TARGET_CMODEL != CMODEL_MEDIUM
7919 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7920 || mode == QImode
7921 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7922 INTVAL (tocrel_offset), mode)));
7923 }
7924
7925 static bool
7926 legitimate_small_data_p (machine_mode mode, rtx x)
7927 {
7928 return (DEFAULT_ABI == ABI_V4
7929 && !flag_pic && !TARGET_TOC
7930 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7931 && small_data_operand (x, mode));
7932 }
7933
7934 bool
7935 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7936 bool strict, bool worst_case)
7937 {
7938 unsigned HOST_WIDE_INT offset;
7939 unsigned int extra;
7940
7941 if (GET_CODE (x) != PLUS)
7942 return false;
7943 if (!REG_P (XEXP (x, 0)))
7944 return false;
7945 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7946 return false;
7947 if (mode_supports_dq_form (mode))
7948 return quad_address_p (x, mode, strict);
7949 if (!reg_offset_addressing_ok_p (mode))
7950 return virtual_stack_registers_memory_p (x);
7951 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7952 return true;
7953 if (!CONST_INT_P (XEXP (x, 1)))
7954 return false;
7955
7956 offset = INTVAL (XEXP (x, 1));
7957 extra = 0;
7958 switch (mode)
7959 {
7960 case E_DFmode:
7961 case E_DDmode:
7962 case E_DImode:
7963 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7964 addressing. */
7965 if (VECTOR_MEM_VSX_P (mode))
7966 return false;
7967
7968 if (!worst_case)
7969 break;
7970 if (!TARGET_POWERPC64)
7971 extra = 4;
7972 else if (offset & 3)
7973 return false;
7974 break;
7975
7976 case E_TFmode:
7977 case E_IFmode:
7978 case E_KFmode:
7979 case E_TDmode:
7980 case E_TImode:
7981 case E_PTImode:
7982 extra = 8;
7983 if (!worst_case)
7984 break;
7985 if (!TARGET_POWERPC64)
7986 extra = 12;
7987 else if (offset & 3)
7988 return false;
7989 break;
7990
7991 default:
7992 break;
7993 }
7994
7995 if (TARGET_PREFIXED)
7996 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7997 else
7998 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7999 }
8000
8001 bool
8002 legitimate_indexed_address_p (rtx x, int strict)
8003 {
8004 rtx op0, op1;
8005
8006 if (GET_CODE (x) != PLUS)
8007 return false;
8008
8009 op0 = XEXP (x, 0);
8010 op1 = XEXP (x, 1);
8011
8012 return (REG_P (op0) && REG_P (op1)
8013 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8014 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8015 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8016 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8017 }
8018
8019 bool
8020 avoiding_indexed_address_p (machine_mode mode)
8021 {
8022 /* Avoid indexed addressing for modes that have non-indexed
8023 load/store instruction forms. */
8024 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8025 }
8026
8027 bool
8028 legitimate_indirect_address_p (rtx x, int strict)
8029 {
8030 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8031 }
8032
8033 bool
8034 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8035 {
8036 if (!TARGET_MACHO || !flag_pic
8037 || mode != SImode || !MEM_P (x))
8038 return false;
8039 x = XEXP (x, 0);
8040
8041 if (GET_CODE (x) != LO_SUM)
8042 return false;
8043 if (!REG_P (XEXP (x, 0)))
8044 return false;
8045 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8046 return false;
8047 x = XEXP (x, 1);
8048
8049 return CONSTANT_P (x);
8050 }
8051
8052 static bool
8053 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8054 {
8055 if (GET_CODE (x) != LO_SUM)
8056 return false;
8057 if (!REG_P (XEXP (x, 0)))
8058 return false;
8059 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8060 return false;
8061 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8062 if (mode_supports_dq_form (mode))
8063 return false;
8064 x = XEXP (x, 1);
8065
8066 if (TARGET_ELF || TARGET_MACHO)
8067 {
8068 bool large_toc_ok;
8069
8070 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8071 return false;
8072 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8073 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8074 recognizes some LO_SUM addresses as valid although this
8075 function says the opposite. In most cases, LRA can generate
8076 correct code for address reloads through different
8077 transformations; only some LO_SUM cases are beyond it. So we
8078 need to add code here saying that some addresses are still valid. */
8079 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8080 && small_toc_ref (x, VOIDmode));
8081 if (TARGET_TOC && ! large_toc_ok)
8082 return false;
8083 if (GET_MODE_NUNITS (mode) != 1)
8084 return false;
8085 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8086 && !(/* ??? Assume floating point reg based on mode? */
8087 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8088 return false;
8089
8090 return CONSTANT_P (x) || large_toc_ok;
8091 }
8092
8093 return false;
8094 }
8095
8096
8097 /* Try machine-dependent ways of modifying an illegitimate address
8098 to be legitimate. If we find one, return the new, valid address.
8099 This is used from only one place: `memory_address' in explow.c.
8100
8101 OLDX is the address as it was before break_out_memory_refs was
8102 called. In some cases it is useful to look at this to decide what
8103 needs to be done.
8104
8105 It is always safe for this function to do nothing. It exists to
8106 recognize opportunities to optimize the output.
8107
8108 On RS/6000, first check for the sum of a register with a constant
8109 integer that is out of range. If so, generate code to add the
8110 constant with the low-order 16 bits masked to the register and force
8111 this result into another register (this can be done with `cau').
8112 Then generate an address of REG+(CONST&0xffff), allowing for the
8113 possibility of bit 16 being a one.
8114
8115 Then check for the sum of a register and something not constant; try to
8116 load the non-constant part into a register and return the sum. */
8117
8118 static rtx
8119 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8120 machine_mode mode)
8121 {
8122 unsigned int extra;
8123
8124 if (!reg_offset_addressing_ok_p (mode)
8125 || mode_supports_dq_form (mode))
8126 {
8127 if (virtual_stack_registers_memory_p (x))
8128 return x;
8129
8130 /* In theory we should not be seeing addresses of the form reg+0,
8131 but just in case it is generated, optimize it away. */
8132 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8133 return force_reg (Pmode, XEXP (x, 0));
8134
8135 /* For TImode with load/store quad, restrict addresses to just a single
8136 pointer, so it works with both GPRs and VSX registers. */
8137 /* Make sure both operands are registers. */
8138 else if (GET_CODE (x) == PLUS
8139 && (mode != TImode || !TARGET_VSX))
8140 return gen_rtx_PLUS (Pmode,
8141 force_reg (Pmode, XEXP (x, 0)),
8142 force_reg (Pmode, XEXP (x, 1)));
8143 else
8144 return force_reg (Pmode, x);
8145 }
8146 if (SYMBOL_REF_P (x))
8147 {
8148 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8149 if (model != 0)
8150 return rs6000_legitimize_tls_address (x, model);
8151 }
8152
8153 extra = 0;
8154 switch (mode)
8155 {
8156 case E_TFmode:
8157 case E_TDmode:
8158 case E_TImode:
8159 case E_PTImode:
8160 case E_IFmode:
8161 case E_KFmode:
8162 /* As in legitimate_offset_address_p we do not assume
8163 worst-case. The mode here is just a hint as to the registers
8164 used. A TImode is usually in gprs, but may actually be in
8165 fprs. Leave worst-case scenario for reload to handle via
8166 insn constraints. PTImode is only GPRs. */
8167 extra = 8;
8168 break;
8169 default:
8170 break;
8171 }
8172
8173 if (GET_CODE (x) == PLUS
8174 && REG_P (XEXP (x, 0))
8175 && CONST_INT_P (XEXP (x, 1))
8176 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8177 >= 0x10000 - extra))
8178 {
8179 HOST_WIDE_INT high_int, low_int;
8180 rtx sum;
8181 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8182 if (low_int >= 0x8000 - extra)
8183 low_int = 0;
8184 high_int = INTVAL (XEXP (x, 1)) - low_int;
8185 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8186 GEN_INT (high_int)), 0);
8187 return plus_constant (Pmode, sum, low_int);
8188 }
8189 else if (GET_CODE (x) == PLUS
8190 && REG_P (XEXP (x, 0))
8191 && !CONST_INT_P (XEXP (x, 1))
8192 && GET_MODE_NUNITS (mode) == 1
8193 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8194 || (/* ??? Assume floating point reg based on mode? */
8195 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8196 && !avoiding_indexed_address_p (mode))
8197 {
8198 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8199 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8200 }
8201 else if ((TARGET_ELF
8202 #if TARGET_MACHO
8203 || !MACHO_DYNAMIC_NO_PIC_P
8204 #endif
8205 )
8206 && TARGET_32BIT
8207 && TARGET_NO_TOC_OR_PCREL
8208 && !flag_pic
8209 && !CONST_INT_P (x)
8210 && !CONST_WIDE_INT_P (x)
8211 && !CONST_DOUBLE_P (x)
8212 && CONSTANT_P (x)
8213 && GET_MODE_NUNITS (mode) == 1
8214 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8215 || (/* ??? Assume floating point reg based on mode? */
8216 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8217 {
8218 rtx reg = gen_reg_rtx (Pmode);
8219 if (TARGET_ELF)
8220 emit_insn (gen_elf_high (reg, x));
8221 else
8222 emit_insn (gen_macho_high (Pmode, reg, x));
8223 return gen_rtx_LO_SUM (Pmode, reg, x);
8224 }
8225 else if (TARGET_TOC
8226 && SYMBOL_REF_P (x)
8227 && constant_pool_expr_p (x)
8228 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8229 return create_TOC_reference (x, NULL_RTX);
8230 else
8231 return x;
8232 }
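
/* Editor's example (a sketch of the constant splitting above):
legitimizing (plus (reg 9) (const_int 0x12344)) computes
low_int = 0x2344 and high_int = 0x10000, so the address is rebuilt as
(plus (plus (reg 9) 0x10000) 0x2344); the high part becomes an
addis-style add and the low part a valid D-form displacement. */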
8233
8234 /* Debug version of rs6000_legitimize_address. */
8235 static rtx
8236 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8237 {
8238 rtx ret;
8239 rtx_insn *insns;
8240
8241 start_sequence ();
8242 ret = rs6000_legitimize_address (x, oldx, mode);
8243 insns = get_insns ();
8244 end_sequence ();
8245
8246 if (ret != x)
8247 {
8248 fprintf (stderr,
8249 "\nrs6000_legitimize_address: mode %s, old code %s, "
8250 "new code %s, modified\n",
8251 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8252 GET_RTX_NAME (GET_CODE (ret)));
8253
8254 fprintf (stderr, "Original address:\n");
8255 debug_rtx (x);
8256
8257 fprintf (stderr, "oldx:\n");
8258 debug_rtx (oldx);
8259
8260 fprintf (stderr, "New address:\n");
8261 debug_rtx (ret);
8262
8263 if (insns)
8264 {
8265 fprintf (stderr, "Insns added:\n");
8266 debug_rtx_list (insns, 20);
8267 }
8268 }
8269 else
8270 {
8271 fprintf (stderr,
8272 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8273 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8274
8275 debug_rtx (x);
8276 }
8277
8278 if (insns)
8279 emit_insn (insns);
8280
8281 return ret;
8282 }
8283
8284 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8285 We need to emit DTP-relative relocations. */
8286
8287 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8288 static void
8289 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8290 {
8291 switch (size)
8292 {
8293 case 4:
8294 fputs ("\t.long\t", file);
8295 break;
8296 case 8:
8297 fputs (DOUBLE_INT_ASM_OP, file);
8298 break;
8299 default:
8300 gcc_unreachable ();
8301 }
8302 output_addr_const (file, x);
8303 if (TARGET_ELF)
8304 fputs ("@dtprel+0x8000", file);
8305 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8306 {
8307 switch (SYMBOL_REF_TLS_MODEL (x))
8308 {
8309 case 0:
8310 break;
8311 case TLS_MODEL_LOCAL_EXEC:
8312 fputs ("@le", file);
8313 break;
8314 case TLS_MODEL_INITIAL_EXEC:
8315 fputs ("@ie", file);
8316 break;
8317 case TLS_MODEL_GLOBAL_DYNAMIC:
8318 case TLS_MODEL_LOCAL_DYNAMIC:
8319 fputs ("@m", file);
8320 break;
8321 default:
8322 gcc_unreachable ();
8323 }
8324 }
8325 }
8326
8327 /* Return true if X is a symbol that refers to real (rather than emulated)
8328 TLS. */
8329
8330 static bool
8331 rs6000_real_tls_symbol_ref_p (rtx x)
8332 {
8333 return (SYMBOL_REF_P (x)
8334 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8335 }
8336
8337 /* In the name of slightly smaller debug output, and to cater to
8338 general assembler lossage, recognize various UNSPEC sequences
8339 and turn them back into a direct symbol reference. */
8340
8341 static rtx
8342 rs6000_delegitimize_address (rtx orig_x)
8343 {
8344 rtx x, y, offset;
8345
8346 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8347 orig_x = XVECEXP (orig_x, 0, 0);
8348
8349 orig_x = delegitimize_mem_from_attrs (orig_x);
8350
8351 x = orig_x;
8352 if (MEM_P (x))
8353 x = XEXP (x, 0);
8354
8355 y = x;
8356 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8357 y = XEXP (y, 1);
8358
8359 offset = NULL_RTX;
8360 if (GET_CODE (y) == PLUS
8361 && GET_MODE (y) == Pmode
8362 && CONST_INT_P (XEXP (y, 1)))
8363 {
8364 offset = XEXP (y, 1);
8365 y = XEXP (y, 0);
8366 }
8367
8368 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8369 {
8370 y = XVECEXP (y, 0, 0);
8371
8372 #ifdef HAVE_AS_TLS
8373 /* Do not associate thread-local symbols with the original
8374 constant pool symbol. */
8375 if (TARGET_XCOFF
8376 && SYMBOL_REF_P (y)
8377 && CONSTANT_POOL_ADDRESS_P (y)
8378 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8379 return orig_x;
8380 #endif
8381
8382 if (offset != NULL_RTX)
8383 y = gen_rtx_PLUS (Pmode, y, offset);
8384 if (!MEM_P (orig_x))
8385 return y;
8386 else
8387 return replace_equiv_address_nv (orig_x, y);
8388 }
8389
8390 if (TARGET_MACHO
8391 && GET_CODE (orig_x) == LO_SUM
8392 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8393 {
8394 y = XEXP (XEXP (orig_x, 1), 0);
8395 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8396 return XVECEXP (y, 0, 0);
8397 }
8398
8399 return orig_x;
8400 }
8401
8402 /* Return true if X shouldn't be emitted into the debug info.
8403 The linker doesn't like .toc section references from
8404 .debug_* sections, so reject .toc section symbols. */
8405
8406 static bool
8407 rs6000_const_not_ok_for_debug_p (rtx x)
8408 {
8409 if (GET_CODE (x) == UNSPEC)
8410 return true;
8411 if (SYMBOL_REF_P (x)
8412 && CONSTANT_POOL_ADDRESS_P (x))
8413 {
8414 rtx c = get_pool_constant (x);
8415 machine_mode cmode = get_pool_mode (x);
8416 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8417 return true;
8418 }
8419
8420 return false;
8421 }
8422
8423 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8424
8425 static bool
8426 rs6000_legitimate_combined_insn (rtx_insn *insn)
8427 {
8428 int icode = INSN_CODE (insn);
8429
8430 /* Reject creating doloop insns. Combine should not be allowed
8431 to create these for a number of reasons:
8432 1) In a nested loop, if combine creates one of these in an
8433 outer loop and the register allocator happens to allocate ctr
8434 to the outer loop insn, then the inner loop can't use ctr.
8435 Inner loops ought to be more highly optimized.
8436 2) Combine often wants to create one of these from what was
8437 originally a three insn sequence, first combining the three
8438 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8439 allocated ctr, the splitter takes us back to the three insn
8440 sequence. It's better to stop combine at the two insn
8441 sequence.
8442 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8443 insns, the register allocator sometimes uses floating point
8444 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8445 jump insn and output reloads are not implemented for jumps,
8446 the ctrsi/ctrdi splitters need to handle all possible cases.
8447 That's a pain, and it gets to be seriously difficult when a
8448 splitter that runs after reload needs memory to transfer from
8449 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8450 for the difficult case. It's better to not create problems
8451 in the first place. */
8452 if (icode != CODE_FOR_nothing
8453 && (icode == CODE_FOR_bdz_si
8454 || icode == CODE_FOR_bdz_di
8455 || icode == CODE_FOR_bdnz_si
8456 || icode == CODE_FOR_bdnz_di
8457 || icode == CODE_FOR_bdztf_si
8458 || icode == CODE_FOR_bdztf_di
8459 || icode == CODE_FOR_bdnztf_si
8460 || icode == CODE_FOR_bdnztf_di))
8461 return false;
8462
8463 return true;
8464 }
8465
8466 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8467
8468 static GTY(()) rtx rs6000_tls_symbol;
8469 static rtx
8470 rs6000_tls_get_addr (void)
8471 {
8472 if (!rs6000_tls_symbol)
8473 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8474
8475 return rs6000_tls_symbol;
8476 }
8477
8478 /* Construct the SYMBOL_REF for TLS GOT references. */
8479
8480 static GTY(()) rtx rs6000_got_symbol;
8481 rtx
8482 rs6000_got_sym (void)
8483 {
8484 if (!rs6000_got_symbol)
8485 {
8486 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8487 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8488 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8489 }
8490
8491 return rs6000_got_symbol;
8492 }
8493
8494 /* AIX Thread-Local Address support. */
8495
8496 static rtx
8497 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8498 {
8499 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8500 const char *name;
8501 char *tlsname;
8502
8503 name = XSTR (addr, 0);
8504 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8505 or the symbol will be in the TLS private data section. */
8506 if (name[strlen (name) - 1] != ']'
8507 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8508 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8509 {
8510 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8511 strcpy (tlsname, name);
8512 strcat (tlsname,
8513 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8514 tlsaddr = copy_rtx (addr);
8515 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8516 }
8517 else
8518 tlsaddr = addr;
8519
8520 /* Place addr into TOC constant pool. */
8521 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8522
8523 /* Output the TOC entry and create the MEM referencing the value. */
8524 if (constant_pool_expr_p (XEXP (sym, 0))
8525 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8526 {
8527 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8528 mem = gen_const_mem (Pmode, tocref);
8529 set_mem_alias_set (mem, get_TOC_alias_set ());
8530 }
8531 else
8532 return sym;
8533
8534 /* Use global-dynamic for local-dynamic. */
8535 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8536 || model == TLS_MODEL_LOCAL_DYNAMIC)
8537 {
8538 /* Create new TOC reference for @m symbol. */
8539 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8540 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8541 strcpy (tlsname, "*LCM");
8542 strcat (tlsname, name + 3);
8543 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8544 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8545 tocref = create_TOC_reference (modaddr, NULL_RTX);
8546 rtx modmem = gen_const_mem (Pmode, tocref);
8547 set_mem_alias_set (modmem, get_TOC_alias_set ());
8548
8549 rtx modreg = gen_reg_rtx (Pmode);
8550 emit_insn (gen_rtx_SET (modreg, modmem));
8551
8552 tmpreg = gen_reg_rtx (Pmode);
8553 emit_insn (gen_rtx_SET (tmpreg, mem));
8554
8555 dest = gen_reg_rtx (Pmode);
8556 if (TARGET_32BIT)
8557 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8558 else
8559 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8560 return dest;
8561 }
8562 /* Obtain the TLS pointer: 32-bit call or 64-bit GPR 13. */
8563 else if (TARGET_32BIT)
8564 {
8565 tlsreg = gen_reg_rtx (SImode);
8566 emit_insn (gen_tls_get_tpointer (tlsreg));
8567 }
8568 else
8569 tlsreg = gen_rtx_REG (DImode, 13);
8570
8571 /* Load the TOC value into temporary register. */
8572 tmpreg = gen_reg_rtx (Pmode);
8573 emit_insn (gen_rtx_SET (tmpreg, mem));
8574 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8575 gen_rtx_MINUS (Pmode, addr, tlsreg));
8576
8577 /* Add TOC symbol value to TLS pointer. */
8578 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8579
8580 return dest;
8581 }
8582
8583 /* Passes the TLS arg value from the global-dynamic and local-dynamic
8584 emit_library_call_value calls in rs6000_legitimize_tls_address to
8585 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8586 marker relocs put on __tls_get_addr calls. */
8587 static rtx global_tlsarg;
8588
8589 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8590 this (thread-local) address. */
8591
8592 static rtx
8593 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8594 {
8595 rtx dest, insn;
8596
8597 if (TARGET_XCOFF)
8598 return rs6000_legitimize_tls_address_aix (addr, model);
8599
8600 dest = gen_reg_rtx (Pmode);
8601 if (model == TLS_MODEL_LOCAL_EXEC
8602 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8603 {
8604 rtx tlsreg;
8605
8606 if (TARGET_64BIT)
8607 {
8608 tlsreg = gen_rtx_REG (Pmode, 13);
8609 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8610 }
8611 else
8612 {
8613 tlsreg = gen_rtx_REG (Pmode, 2);
8614 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8615 }
8616 emit_insn (insn);
8617 }
8618 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8619 {
8620 rtx tlsreg, tmp;
8621
8622 tmp = gen_reg_rtx (Pmode);
8623 if (TARGET_64BIT)
8624 {
8625 tlsreg = gen_rtx_REG (Pmode, 13);
8626 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8627 }
8628 else
8629 {
8630 tlsreg = gen_rtx_REG (Pmode, 2);
8631 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8632 }
8633 emit_insn (insn);
8634 if (TARGET_64BIT)
8635 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8636 else
8637 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8638 emit_insn (insn);
8639 }
8640 else
8641 {
8642 rtx got, tga, tmp1, tmp2;
8643
8644 /* We currently use relocations like @got@tlsgd for tls, which
8645 means the linker will handle allocation of tls entries, placing
8646 them in the .got section. So use a pointer to the .got section,
8647 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8648 or to secondary GOT sections used by 32-bit -fPIC. */
8649 if (rs6000_pcrel_p (cfun))
8650 got = const0_rtx;
8651 else if (TARGET_64BIT)
8652 got = gen_rtx_REG (Pmode, 2);
8653 else
8654 {
8655 if (flag_pic == 1)
8656 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8657 else
8658 {
8659 rtx gsym = rs6000_got_sym ();
8660 got = gen_reg_rtx (Pmode);
8661 if (flag_pic == 0)
8662 rs6000_emit_move (got, gsym, Pmode);
8663 else
8664 {
8665 rtx mem, lab;
8666
8667 tmp1 = gen_reg_rtx (Pmode);
8668 tmp2 = gen_reg_rtx (Pmode);
8669 mem = gen_const_mem (Pmode, tmp1);
8670 lab = gen_label_rtx ();
8671 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8672 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8673 if (TARGET_LINK_STACK)
8674 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8675 emit_move_insn (tmp2, mem);
8676 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8677 set_unique_reg_note (last, REG_EQUAL, gsym);
8678 }
8679 }
8680 }
8681
8682 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8683 {
8684 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8685 UNSPEC_TLSGD);
8686 tga = rs6000_tls_get_addr ();
8687 rtx argreg = gen_rtx_REG (Pmode, 3);
8688 emit_insn (gen_rtx_SET (argreg, arg));
8689 global_tlsarg = arg;
8690 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8691 global_tlsarg = NULL_RTX;
8692
8693 /* Make a note so that the result of this call can be CSEd. */
8694 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8695 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8696 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8697 }
8698 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8699 {
8700 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8701 tga = rs6000_tls_get_addr ();
8702 tmp1 = gen_reg_rtx (Pmode);
8703 rtx argreg = gen_rtx_REG (Pmode, 3);
8704 emit_insn (gen_rtx_SET (argreg, arg));
8705 global_tlsarg = arg;
8706 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8707 global_tlsarg = NULL_RTX;
8708
8709 /* Make a note so that the result of this call can be CSEd. */
8710 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8711 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8712 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8713
8714 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8715 {
8716 if (TARGET_64BIT)
8717 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8718 else
8719 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8720 }
8721 else if (rs6000_tls_size == 32)
8722 {
8723 tmp2 = gen_reg_rtx (Pmode);
8724 if (TARGET_64BIT)
8725 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8726 else
8727 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8728 emit_insn (insn);
8729 if (TARGET_64BIT)
8730 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8731 else
8732 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8733 }
8734 else
8735 {
8736 tmp2 = gen_reg_rtx (Pmode);
8737 if (TARGET_64BIT)
8738 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8739 else
8740 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8741 emit_insn (insn);
8742 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8743 }
8744 emit_insn (insn);
8745 }
8746 else
8747 {
8748 /* IE, or 64-bit offset LE. */
8749 tmp2 = gen_reg_rtx (Pmode);
8750 if (TARGET_64BIT)
8751 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8752 else
8753 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8754 emit_insn (insn);
8755 if (rs6000_pcrel_p (cfun))
8756 {
8757 if (TARGET_64BIT)
8758 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8759 else
8760 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8761 }
8762 else if (TARGET_64BIT)
8763 insn = gen_tls_tls_64 (dest, tmp2, addr);
8764 else
8765 insn = gen_tls_tls_32 (dest, tmp2, addr);
8766 emit_insn (insn);
8767 }
8768 }
8769
8770 return dest;
8771 }
8772
8773 /* Only create the global variable for the stack protect guard if we are using
8774 the global flavor of that guard. */
8775 static tree
8776 rs6000_init_stack_protect_guard (void)
8777 {
8778 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8779 return default_stack_protect_guard ();
8780
8781 return NULL_TREE;
8782 }
8783
8784 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8785
8786 static bool
8787 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8788 {
8789 if (GET_CODE (x) == HIGH
8790 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8791 return true;
8792
8793 /* A TLS symbol in the TOC cannot contain a sum. */
8794 if (GET_CODE (x) == CONST
8795 && GET_CODE (XEXP (x, 0)) == PLUS
8796 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8797 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8798 return true;
8799
8800 /* Do not place an ELF TLS symbol in the constant pool. */
8801 return TARGET_ELF && tls_referenced_p (x);
8802 }
8803
8804 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8805 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8806 can be addressed relative to the toc pointer. */
8807
8808 static bool
8809 use_toc_relative_ref (rtx sym, machine_mode mode)
8810 {
8811 return ((constant_pool_expr_p (sym)
8812 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8813 get_pool_mode (sym)))
8814 || (TARGET_CMODEL == CMODEL_MEDIUM
8815 && SYMBOL_REF_LOCAL_P (sym)
8816 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8817 }
8818
8819 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8820 that is a valid memory address for an instruction.
8821 The MODE argument is the machine mode for the MEM expression
8822 that wants to use this address.
8823
8824 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
8825 refers to a constant pool entry of an address (or the sum of it
8826 plus a constant), a short (16-bit signed) constant plus a register,
8827 the sum of two registers, or a register indirect, possibly with an
8828 auto-increment. For DFmode, DDmode and DImode with a constant plus
8829 register, we must ensure that both words are addressable, or on
8830 PowerPC64 that the offset is word aligned.
8831
8832 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8833 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8834 because adjacent memory cells are accessed by adding word-sized offsets
8835 during assembly output. */
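/* Editor's sketch of those forms as RTL, for illustration only
   (modes and register numbers are arbitrary):

       (reg:DI 3)                                  ;; register indirect
       (plus:DI (reg:DI 3) (const_int 16))         ;; reg + 16-bit offset
       (plus:DI (reg:DI 3) (reg:DI 4))             ;; reg + reg, indexed
       (lo_sum:DI (reg:DI 3) (symbol_ref:DI ...))  ;; low part of a symbol  */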
8836 static bool
8837 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8838 {
8839 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8840 bool quad_offset_p = mode_supports_dq_form (mode);
8841
8842 /* If this is an unaligned lvx/stvx type address, discard the outer AND. */
8843 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
8844 && GET_CODE (x) == AND
8845 && CONST_INT_P (XEXP (x, 1))
8846 && INTVAL (XEXP (x, 1)) == -16)
8847 x = XEXP (x, 0);
8848
8849 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8850 return 0;
8851 if (legitimate_indirect_address_p (x, reg_ok_strict))
8852 return 1;
8853 if (TARGET_UPDATE
8854 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8855 && mode_supports_pre_incdec_p (mode)
8856 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8857 return 1;
8858
8859 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8860 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8861 return 1;
8862
8863 /* Handle restricted vector d-form offsets in ISA 3.0. */
8864 if (quad_offset_p)
8865 {
8866 if (quad_address_p (x, mode, reg_ok_strict))
8867 return 1;
8868 }
8869 else if (virtual_stack_registers_memory_p (x))
8870 return 1;
8871
8872 else if (reg_offset_p)
8873 {
8874 if (legitimate_small_data_p (mode, x))
8875 return 1;
8876 if (legitimate_constant_pool_address_p (x, mode,
8877 reg_ok_strict || lra_in_progress))
8878 return 1;
8879 }
8880
8881 /* For TImode, if we have TImode in VSX registers, only allow register
8882 indirect addresses. This will allow the values to go in either GPRs
8883 or VSX registers without reloading. The vector types would tend to
8884 go into VSX registers, so we allow REG+REG, while TImode seems
8885 somewhat split, in that some uses are GPR based, and some VSX based. */
8886 /* FIXME: We could loosen this by changing the following to
8887 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8888 but currently we cannot allow REG+REG addressing for TImode. See
8889 PR72827 for complete details on how this ends up hoodwinking DSE. */
8890 if (mode == TImode && TARGET_VSX)
8891 return 0;
8892 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8893 if (! reg_ok_strict
8894 && reg_offset_p
8895 && GET_CODE (x) == PLUS
8896 && REG_P (XEXP (x, 0))
8897 && (XEXP (x, 0) == virtual_stack_vars_rtx
8898 || XEXP (x, 0) == arg_pointer_rtx)
8899 && CONST_INT_P (XEXP (x, 1)))
8900 return 1;
8901 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8902 return 1;
8903 if (!FLOAT128_2REG_P (mode)
8904 && (TARGET_HARD_FLOAT
8905 || TARGET_POWERPC64
8906 || (mode != DFmode && mode != DDmode))
8907 && (TARGET_POWERPC64 || mode != DImode)
8908 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8909 && mode != PTImode
8910 && !avoiding_indexed_address_p (mode)
8911 && legitimate_indexed_address_p (x, reg_ok_strict))
8912 return 1;
8913 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8914 && mode_supports_pre_modify_p (mode)
8915 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8916 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8917 reg_ok_strict, false)
8918 || (!avoiding_indexed_address_p (mode)
8919 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8920 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8921 {
8922 /* There is no prefixed version of the load/store with update. */
8923 rtx addr = XEXP (x, 1);
8924 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8925 }
8926 if (reg_offset_p && !quad_offset_p
8927 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8928 return 1;
8929 return 0;
8930 }
8931
8932 /* Debug version of rs6000_legitimate_address_p. */
8933 static bool
8934 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8935 bool reg_ok_strict)
8936 {
8937 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8938 fprintf (stderr,
8939 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8940 "strict = %d, reload = %s, code = %s\n",
8941 ret ? "true" : "false",
8942 GET_MODE_NAME (mode),
8943 reg_ok_strict,
8944 (reload_completed ? "after" : "before"),
8945 GET_RTX_NAME (GET_CODE (x)));
8946 debug_rtx (x);
8947
8948 return ret;
8949 }
8950
8951 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8952
8953 static bool
8954 rs6000_mode_dependent_address_p (const_rtx addr,
8955 addr_space_t as ATTRIBUTE_UNUSED)
8956 {
8957 return rs6000_mode_dependent_address_ptr (addr);
8958 }
8959
8960 /* Go to LABEL if ADDR (a legitimate address expression)
8961 has an effect that depends on the machine mode it is used for.
8962
8963 On the RS/6000 this is true of all integral offsets (since AltiVec
8964 and VSX modes don't allow them) and of pre-increment or pre-decrement addresses.
8965
8966 ??? Except that due to conceptual problems in offsettable_address_p
8967 we can't really report the problems of integral offsets. So leave
8968 this assuming that the adjustable offset must be valid for the
8969 sub-words of a TFmode operand, which is what we had before. */
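/* Worked example (editor's note, assuming SIGNED_16BIT_OFFSET_EXTRA_P
   accepts VAL in [-0x8000, 0x7fff - EXTRA]): on 32-bit, EXTRA is 12,
   so VAL = 32760 is mode-dependent because the last word of a 16-byte
   access would sit at 32760 + 12 = 32772, past the signed 16-bit
   displacement range.  */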
8970
8971 static bool
8972 rs6000_mode_dependent_address (const_rtx addr)
8973 {
8974 switch (GET_CODE (addr))
8975 {
8976 case PLUS:
8977 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8978 is considered a legitimate address before reload, so there
8979 are no offset restrictions in that case. Note that this
8980 condition is safe in strict mode because any address involving
8981 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8982 been rejected as illegitimate. */
8983 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8984 && XEXP (addr, 0) != arg_pointer_rtx
8985 && CONST_INT_P (XEXP (addr, 1)))
8986 {
8987 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8988 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8989 if (TARGET_PREFIXED)
8990 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8991 else
8992 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8993 }
8994 break;
8995
8996 case LO_SUM:
8997 /* Anything in the constant pool is sufficiently aligned that
8998 all bytes have the same high part address. */
8999 return !legitimate_constant_pool_address_p (addr, QImode, false);
9000
9001 /* Auto-increment cases are now treated generically in recog.c. */
9002 case PRE_MODIFY:
9003 return TARGET_UPDATE;
9004
9005 /* AND is only allowed in Altivec loads. */
9006 case AND:
9007 return true;
9008
9009 default:
9010 break;
9011 }
9012
9013 return false;
9014 }
9015
9016 /* Debug version of rs6000_mode_dependent_address. */
9017 static bool
9018 rs6000_debug_mode_dependent_address (const_rtx addr)
9019 {
9020 bool ret = rs6000_mode_dependent_address (addr);
9021
9022 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9023 ret ? "true" : "false");
9024 debug_rtx (addr);
9025
9026 return ret;
9027 }
9028
9029 /* Implement FIND_BASE_TERM. */
9030
9031 rtx
9032 rs6000_find_base_term (rtx op)
9033 {
9034 rtx base;
9035
9036 base = op;
9037 if (GET_CODE (base) == CONST)
9038 base = XEXP (base, 0);
9039 if (GET_CODE (base) == PLUS)
9040 base = XEXP (base, 0);
9041 if (GET_CODE (base) == UNSPEC)
9042 switch (XINT (base, 1))
9043 {
9044 case UNSPEC_TOCREL:
9045 case UNSPEC_MACHOPIC_OFFSET:
9046 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9047 for aliasing purposes. */
9048 return XVECEXP (base, 0, 0);
9049 }
9050
9051 return op;
9052 }
9053
9054 /* More elaborate version of recog's offsettable_memref_p predicate
9055 that works around the ??? note of rs6000_mode_dependent_address.
9056 In particular it accepts
9057
9058 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9059
9060 in 32-bit mode, that the recog predicate rejects. */
9061
9062 static bool
9063 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9064 {
9065 bool worst_case;
9066
9067 if (!MEM_P (op))
9068 return false;
9069
9070 /* First mimic offsettable_memref_p. */
9071 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9072 return true;
9073
9074 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9075 the latter predicate knows nothing about the mode of the memory
9076 reference and, therefore, assumes that it is the largest supported
9077 mode (TFmode). As a consequence, legitimate offsettable memory
9078 references are rejected. rs6000_legitimate_offset_address_p contains
9079 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9080 at least with a little bit of help here given that we know the
9081 actual registers used. */
9082 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9083 || GET_MODE_SIZE (reg_mode) == 4);
9084 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9085 strict, worst_case);
9086 }
9087
9088 /* Determine the reassociation width to be used in reassociate_bb.
9089 This takes into account how many parallel operations we
9090 can actually do of a given type, and also the latency.
9091 P8:
9092 int add/sub 6/cycle
9093 mul 2/cycle
9094 vect add/sub/mul 2/cycle
9095 fp add/sub/mul 2/cycle
9096 dfp 1/cycle
9097 */
9098
9099 static int
9100 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9101 machine_mode mode)
9102 {
9103 switch (rs6000_tune)
9104 {
9105 case PROCESSOR_POWER8:
9106 case PROCESSOR_POWER9:
9107 case PROCESSOR_FUTURE:
9108 if (DECIMAL_FLOAT_MODE_P (mode))
9109 return 1;
9110 if (VECTOR_MODE_P (mode))
9111 return 4;
9112 if (INTEGRAL_MODE_P (mode))
9113 return 1;
9114 if (FLOAT_MODE_P (mode))
9115 return 4;
9116 break;
9117 default:
9118 break;
9119 }
9120 return 1;
9121 }
9122
9123 /* Change register usage conditional on target flags. */
9124 static void
9125 rs6000_conditional_register_usage (void)
9126 {
9127 int i;
9128
9129 if (TARGET_DEBUG_TARGET)
9130 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9131
9132 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9133 if (TARGET_64BIT)
9134 fixed_regs[13] = call_used_regs[13] = 1;
9135
9136 /* Conditionally disable FPRs. */
9137 if (TARGET_SOFT_FLOAT)
9138 for (i = 32; i < 64; i++)
9139 fixed_regs[i] = call_used_regs[i] = 1;
9140
9141 /* The TOC register is not killed across calls in a way that is
9142 visible to the compiler. */
9143 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9144 call_used_regs[2] = 0;
9145
9146 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9147 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9148
9149 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9150 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9151 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9152
9153 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9154 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9155 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9156
9157 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9158 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9159
9160 if (!TARGET_ALTIVEC && !TARGET_VSX)
9161 {
9162 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9163 fixed_regs[i] = call_used_regs[i] = 1;
9164 call_used_regs[VRSAVE_REGNO] = 1;
9165 }
9166
9167 if (TARGET_ALTIVEC || TARGET_VSX)
9168 global_regs[VSCR_REGNO] = 1;
9169
9170 if (TARGET_ALTIVEC_ABI)
9171 {
9172 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9173 call_used_regs[i] = 1;
9174
9175 /* AIX reserves VR20:31 in non-extended ABI mode. */
9176 if (TARGET_XCOFF)
9177 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9178 fixed_regs[i] = call_used_regs[i] = 1;
9179 }
9180 }
9181
9182 \f
9183 /* Output insns to set DEST equal to the constant SOURCE as a series of
9184 lis, ori and shl instructions and return TRUE. */
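/* Editor's sketch of the SImode path below (not generated output):
   c = 0x12345678 becomes

       lis  tmp,0x1234        # tmp  = 0x12340000  (c & ~0xffff)
       ori  dest,tmp,0x5678   # dest = 0x12345678  (IOR with c & 0xffff)  */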
9185
9186 bool
9187 rs6000_emit_set_const (rtx dest, rtx source)
9188 {
9189 machine_mode mode = GET_MODE (dest);
9190 rtx temp, set;
9191 rtx_insn *insn;
9192 HOST_WIDE_INT c;
9193
9194 gcc_checking_assert (CONST_INT_P (source));
9195 c = INTVAL (source);
9196 switch (mode)
9197 {
9198 case E_QImode:
9199 case E_HImode:
9200 emit_insn (gen_rtx_SET (dest, source));
9201 return true;
9202
9203 case E_SImode:
9204 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9205
9206 emit_insn (gen_rtx_SET (copy_rtx (temp),
9207 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9208 emit_insn (gen_rtx_SET (dest,
9209 gen_rtx_IOR (SImode, copy_rtx (temp),
9210 GEN_INT (c & 0xffff))));
9211 break;
9212
9213 case E_DImode:
9214 if (!TARGET_POWERPC64)
9215 {
9216 rtx hi, lo;
9217
9218 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9219 DImode);
9220 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9221 DImode);
9222 emit_move_insn (hi, GEN_INT (c >> 32));
9223 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9224 emit_move_insn (lo, GEN_INT (c));
9225 }
9226 else
9227 rs6000_emit_set_long_const (dest, c);
9228 break;
9229
9230 default:
9231 gcc_unreachable ();
9232 }
9233
9234 insn = get_last_insn ();
9235 set = single_set (insn);
9236 if (! CONSTANT_P (SET_SRC (set)))
9237 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9238
9239 return true;
9240 }
9241
9242 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9243 Output insns to set DEST equal to the constant C as a series of
9244 lis, ori and shl instructions. */
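/* Worked example for the most general case below (editor's note, not
   generated output): c = 0x123456789abcdef0 splits into ud4..ud1 =
   0x1234, 0x5678, 0x9abc, 0xdef0 and is built roughly as

       lis   tmp,0x1234
       ori   tmp,tmp,0x5678
       sldi  tmp,tmp,32
       oris  tmp,tmp,0x9abc
       ori   dest,tmp,0xdef0  */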
9245
9246 static void
9247 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9248 {
9249 rtx temp;
9250 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9251
9252 ud1 = c & 0xffff;
9253 c = c >> 16;
9254 ud2 = c & 0xffff;
9255 c = c >> 16;
9256 ud3 = c & 0xffff;
9257 c = c >> 16;
9258 ud4 = c & 0xffff;
9259
9260 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9261 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9262 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9263
9264 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9265 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9266 {
9267 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9268
9269 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9270 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9271 if (ud1 != 0)
9272 emit_move_insn (dest,
9273 gen_rtx_IOR (DImode, copy_rtx (temp),
9274 GEN_INT (ud1)));
9275 }
9276 else if (ud3 == 0 && ud4 == 0)
9277 {
9278 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9279
9280 gcc_assert (ud2 & 0x8000);
9281 emit_move_insn (copy_rtx (temp),
9282 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9283 if (ud1 != 0)
9284 emit_move_insn (copy_rtx (temp),
9285 gen_rtx_IOR (DImode, copy_rtx (temp),
9286 GEN_INT (ud1)));
9287 emit_move_insn (dest,
9288 gen_rtx_ZERO_EXTEND (DImode,
9289 gen_lowpart (SImode,
9290 copy_rtx (temp))));
9291 }
9292 else if (ud1 == ud3 && ud2 == ud4)
9293 {
9294 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9295 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9296 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9297 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9298 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9299 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9300 }
9301 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9302 || (ud4 == 0 && ! (ud3 & 0x8000)))
9303 {
9304 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9305
9306 emit_move_insn (copy_rtx (temp),
9307 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9308 if (ud2 != 0)
9309 emit_move_insn (copy_rtx (temp),
9310 gen_rtx_IOR (DImode, copy_rtx (temp),
9311 GEN_INT (ud2)));
9312 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9313 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9314 GEN_INT (16)));
9315 if (ud1 != 0)
9316 emit_move_insn (dest,
9317 gen_rtx_IOR (DImode, copy_rtx (temp),
9318 GEN_INT (ud1)));
9319 }
9320 else
9321 {
9322 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9323
9324 emit_move_insn (copy_rtx (temp),
9325 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9326 if (ud3 != 0)
9327 emit_move_insn (copy_rtx (temp),
9328 gen_rtx_IOR (DImode, copy_rtx (temp),
9329 GEN_INT (ud3)));
9330
9331 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9332 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9333 GEN_INT (32)));
9334 if (ud2 != 0)
9335 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9336 gen_rtx_IOR (DImode, copy_rtx (temp),
9337 GEN_INT (ud2 << 16)));
9338 if (ud1 != 0)
9339 emit_move_insn (dest,
9340 gen_rtx_IOR (DImode, copy_rtx (temp),
9341 GEN_INT (ud1)));
9342 }
9343 }
9344
9345 /* Helper for the following. Get rid of [r+r] memory refs
9346 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9347
9348 static void
9349 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9350 {
9351 if (MEM_P (operands[0])
9352 && !REG_P (XEXP (operands[0], 0))
9353 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9354 GET_MODE (operands[0]), false))
9355 operands[0]
9356 = replace_equiv_address (operands[0],
9357 copy_addr_to_reg (XEXP (operands[0], 0)));
9358
9359 if (MEM_P (operands[1])
9360 && !REG_P (XEXP (operands[1], 0))
9361 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9362 GET_MODE (operands[1]), false))
9363 operands[1]
9364 = replace_equiv_address (operands[1],
9365 copy_addr_to_reg (XEXP (operands[1], 0)));
9366 }
9367
9368 /* Generate a vector of constants to permute MODE for a little-endian
9369 storage operation by swapping the two halves of a vector. */
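/* For instance (editor's note), V4SImode yields the selector
   { 2, 3, 0, 1 }: each element of the result is drawn from the
   opposite doubleword half of the source vector.  */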
9370 static rtvec
9371 rs6000_const_vec (machine_mode mode)
9372 {
9373 int i, subparts;
9374 rtvec v;
9375
9376 switch (mode)
9377 {
9378 case E_V1TImode:
9379 subparts = 1;
9380 break;
9381 case E_V2DFmode:
9382 case E_V2DImode:
9383 subparts = 2;
9384 break;
9385 case E_V4SFmode:
9386 case E_V4SImode:
9387 subparts = 4;
9388 break;
9389 case E_V8HImode:
9390 subparts = 8;
9391 break;
9392 case E_V16QImode:
9393 subparts = 16;
9394 break;
9395 default:
9396 gcc_unreachable();
9397 }
9398
9399 v = rtvec_alloc (subparts);
9400
9401 for (i = 0; i < subparts / 2; ++i)
9402 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9403 for (i = subparts / 2; i < subparts; ++i)
9404 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9405
9406 return v;
9407 }
9408
9409 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9410 store operation. */
9411 void
9412 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9413 {
9414 /* Scalar permutations are easier to express in integer modes rather than
9415 floating-point modes, so cast them here. We use V1TImode instead
9416 of TImode to ensure that the values don't go through GPRs. */
9417 if (FLOAT128_VECTOR_P (mode))
9418 {
9419 dest = gen_lowpart (V1TImode, dest);
9420 source = gen_lowpart (V1TImode, source);
9421 mode = V1TImode;
9422 }
9423
9424 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9425 scalar. */
9426 if (mode == TImode || mode == V1TImode)
9427 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9428 GEN_INT (64))));
9429 else
9430 {
9431 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9432 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9433 }
9434 }
9435
9436 /* Emit a little-endian load from vector memory location SOURCE to VSX
9437 register DEST in mode MODE. The load is done with two permuting
9438 insns that represent an lxvd2x and an xxpermdi. */
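/* Editor's sketch (not generated output): the pair typically assembles
   to something like

       lxvd2x   0,0,9       # load with the two doublewords swapped
       xxpermdi 0,0,0,2     # swap them back into place

   assuming the usual doubleword-swap form of xxpermdi.  */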
9439 void
9440 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9441 {
9442 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9443 V1TImode). */
9444 if (mode == TImode || mode == V1TImode)
9445 {
9446 mode = V2DImode;
9447 dest = gen_lowpart (V2DImode, dest);
9448 source = adjust_address (source, V2DImode, 0);
9449 }
9450
9451 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9452 rs6000_emit_le_vsx_permute (tmp, source, mode);
9453 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9454 }
9455
9456 /* Emit a little-endian store to vector memory location DEST from VSX
9457 register SOURCE in mode MODE. The store is done with two permuting
9458 insns that represent an xxpermdi and an stxvd2x. */
9459 void
9460 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9461 {
9462 /* This should never be called during or after LRA, because it does
9463 not re-permute the source register. It is intended only for use
9464 during expand. */
9465 gcc_assert (!lra_in_progress && !reload_completed);
9466
9467 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9468 V1TImode). */
9469 if (mode == TImode || mode == V1TImode)
9470 {
9471 mode = V2DImode;
9472 dest = adjust_address (dest, V2DImode, 0);
9473 source = gen_lowpart (V2DImode, source);
9474 }
9475
9476 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9477 rs6000_emit_le_vsx_permute (tmp, source, mode);
9478 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9479 }
9480
9481 /* Emit a sequence representing a little-endian VSX load or store,
9482 moving data from SOURCE to DEST in mode MODE. This is done
9483 separately from rs6000_emit_move to ensure it is called only
9484 during expand. LE VSX loads and stores introduced later are
9485 handled with a split. The expand-time RTL generation allows
9486 us to optimize away redundant pairs of register-permutes. */
9487 void
9488 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9489 {
9490 gcc_assert (!BYTES_BIG_ENDIAN
9491 && VECTOR_MEM_VSX_P (mode)
9492 && !TARGET_P9_VECTOR
9493 && !gpr_or_gpr_p (dest, source)
9494 && (MEM_P (source) ^ MEM_P (dest)));
9495
9496 if (MEM_P (source))
9497 {
9498 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9499 rs6000_emit_le_vsx_load (dest, source, mode);
9500 }
9501 else
9502 {
9503 if (!REG_P (source))
9504 source = force_reg (mode, source);
9505 rs6000_emit_le_vsx_store (dest, source, mode);
9506 }
9507 }
9508
9509 /* Return whether an SFmode or SImode move can be done without converting one
9510 mode to another. This arises when we have:
9511
9512 (SUBREG:SF (REG:SI ...))
9513 (SUBREG:SI (REG:SF ...))
9514
9515 and one of the values is in a floating point/vector register, where SFmode
9516 scalars are stored in DFmode format. */
9517
9518 bool
9519 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9520 {
9521 if (TARGET_ALLOW_SF_SUBREG)
9522 return true;
9523
9524 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9525 return true;
9526
9527 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9528 return true;
9529
9530 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9531 if (SUBREG_P (dest))
9532 {
9533 rtx dest_subreg = SUBREG_REG (dest);
9534 rtx src_subreg = SUBREG_REG (src);
9535 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9536 }
9537
9538 return false;
9539 }
9540
9541
9542 /* Helper function to change moves with:
9543
9544 (SUBREG:SF (REG:SI)) and
9545 (SUBREG:SI (REG:SF))
9546
9547 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9548 values are stored as DFmode values in the VSX registers. We need to convert
9549 the bits before we can use a direct move or operate on the bits in the
9550 vector register as an integer type.
9551
9552 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9553
9554 static bool
9555 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9556 {
9557 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9558 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9559 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9560 {
9561 rtx inner_source = SUBREG_REG (source);
9562 machine_mode inner_mode = GET_MODE (inner_source);
9563
9564 if (mode == SImode && inner_mode == SFmode)
9565 {
9566 emit_insn (gen_movsi_from_sf (dest, inner_source));
9567 return true;
9568 }
9569
9570 if (mode == SFmode && inner_mode == SImode)
9571 {
9572 emit_insn (gen_movsf_from_si (dest, inner_source));
9573 return true;
9574 }
9575 }
9576
9577 return false;
9578 }
9579
9580 /* Emit a move from SOURCE to DEST in mode MODE. */
9581 void
9582 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9583 {
9584 rtx operands[2];
9585 operands[0] = dest;
9586 operands[1] = source;
9587
9588 if (TARGET_DEBUG_ADDR)
9589 {
9590 fprintf (stderr,
9591 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9592 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9593 GET_MODE_NAME (mode),
9594 lra_in_progress,
9595 reload_completed,
9596 can_create_pseudo_p ());
9597 debug_rtx (dest);
9598 fprintf (stderr, "source:\n");
9599 debug_rtx (source);
9600 }
9601
9602 /* Check that we get CONST_WIDE_INT only when we should. */
9603 if (CONST_WIDE_INT_P (operands[1])
9604 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9605 gcc_unreachable ();
9606
9607 #ifdef HAVE_AS_GNU_ATTRIBUTE
9608 /* If we use a long double type, set the flags in .gnu_attribute that say
9609 what the long double type is. This is to allow the linker's warning
9610 message for the wrong long double to be useful, even if the function does
9611 not do a call (for example, doing a 128-bit add on power9 if the long
9612 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9613 are used but they aren't the default long double type. */
9614 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9615 {
9616 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9617 rs6000_passes_float = rs6000_passes_long_double = true;
9618
9619 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9620 rs6000_passes_float = rs6000_passes_long_double = true;
9621 }
9622 #endif
9623
9624 /* See if we need to special case SImode/SFmode SUBREG moves. */
9625 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9626 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9627 return;
9628
9629 /* Check if GCC is setting up a block move that will end up using FP
9630 registers as temporaries. We must make sure this is acceptable. */
9631 if (MEM_P (operands[0])
9632 && MEM_P (operands[1])
9633 && mode == DImode
9634 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9635 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9636 && ! (rs6000_slow_unaligned_access (SImode,
9637 (MEM_ALIGN (operands[0]) > 32
9638 ? 32 : MEM_ALIGN (operands[0])))
9639 || rs6000_slow_unaligned_access (SImode,
9640 (MEM_ALIGN (operands[1]) > 32
9641 ? 32 : MEM_ALIGN (operands[1]))))
9642 && ! MEM_VOLATILE_P (operands [0])
9643 && ! MEM_VOLATILE_P (operands [1]))
9644 {
9645 emit_move_insn (adjust_address (operands[0], SImode, 0),
9646 adjust_address (operands[1], SImode, 0));
9647 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9648 adjust_address (copy_rtx (operands[1]), SImode, 4));
9649 return;
9650 }
9651
9652 if (can_create_pseudo_p () && MEM_P (operands[0])
9653 && !gpc_reg_operand (operands[1], mode))
9654 operands[1] = force_reg (mode, operands[1]);
9655
9656 /* Recognize the case where operand[1] is a reference to thread-local
9657 data and load its address to a register. */
9658 if (tls_referenced_p (operands[1]))
9659 {
9660 enum tls_model model;
9661 rtx tmp = operands[1];
9662 rtx addend = NULL;
9663
9664 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9665 {
9666 addend = XEXP (XEXP (tmp, 0), 1);
9667 tmp = XEXP (XEXP (tmp, 0), 0);
9668 }
9669
9670 gcc_assert (SYMBOL_REF_P (tmp));
9671 model = SYMBOL_REF_TLS_MODEL (tmp);
9672 gcc_assert (model != 0);
9673
9674 tmp = rs6000_legitimize_tls_address (tmp, model);
9675 if (addend)
9676 {
9677 tmp = gen_rtx_PLUS (mode, tmp, addend);
9678 tmp = force_operand (tmp, operands[0]);
9679 }
9680 operands[1] = tmp;
9681 }
9682
9683 /* 128-bit constant floating-point values on Darwin should really be loaded
9684 as two parts. However, this premature splitting is a problem when DFmode
9685 values can go into Altivec registers. */
9686 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9687 && !reg_addr[DFmode].scalar_in_vmx_p)
9688 {
9689 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9690 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9691 DFmode);
9692 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9693 GET_MODE_SIZE (DFmode)),
9694 simplify_gen_subreg (DFmode, operands[1], mode,
9695 GET_MODE_SIZE (DFmode)),
9696 DFmode);
9697 return;
9698 }
9699
9700 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9701 p1:SD) if p1 is not of floating point class and p0 is spilled as
9702 we can have no analogous movsd_store for this. */
9703 if (lra_in_progress && mode == DDmode
9704 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9705 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9706 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9707 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9708 {
9709 enum reg_class cl;
9710 int regno = REGNO (SUBREG_REG (operands[1]));
9711
9712 if (!HARD_REGISTER_NUM_P (regno))
9713 {
9714 cl = reg_preferred_class (regno);
9715 regno = reg_renumber[regno];
9716 if (regno < 0)
9717 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9718 }
9719 if (regno >= 0 && ! FP_REGNO_P (regno))
9720 {
9721 mode = SDmode;
9722 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9723 operands[1] = SUBREG_REG (operands[1]);
9724 }
9725 }
9726 if (lra_in_progress
9727 && mode == SDmode
9728 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9729 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9730 && (REG_P (operands[1])
9731 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9732 {
9733 int regno = reg_or_subregno (operands[1]);
9734 enum reg_class cl;
9735
9736 if (!HARD_REGISTER_NUM_P (regno))
9737 {
9738 cl = reg_preferred_class (regno);
9739 gcc_assert (cl != NO_REGS);
9740 regno = reg_renumber[regno];
9741 if (regno < 0)
9742 regno = ira_class_hard_regs[cl][0];
9743 }
9744 if (FP_REGNO_P (regno))
9745 {
9746 if (GET_MODE (operands[0]) != DDmode)
9747 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9748 emit_insn (gen_movsd_store (operands[0], operands[1]));
9749 }
9750 else if (INT_REGNO_P (regno))
9751 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9752 else
9753 gcc_unreachable();
9754 return;
9755 }
9756 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9757 p1:DD)) if p0 is not of floating point class and p1 is spilled as
9758 we can have no analogous movsd_load for this. */
9759 if (lra_in_progress && mode == DDmode
9760 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9761 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9762 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9763 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9764 {
9765 enum reg_class cl;
9766 int regno = REGNO (SUBREG_REG (operands[0]));
9767
9768 if (!HARD_REGISTER_NUM_P (regno))
9769 {
9770 cl = reg_preferred_class (regno);
9771 regno = reg_renumber[regno];
9772 if (regno < 0)
9773 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9774 }
9775 if (regno >= 0 && ! FP_REGNO_P (regno))
9776 {
9777 mode = SDmode;
9778 operands[0] = SUBREG_REG (operands[0]);
9779 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9780 }
9781 }
9782 if (lra_in_progress
9783 && mode == SDmode
9784 && (REG_P (operands[0])
9785 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9786 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9787 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9788 {
9789 int regno = reg_or_subregno (operands[0]);
9790 enum reg_class cl;
9791
9792 if (!HARD_REGISTER_NUM_P (regno))
9793 {
9794 cl = reg_preferred_class (regno);
9795 gcc_assert (cl != NO_REGS);
9796 regno = reg_renumber[regno];
9797 if (regno < 0)
9798 regno = ira_class_hard_regs[cl][0];
9799 }
9800 if (FP_REGNO_P (regno))
9801 {
9802 if (GET_MODE (operands[1]) != DDmode)
9803 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9804 emit_insn (gen_movsd_load (operands[0], operands[1]));
9805 }
9806 else if (INT_REGNO_P (regno))
9807 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9808 else
9809 gcc_unreachable();
9810 return;
9811 }
9812
9813 /* FIXME: In the long term, this switch statement should go away
9814 and be replaced by a sequence of tests based on things like
9815 mode == Pmode. */
9816 switch (mode)
9817 {
9818 case E_HImode:
9819 case E_QImode:
9820 if (CONSTANT_P (operands[1])
9821 && !CONST_INT_P (operands[1]))
9822 operands[1] = force_const_mem (mode, operands[1]);
9823 break;
9824
9825 case E_TFmode:
9826 case E_TDmode:
9827 case E_IFmode:
9828 case E_KFmode:
9829 if (FLOAT128_2REG_P (mode))
9830 rs6000_eliminate_indexed_memrefs (operands);
9831 /* fall through */
9832
9833 case E_DFmode:
9834 case E_DDmode:
9835 case E_SFmode:
9836 case E_SDmode:
9837 if (CONSTANT_P (operands[1])
9838 && ! easy_fp_constant (operands[1], mode))
9839 operands[1] = force_const_mem (mode, operands[1]);
9840 break;
9841
9842 case E_V16QImode:
9843 case E_V8HImode:
9844 case E_V4SFmode:
9845 case E_V4SImode:
9846 case E_V2DFmode:
9847 case E_V2DImode:
9848 case E_V1TImode:
9849 if (CONSTANT_P (operands[1])
9850 && !easy_vector_constant (operands[1], mode))
9851 operands[1] = force_const_mem (mode, operands[1]);
9852 break;
9853
9854 case E_SImode:
9855 case E_DImode:
9856 /* Use the default pattern for the address of ELF small data. */
9857 if (TARGET_ELF
9858 && mode == Pmode
9859 && DEFAULT_ABI == ABI_V4
9860 && (SYMBOL_REF_P (operands[1])
9861 || GET_CODE (operands[1]) == CONST)
9862 && small_data_operand (operands[1], mode))
9863 {
9864 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9865 return;
9866 }
9867
9868 /* Use the default pattern for loading up PC-relative addresses. */
9869 if (TARGET_PCREL && mode == Pmode
9870 && pcrel_local_or_external_address (operands[1], Pmode))
9871 {
9872 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9873 return;
9874 }
9875
9876 if (DEFAULT_ABI == ABI_V4
9877 && mode == Pmode && mode == SImode
9878 && flag_pic == 1 && got_operand (operands[1], mode))
9879 {
9880 emit_insn (gen_movsi_got (operands[0], operands[1]));
9881 return;
9882 }
9883
9884 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9885 && TARGET_NO_TOC_OR_PCREL
9886 && ! flag_pic
9887 && mode == Pmode
9888 && CONSTANT_P (operands[1])
9889 && GET_CODE (operands[1]) != HIGH
9890 && !CONST_INT_P (operands[1]))
9891 {
9892 rtx target = (!can_create_pseudo_p ()
9893 ? operands[0]
9894 : gen_reg_rtx (mode));
9895
9896 /* If this is a function address on -mcall-aixdesc,
9897 convert it to the address of the descriptor. */
9898 if (DEFAULT_ABI == ABI_AIX
9899 && SYMBOL_REF_P (operands[1])
9900 && XSTR (operands[1], 0)[0] == '.')
9901 {
9902 const char *name = XSTR (operands[1], 0);
9903 rtx new_ref;
9904 while (*name == '.')
9905 name++;
9906 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9907 CONSTANT_POOL_ADDRESS_P (new_ref)
9908 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9909 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9910 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9911 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9912 operands[1] = new_ref;
9913 }
9914
9915 if (DEFAULT_ABI == ABI_DARWIN)
9916 {
9917 #if TARGET_MACHO
9918 /* This is not PIC code, but could require the subset of
9919 indirections used by mdynamic-no-pic. */
9920 if (MACHO_DYNAMIC_NO_PIC_P)
9921 {
9922 /* Take care of any required data indirection. */
9923 operands[1] = rs6000_machopic_legitimize_pic_address (
9924 operands[1], mode, operands[0]);
9925 if (operands[0] != operands[1])
9926 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9927 return;
9928 }
9929 #endif
9930 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9931 emit_insn (gen_macho_low (Pmode, operands[0],
9932 target, operands[1]));
9933 return;
9934 }
9935
9936 emit_insn (gen_elf_high (target, operands[1]));
9937 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9938 return;
9939 }
9940
9941 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9942 and we have put it in the TOC, we just need to make a TOC-relative
9943 reference to it. */
9944 if (TARGET_TOC
9945 && SYMBOL_REF_P (operands[1])
9946 && use_toc_relative_ref (operands[1], mode))
9947 operands[1] = create_TOC_reference (operands[1], operands[0]);
9948 else if (mode == Pmode
9949 && CONSTANT_P (operands[1])
9950 && GET_CODE (operands[1]) != HIGH
9951 && ((REG_P (operands[0])
9952 && FP_REGNO_P (REGNO (operands[0])))
9953 || !CONST_INT_P (operands[1])
9954 || (num_insns_constant (operands[1], mode)
9955 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9956 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9957 && (TARGET_CMODEL == CMODEL_SMALL
9958 || can_create_pseudo_p ()
9959 || (REG_P (operands[0])
9960 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9961 {
9962
9963 #if TARGET_MACHO
9964 /* Darwin uses a special PIC legitimizer. */
9965 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9966 {
9967 operands[1] =
9968 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9969 operands[0]);
9970 if (operands[0] != operands[1])
9971 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9972 return;
9973 }
9974 #endif
9975
9976 /* If we are to limit the number of things we put in the TOC and
9977 this is a symbol plus a constant we can add in one insn,
9978 just put the symbol in the TOC and add the constant. */
9979 if (GET_CODE (operands[1]) == CONST
9980 && TARGET_NO_SUM_IN_TOC
9981 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9982 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9983 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9984 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9985 && ! side_effects_p (operands[0]))
9986 {
9987 rtx sym =
9988 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9989 rtx other = XEXP (XEXP (operands[1], 0), 1);
9990
9991 sym = force_reg (mode, sym);
9992 emit_insn (gen_add3_insn (operands[0], sym, other));
9993 return;
9994 }
9995
9996 operands[1] = force_const_mem (mode, operands[1]);
9997
9998 if (TARGET_TOC
9999 && SYMBOL_REF_P (XEXP (operands[1], 0))
10000 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10001 {
10002 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10003 operands[0]);
10004 operands[1] = gen_const_mem (mode, tocref);
10005 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10006 }
10007 }
10008 break;
10009
10010 case E_TImode:
10011 if (!VECTOR_MEM_VSX_P (TImode))
10012 rs6000_eliminate_indexed_memrefs (operands);
10013 break;
10014
10015 case E_PTImode:
10016 rs6000_eliminate_indexed_memrefs (operands);
10017 break;
10018
10019 default:
10020 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10021 }
10022
10023 /* Above, we may have called force_const_mem which may have returned
10024 an invalid address. If we can, fix this up; otherwise, reload will
10025 have to deal with it. */
10026 if (MEM_P (operands[1]))
10027 operands[1] = validize_mem (operands[1]);
10028
10029 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10030 }
10031 \f
10032
10033 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10034 static void
10035 init_float128_ibm (machine_mode mode)
10036 {
10037 if (!TARGET_XL_COMPAT)
10038 {
10039 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10040 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10041 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10042 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10043
10044 if (!TARGET_HARD_FLOAT)
10045 {
10046 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10047 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10048 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10049 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10050 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10051 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10052 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10053 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10054
10055 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10056 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10057 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10058 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10059 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10060 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10061 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10062 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10063 }
10064 }
10065 else
10066 {
10067 set_optab_libfunc (add_optab, mode, "_xlqadd");
10068 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10069 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10070 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10071 }
10072
10073 /* Add various conversions for IFmode to use the traditional TFmode
10074 names. */
10075 if (mode == IFmode)
10076 {
10077 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10078 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10079 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10080 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10081 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10082 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10083
10084 if (TARGET_POWERPC64)
10085 {
10086 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10087 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10088 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10089 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10090 }
10091 }
10092 }
10093
10094 /* Create a decl for either complex long double multiply or complex long double
10095 divide when long double is IEEE 128-bit floating point. We can't use
10096 __multc3 and __divtc3 because the original long double, based on IBM
10097 extended double, already used those names. The complex multiply/divide functions are encoded
10098 as builtin functions with a complex result and 4 scalar inputs. */
10099
10100 static void
10101 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10102 {
10103 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10104 name, NULL_TREE);
10105
10106 set_builtin_decl (fncode, fndecl, true);
10107
10108 if (TARGET_DEBUG_BUILTIN)
10109 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10110
10111 return;
10112 }
10113
10114 /* Set up IEEE 128-bit floating point routines. Use different names if the
10115 arguments can be passed in a vector register. The historical PowerPC
10116 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10117 continue to use that if we aren't using vector registers to pass IEEE
10118 128-bit floating point. */
10119
10120 static void
10121 init_float128_ieee (machine_mode mode)
10122 {
10123 if (FLOAT128_VECTOR_P (mode))
10124 {
10125 static bool complex_muldiv_init_p = false;
10126
10127 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10128 we have clone or target attributes, this will be called a second
10129 time. We want to create the built-in function only once. */
10130 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10131 {
10132 complex_muldiv_init_p = true;
10133 built_in_function fncode_mul =
10134 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10135 - MIN_MODE_COMPLEX_FLOAT);
10136 built_in_function fncode_div =
10137 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10138 - MIN_MODE_COMPLEX_FLOAT);
10139
10140 tree fntype = build_function_type_list (complex_long_double_type_node,
10141 long_double_type_node,
10142 long_double_type_node,
10143 long_double_type_node,
10144 long_double_type_node,
10145 NULL_TREE);
10146
10147 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10148 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10149 }
10150
10151 set_optab_libfunc (add_optab, mode, "__addkf3");
10152 set_optab_libfunc (sub_optab, mode, "__subkf3");
10153 set_optab_libfunc (neg_optab, mode, "__negkf2");
10154 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10155 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10156 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10157 set_optab_libfunc (abs_optab, mode, "__abskf2");
10158 set_optab_libfunc (powi_optab, mode, "__powikf2");
10159
10160 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10161 set_optab_libfunc (ne_optab, mode, "__nekf2");
10162 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10163 set_optab_libfunc (ge_optab, mode, "__gekf2");
10164 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10165 set_optab_libfunc (le_optab, mode, "__lekf2");
10166 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10167
10168 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10169 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10170 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10171 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10172
10173 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10174 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10175 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10176
10177 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10178 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10179 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10180
10181 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10182 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10183 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10184 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10185 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10186 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10187
10188 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10189 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10190 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10191 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10192
10193 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10194 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10195 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10196 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10197
10198 if (TARGET_POWERPC64)
10199 {
10200 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10201 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10202 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10203 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10204 }
10205 }
10206
10207 else
10208 {
10209 set_optab_libfunc (add_optab, mode, "_q_add");
10210 set_optab_libfunc (sub_optab, mode, "_q_sub");
10211 set_optab_libfunc (neg_optab, mode, "_q_neg");
10212 set_optab_libfunc (smul_optab, mode, "_q_mul");
10213 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10214 if (TARGET_PPC_GPOPT)
10215 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10216
10217 set_optab_libfunc (eq_optab, mode, "_q_feq");
10218 set_optab_libfunc (ne_optab, mode, "_q_fne");
10219 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10220 set_optab_libfunc (ge_optab, mode, "_q_fge");
10221 set_optab_libfunc (lt_optab, mode, "_q_flt");
10222 set_optab_libfunc (le_optab, mode, "_q_fle");
10223
10224 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10225 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10226 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10227 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10228 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10229 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10230 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10231 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10232 }
10233 }
10234
10235 static void
10236 rs6000_init_libfuncs (void)
10237 {
10238 /* __float128 support. */
10239 if (TARGET_FLOAT128_TYPE)
10240 {
10241 init_float128_ibm (IFmode);
10242 init_float128_ieee (KFmode);
10243 }
10244
10245 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10246 if (TARGET_LONG_DOUBLE_128)
10247 {
10248 if (!TARGET_IEEEQUAD)
10249 init_float128_ibm (TFmode);
10250
10251 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10252 else
10253 init_float128_ieee (TFmode);
10254 }
10255 }
10256
10257 /* Emit a potentially record-form instruction, setting DST from SRC.
10258 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10259 signed comparison of DST with zero. If DOT is 1, the generated RTL
10260 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10261 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10262 a separate COMPARE. */
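/* Editor's illustration of the DOT == 1 shape (register numbers are
   arbitrary):

       (parallel [(set (reg:CC 68) (compare:CC (...) (const_int 0)))
                  (clobber (reg:DI 3))])

   DOT == 2 replaces the clobber with (set (reg:DI 3) (...)).  */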
10263
10264 void
10265 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10266 {
10267 if (dot == 0)
10268 {
10269 emit_move_insn (dst, src);
10270 return;
10271 }
10272
10273 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10274 {
10275 emit_move_insn (dst, src);
10276 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10277 return;
10278 }
10279
10280 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10281 if (dot == 1)
10282 {
10283 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10284 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10285 }
10286 else
10287 {
10288 rtx set = gen_rtx_SET (dst, src);
10289 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10290 }
10291 }
10292
10293 \f
10294 /* A validation routine: say whether CODE, a condition code, and MODE
10295 match. The other alternatives either don't make sense or should
10296 never be generated. */
10297
10298 void
10299 validate_condition_mode (enum rtx_code code, machine_mode mode)
10300 {
10301 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10302 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10303 && GET_MODE_CLASS (mode) == MODE_CC);
10304
10305 /* These don't make sense. */
10306 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10307 || mode != CCUNSmode);
10308
10309 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10310 || mode == CCUNSmode);
10311
10312 gcc_assert (mode == CCFPmode
10313 || (code != ORDERED && code != UNORDERED
10314 && code != UNEQ && code != LTGT
10315 && code != UNGT && code != UNLT
10316 && code != UNGE && code != UNLE));
10317
10318 /* These are invalid; the information is not there. */
10319 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10320 }
10321
10322 \f
10323 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10324 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10325 not zero, store there the bit offset (counted from the right) where
10326 the single stretch of 1 bits begins; and similarly for B, the bit
10327 offset where it ends. */
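/* Worked example (editor's note): MASK = 0x0fffff00 in DImode has a
   single run of ones from bit 8 through bit 27 counted from the right,
   so *E = 8 and *B = 27.  A mask with two runs, say 0x0ff00ff0, is
   rejected because VAL + BIT is then not a power of two.  */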
10328
10329 bool
10330 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10331 {
10332 unsigned HOST_WIDE_INT val = INTVAL (mask);
10333 unsigned HOST_WIDE_INT bit;
10334 int nb, ne;
10335 int n = GET_MODE_PRECISION (mode);
10336
10337 if (mode != DImode && mode != SImode)
10338 return false;
10339
10340 if (INTVAL (mask) >= 0)
10341 {
10342 bit = val & -val;
10343 ne = exact_log2 (bit);
10344 nb = exact_log2 (val + bit);
10345 }
10346 else if (val + 1 == 0)
10347 {
10348 nb = n;
10349 ne = 0;
10350 }
10351 else if (val & 1)
10352 {
10353 val = ~val;
10354 bit = val & -val;
10355 nb = exact_log2 (bit);
10356 ne = exact_log2 (val + bit);
10357 }
10358 else
10359 {
10360 bit = val & -val;
10361 ne = exact_log2 (bit);
10362 if (val + bit == 0)
10363 nb = n;
10364 else
10365 nb = 0;
10366 }
10367
10368 nb--;
10369
10370 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10371 return false;
10372
10373 if (b)
10374 *b = nb;
10375 if (e)
10376 *e = ne;
10377
10378 return true;
10379 }
10380
10381 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10382 or rldicr instruction, to implement an AND with it in mode MODE. */
10383
10384 bool
10385 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10386 {
10387 int nb, ne;
10388
10389 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10390 return false;
10391
10392 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10393 does not wrap. */
10394 if (mode == DImode)
10395 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10396
10397 /* For SImode, rlwinm can do everything. */
10398 if (mode == SImode)
10399 return (nb < 32 && ne < 32);
10400
10401 return false;
10402 }
10403
10404 /* Return the instruction template for an AND with mask in mode MODE, with
10405 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10406
10407 const char *
10408 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10409 {
10410 int nb, ne;
10411
10412 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10413 gcc_unreachable ();
10414
10415 if (mode == DImode && ne == 0)
10416 {
10417 operands[3] = GEN_INT (63 - nb);
10418 if (dot)
10419 return "rldicl. %0,%1,0,%3";
10420 return "rldicl %0,%1,0,%3";
10421 }
10422
10423 if (mode == DImode && nb == 63)
10424 {
10425 operands[3] = GEN_INT (63 - ne);
10426 if (dot)
10427 return "rldicr. %0,%1,0,%3";
10428 return "rldicr %0,%1,0,%3";
10429 }
10430
10431 if (nb < 32 && ne < 32)
10432 {
10433 operands[3] = GEN_INT (31 - nb);
10434 operands[4] = GEN_INT (31 - ne);
10435 if (dot)
10436 return "rlwinm. %0,%1,0,%3,%4";
10437 return "rlwinm %0,%1,0,%3,%4";
10438 }
10439
10440 gcc_unreachable ();
10441 }
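
/* For instance, a DImode AND with 0xff (nb == 7, ne == 0) becomes
   "rldicl %0,%1,0,56", clearing the 56 high-order bits, while an SImode
   AND with 0x00ffff00 (nb == 23, ne == 8) becomes "rlwinm %0,%1,0,8,23",
   with the mask begin/end operands in the IBM (big-endian) bit
   numbering.  */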
10442
10443 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10444 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10445 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10446
10447 bool
10448 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10449 {
10450 int nb, ne;
10451
10452 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10453 return false;
10454
10455 int n = GET_MODE_PRECISION (mode);
10456 int sh = -1;
10457
10458 if (CONST_INT_P (XEXP (shift, 1)))
10459 {
10460 sh = INTVAL (XEXP (shift, 1));
10461 if (sh < 0 || sh >= n)
10462 return false;
10463 }
10464
10465 rtx_code code = GET_CODE (shift);
10466
10467 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10468 if (sh == 0)
10469 code = ROTATE;
10470
10471 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10472 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10473 code = ASHIFT;
10474 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10475 {
10476 code = LSHIFTRT;
10477 sh = n - sh;
10478 }
10479
10480 /* DImode rotates need rld*. */
10481 if (mode == DImode && code == ROTATE)
10482 return (nb == 63 || ne == 0 || ne == sh);
10483
10484 /* SImode rotates need rlw*. */
10485 if (mode == SImode && code == ROTATE)
10486 return (nb < 32 && ne < 32 && sh < 32);
10487
10488 /* Wrap-around masks are only okay for rotates. */
10489 if (ne > nb)
10490 return false;
10491
10492 /* Variable shifts are only okay for rotates. */
10493 if (sh < 0)
10494 return false;
10495
10496 /* Don't allow ASHIFT if the mask is wrong for that. */
10497 if (code == ASHIFT && ne < sh)
10498 return false;
10499
10500 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10501 if the mask is wrong for that. */
10502 if (nb < 32 && ne < 32 && sh < 32
10503 && !(code == LSHIFTRT && nb >= 32 - sh))
10504 return true;
10505
10506 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10507 if the mask is wrong for that. */
10508 if (code == LSHIFTRT)
10509 sh = 64 - sh;
10510 if (nb == 63 || ne == 0 || ne == sh)
10511 return !(code == LSHIFTRT && nb >= sh);
10512
10513 return false;
10514 }
10515
10516 /* Return the instruction template for a shift with mask in mode MODE, with
10517 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10518
10519 const char *
10520 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10521 {
10522 int nb, ne;
10523
10524 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10525 gcc_unreachable ();
10526
10527 if (mode == DImode && ne == 0)
10528 {
10529 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10530 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10531 operands[3] = GEN_INT (63 - nb);
10532 if (dot)
10533 return "rld%I2cl. %0,%1,%2,%3";
10534 return "rld%I2cl %0,%1,%2,%3";
10535 }
10536
10537 if (mode == DImode && nb == 63)
10538 {
10539 operands[3] = GEN_INT (63 - ne);
10540 if (dot)
10541 return "rld%I2cr. %0,%1,%2,%3";
10542 return "rld%I2cr %0,%1,%2,%3";
10543 }
10544
10545 if (mode == DImode
10546 && GET_CODE (operands[4]) != LSHIFTRT
10547 && CONST_INT_P (operands[2])
10548 && ne == INTVAL (operands[2]))
10549 {
10550 operands[3] = GEN_INT (63 - nb);
10551 if (dot)
10552 return "rld%I2c. %0,%1,%2,%3";
10553 return "rld%I2c %0,%1,%2,%3";
10554 }
10555
10556 if (nb < 32 && ne < 32)
10557 {
10558 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10559 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10560 operands[3] = GEN_INT (31 - nb);
10561 operands[4] = GEN_INT (31 - ne);
10562 /* This insn can also be a 64-bit rotate with a mask that really makes
10563 it just a shift right (with mask); the %h below adjusts for that
10564 situation (the shift count is >= 32 in that case). */
10565 if (dot)
10566 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10567 return "rlw%I2nm %0,%1,%h2,%3,%4";
10568 }
10569
10570 gcc_unreachable ();
10571 }
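
/* A worked example: (ashift:SI (reg) (const_int 8)) under the mask
   0x00ffff00 has nb == 23 and ne == 8, so the rlwinm arm above yields
   "rlwinm %0,%1,8,8,23".  Using a rotate is safe here because the bits
   that wrap around from the top land entirely under the cleared part of
   the mask.  */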
10572
10573 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10574 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10575 ASHIFT, or LSHIFTRT) in mode MODE. */
10576
10577 bool
10578 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10579 {
10580 int nb, ne;
10581
10582 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10583 return false;
10584
10585 int n = GET_MODE_PRECISION (mode);
10586
10587 int sh = INTVAL (XEXP (shift, 1));
10588 if (sh < 0 || sh >= n)
10589 return false;
10590
10591 rtx_code code = GET_CODE (shift);
10592
10593 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10594 if (sh == 0)
10595 code = ROTATE;
10596
10597 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10598 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10599 code = ASHIFT;
10600 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10601 {
10602 code = LSHIFTRT;
10603 sh = n - sh;
10604 }
10605
10606 /* DImode rotates need rldimi. */
10607 if (mode == DImode && code == ROTATE)
10608 return (ne == sh);
10609
10610 /* SImode rotates need rlwimi. */
10611 if (mode == SImode && code == ROTATE)
10612 return (nb < 32 && ne < 32 && sh < 32);
10613
10614 /* Wrap-around masks are only okay for rotates. */
10615 if (ne > nb)
10616 return false;
10617
10618 /* Don't allow ASHIFT if the mask is wrong for that. */
10619 if (code == ASHIFT && ne < sh)
10620 return false;
10621
10622 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10623 if the mask is wrong for that. */
10624 if (nb < 32 && ne < 32 && sh < 32
10625 && !(code == LSHIFTRT && nb >= 32 - sh))
10626 return true;
10627
10628 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10629 if the mask is wrong for that. */
10630 if (code == LSHIFTRT)
10631 sh = 64 - sh;
10632 if (ne == sh)
10633 return !(code == LSHIFTRT && nb >= sh);
10634
10635 return false;
10636 }
10637
10638 /* Return the instruction template for an insert with mask in mode MODE, with
10639 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10640
10641 const char *
10642 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10643 {
10644 int nb, ne;
10645
10646 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10647 gcc_unreachable ();
10648
10649 /* Prefer rldimi because rlwimi is cracked. */
10650 if (TARGET_POWERPC64
10651 && (!dot || mode == DImode)
10652 && GET_CODE (operands[4]) != LSHIFTRT
10653 && ne == INTVAL (operands[2]))
10654 {
10655 operands[3] = GEN_INT (63 - nb);
10656 if (dot)
10657 return "rldimi. %0,%1,%2,%3";
10658 return "rldimi %0,%1,%2,%3";
10659 }
10660
10661 if (nb < 32 && ne < 32)
10662 {
10663 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10664 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10665 operands[3] = GEN_INT (31 - nb);
10666 operands[4] = GEN_INT (31 - ne);
10667 if (dot)
10668 return "rlwimi. %0,%1,%2,%3,%4";
10669 return "rlwimi %0,%1,%2,%3,%4";
10670 }
10671
10672 gcc_unreachable ();
10673 }
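
/* For example, inserting a field with (ashift:SI (reg) (const_int 16))
   under the mask 0x00ff0000 (nb == 23, ne == 16) yields
   "rldimi %0,%1,16,40" on a 64-bit target (rldimi is preferred because
   rlwimi is cracked) and "rlwimi %0,%1,16,8,15" otherwise; bits 16..23
   of the result come from the rotated source and every other bit of the
   destination is left untouched.  */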
10674
10675 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10676 using two machine instructions. */
10677
10678 bool
10679 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10680 {
10681 /* There are two kinds of AND we can handle with two insns:
10682 1) those we can do with two rl* insn;
10683 2) ori[s];xori[s].
10684
10685 We do not handle that last case yet. */
10686
10687 /* If there is just one stretch of ones, we can do it. */
10688 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10689 return true;
10690
10691 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10692 one insn, we can do the whole thing with two. */
10693 unsigned HOST_WIDE_INT val = INTVAL (c);
10694 unsigned HOST_WIDE_INT bit1 = val & -val;
10695 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10696 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10697 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10698 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10699 }
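
/* A worked example of the hole-filling arithmetic above: for C =
   0x00ff00ff, bit1 = 0x1 (lowest set bit), bit2 = 0x100 (lowest bit of
   the lowest hole), val1 = 0x00ff0000, and bit3 = 0x10000 (first set bit
   above that hole), so val + bit3 - bit2 = 0x00ffffff; filling the hole
   at bits 8..15 leaves a single run of ones, which
   rs6000_is_valid_and_mask accepts.  */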
10700
10701 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10702 If EXPAND is true, split rotate-and-mask instructions we generate to
10703 their constituent parts as well (this is used during expand); if DOT
10704 is 1, make the last insn a record-form instruction clobbering the
10705 destination GPR and setting the CC reg (from operands[3]); if 2, set
10706 that GPR as well as the CC reg. */
10707
10708 void
10709 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10710 {
10711 gcc_assert (!(expand && dot));
10712
10713 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10714
10715 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10716 shift right. This generates better code than doing the masks without
10717 shifts, or shifting first right and then left. */
10718 int nb, ne;
10719 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10720 {
10721 gcc_assert (mode == DImode);
10722
10723 int shift = 63 - nb;
10724 if (expand)
10725 {
10726 rtx tmp1 = gen_reg_rtx (DImode);
10727 rtx tmp2 = gen_reg_rtx (DImode);
10728 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10729 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10730 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10731 }
10732 else
10733 {
10734 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10735 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10736 emit_move_insn (operands[0], tmp);
10737 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10738 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10739 }
10740 return;
10741 }
10742
10743 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10744 that does the rest. */
10745 unsigned HOST_WIDE_INT bit1 = val & -val;
10746 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10747 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10748 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10749
10750 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10751 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10752
10753 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10754
10755 /* Two "no-rotate"-and-mask instructions, for SImode. */
10756 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10757 {
10758 gcc_assert (mode == SImode);
10759
10760 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10761 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10762 emit_move_insn (reg, tmp);
10763 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10764 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10765 return;
10766 }
10767
10768 gcc_assert (mode == DImode);
10769
10770 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10771 insns; we have to do the first in SImode, because it wraps. */
10772 if (mask2 <= 0xffffffff
10773 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10774 {
10775 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10776 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10777 GEN_INT (mask1));
10778 rtx reg_low = gen_lowpart (SImode, reg);
10779 emit_move_insn (reg_low, tmp);
10780 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10781 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10782 return;
10783 }
10784
10785 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10786 at the top end), rotate back and clear the other hole. */
10787 int right = exact_log2 (bit3);
10788 int left = 64 - right;
10789
10790 /* Rotate the mask too. */
10791 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10792
10793 if (expand)
10794 {
10795 rtx tmp1 = gen_reg_rtx (DImode);
10796 rtx tmp2 = gen_reg_rtx (DImode);
10797 rtx tmp3 = gen_reg_rtx (DImode);
10798 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10799 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10800 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10801 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10802 }
10803 else
10804 {
10805 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10806 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10807 emit_move_insn (operands[0], tmp);
10808 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10809 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10810 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10811 }
10812 }
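
/* Continuing the 0x00ff00ff example from above: mask2 = 0x00ffffff and
   mask1 = 0xffffffffffff00ff.  Seen as a 32-bit mask, mask1 wraps (ones
   at both ends with a hole in the middle), so the SImode-lowpart branch
   emits an rlwinm for the first AND and a plain rldicl-style AND with
   mask2 for the second; since mask1 & mask2 equals the original constant,
   the two together implement the requested AND.  */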
10813 \f
10814 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10815 for lfq and stfq insns iff the registers are hard registers. */
10816
10817 int
10818 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10819 {
10820 /* We might have been passed a SUBREG. */
10821 if (!REG_P (reg1) || !REG_P (reg2))
10822 return 0;
10823
10824 /* We might have been passed non-floating-point registers. */
10825 if (!FP_REGNO_P (REGNO (reg1))
10826 || !FP_REGNO_P (REGNO (reg2)))
10827 return 0;
10828
10829 return (REGNO (reg1) == REGNO (reg2) - 1);
10830 }
10831
10832 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10833 addr1 and addr2 must be in consecutive memory locations
10834 (addr2 == addr1 + 8). */
10835
10836 int
10837 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10838 {
10839 rtx addr1, addr2;
10840 unsigned int reg1, reg2;
10841 int offset1, offset2;
10842
10843 /* The mems cannot be volatile. */
10844 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10845 return 0;
10846
10847 addr1 = XEXP (mem1, 0);
10848 addr2 = XEXP (mem2, 0);
10849
10850 /* Extract an offset (if used) from the first addr. */
10851 if (GET_CODE (addr1) == PLUS)
10852 {
10853 /* If not a REG, return zero. */
10854 if (!REG_P (XEXP (addr1, 0)))
10855 return 0;
10856 else
10857 {
10858 reg1 = REGNO (XEXP (addr1, 0));
10859 /* The offset must be constant! */
10860 if (!CONST_INT_P (XEXP (addr1, 1)))
10861 return 0;
10862 offset1 = INTVAL (XEXP (addr1, 1));
10863 }
10864 }
10865 else if (!REG_P (addr1))
10866 return 0;
10867 else
10868 {
10869 reg1 = REGNO (addr1);
10870 /* This was a simple (mem (reg)) expression. Offset is 0. */
10871 offset1 = 0;
10872 }
10873
10874 /* And now for the second addr. */
10875 if (GET_CODE (addr2) == PLUS)
10876 {
10877 /* If not a REG, return zero. */
10878 if (!REG_P (XEXP (addr2, 0)))
10879 return 0;
10880 else
10881 {
10882 reg2 = REGNO (XEXP (addr2, 0));
10883 /* The offset must be constant. */
10884 if (!CONST_INT_P (XEXP (addr2, 1)))
10885 return 0;
10886 offset2 = INTVAL (XEXP (addr2, 1));
10887 }
10888 }
10889 else if (!REG_P (addr2))
10890 return 0;
10891 else
10892 {
10893 reg2 = REGNO (addr2);
10894 /* This was a simple (mem (reg)) expression. Offset is 0. */
10895 offset2 = 0;
10896 }
10897
10898 /* Both of these must have the same base register. */
10899 if (reg1 != reg2)
10900 return 0;
10901
10902 /* The offset for the second addr must be 8 more than the first addr. */
10903 if (offset2 != offset1 + 8)
10904 return 0;
10905
10906 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10907 instructions. */
10908 return 1;
10909 }
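
/* For example, (mem (plus (reg 3) (const_int 16))) paired with
   (mem (plus (reg 3) (const_int 24))) shares the base register and the
   offsets differ by exactly 8, so the pair qualifies; different base
   registers or any other offset delta is rejected.  */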
10910 \f
10911 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
10912 need to use DDmode; in all other cases we can use the same mode. */
10913 static machine_mode
10914 rs6000_secondary_memory_needed_mode (machine_mode mode)
10915 {
10916 if (lra_in_progress && mode == SDmode)
10917 return DDmode;
10918 return mode;
10919 }
10920
10921 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10922 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10923 only work on the traditional altivec registers, note if an altivec register
10924 was chosen. */
10925
10926 static enum rs6000_reg_type
10927 register_to_reg_type (rtx reg, bool *is_altivec)
10928 {
10929 HOST_WIDE_INT regno;
10930 enum reg_class rclass;
10931
10932 if (SUBREG_P (reg))
10933 reg = SUBREG_REG (reg);
10934
10935 if (!REG_P (reg))
10936 return NO_REG_TYPE;
10937
10938 regno = REGNO (reg);
10939 if (!HARD_REGISTER_NUM_P (regno))
10940 {
10941 if (!lra_in_progress && !reload_completed)
10942 return PSEUDO_REG_TYPE;
10943
10944 regno = true_regnum (reg);
10945 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10946 return PSEUDO_REG_TYPE;
10947 }
10948
10949 gcc_assert (regno >= 0);
10950
10951 if (is_altivec && ALTIVEC_REGNO_P (regno))
10952 *is_altivec = true;
10953
10954 rclass = rs6000_regno_regclass[regno];
10955 return reg_class_to_reg_type[(int)rclass];
10956 }
10957
10958 /* Helper function to return the cost of adding a TOC entry address. */
10959
10960 static inline int
10961 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10962 {
10963 int ret;
10964
10965 if (TARGET_CMODEL != CMODEL_SMALL)
10966 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10967
10968 else
10969 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10970
10971 return ret;
10972 }
10973
10974 /* Helper function for rs6000_secondary_reload to determine whether the memory
10975 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10976 needs reloading. Return negative if the memory is not handled by the memory
10977 helper functions (so a different reload method should be tried), 0 if no
10978 additional instructions are needed, and positive to give the extra cost of
10979 the memory access. */
10980
10981 static int
10982 rs6000_secondary_reload_memory (rtx addr,
10983 enum reg_class rclass,
10984 machine_mode mode)
10985 {
10986 int extra_cost = 0;
10987 rtx reg, and_arg, plus_arg0, plus_arg1;
10988 addr_mask_type addr_mask;
10989 const char *type = NULL;
10990 const char *fail_msg = NULL;
10991
10992 if (GPR_REG_CLASS_P (rclass))
10993 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10994
10995 else if (rclass == FLOAT_REGS)
10996 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10997
10998 else if (rclass == ALTIVEC_REGS)
10999 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11000
11001 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11002 else if (rclass == VSX_REGS)
11003 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11004 & ~RELOAD_REG_AND_M16);
11005
11006 /* If the register allocator hasn't made up its mind yet on the register
11007 class to use, settle on defaults to use. */
11008 else if (rclass == NO_REGS)
11009 {
11010 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11011 & ~RELOAD_REG_AND_M16);
11012
11013 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11014 addr_mask &= ~(RELOAD_REG_INDEXED
11015 | RELOAD_REG_PRE_INCDEC
11016 | RELOAD_REG_PRE_MODIFY);
11017 }
11018
11019 else
11020 addr_mask = 0;
11021
11022 /* If the register isn't valid in this register class, just return now. */
11023 if ((addr_mask & RELOAD_REG_VALID) == 0)
11024 {
11025 if (TARGET_DEBUG_ADDR)
11026 {
11027 fprintf (stderr,
11028 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11029 "not valid in class\n",
11030 GET_MODE_NAME (mode), reg_class_names[rclass]);
11031 debug_rtx (addr);
11032 }
11033
11034 return -1;
11035 }
11036
11037 switch (GET_CODE (addr))
11038 {
11039 /* Does the register class support auto update forms for this mode? We
11040 don't need a scratch register, since the powerpc only supports
11041 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11042 case PRE_INC:
11043 case PRE_DEC:
11044 reg = XEXP (addr, 0);
11045 if (!base_reg_operand (reg, GET_MODE (reg)))
11046 {
11047 fail_msg = "no base register #1";
11048 extra_cost = -1;
11049 }
11050
11051 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11052 {
11053 extra_cost = 1;
11054 type = "update";
11055 }
11056 break;
11057
11058 case PRE_MODIFY:
11059 reg = XEXP (addr, 0);
11060 plus_arg1 = XEXP (addr, 1);
11061 if (!base_reg_operand (reg, GET_MODE (reg))
11062 || GET_CODE (plus_arg1) != PLUS
11063 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11064 {
11065 fail_msg = "bad PRE_MODIFY";
11066 extra_cost = -1;
11067 }
11068
11069 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11070 {
11071 extra_cost = 1;
11072 type = "update";
11073 }
11074 break;
11075
11076 /* Do we need to simulate AND -16 to clear the bottom address bits used
11077 in VMX load/stores? Only allow the AND for vector sizes. */
11078 case AND:
11079 and_arg = XEXP (addr, 0);
11080 if (GET_MODE_SIZE (mode) != 16
11081 || !CONST_INT_P (XEXP (addr, 1))
11082 || INTVAL (XEXP (addr, 1)) != -16)
11083 {
11084 fail_msg = "bad Altivec AND #1";
11085 extra_cost = -1;
11086 }
11087
11088 if (rclass != ALTIVEC_REGS)
11089 {
11090 if (legitimate_indirect_address_p (and_arg, false))
11091 extra_cost = 1;
11092
11093 else if (legitimate_indexed_address_p (and_arg, false))
11094 extra_cost = 2;
11095
11096 else
11097 {
11098 fail_msg = "bad Altivec AND #2";
11099 extra_cost = -1;
11100 }
11101
11102 type = "and";
11103 }
11104 break;
11105
11106 /* If this is an indirect address, make sure it is a base register. */
11107 case REG:
11108 case SUBREG:
11109 if (!legitimate_indirect_address_p (addr, false))
11110 {
11111 extra_cost = 1;
11112 type = "move";
11113 }
11114 break;
11115
11116 /* If this is an indexed address, make sure the register class can handle
11117 indexed addresses for this mode. */
11118 case PLUS:
11119 plus_arg0 = XEXP (addr, 0);
11120 plus_arg1 = XEXP (addr, 1);
11121
11122 /* (plus (plus (reg) (constant)) (constant)) is generated during
11123 push_reload processing, so handle it now. */
11124 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11125 {
11126 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11127 {
11128 extra_cost = 1;
11129 type = "offset";
11130 }
11131 }
11132
11133 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11134 push_reload processing, so handle it now. */
11135 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11136 {
11137 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11138 {
11139 extra_cost = 1;
11140 type = "indexed #2";
11141 }
11142 }
11143
11144 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11145 {
11146 fail_msg = "no base register #2";
11147 extra_cost = -1;
11148 }
11149
11150 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11151 {
11152 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11153 || !legitimate_indexed_address_p (addr, false))
11154 {
11155 extra_cost = 1;
11156 type = "indexed";
11157 }
11158 }
11159
11160 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11161 && CONST_INT_P (plus_arg1))
11162 {
11163 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11164 {
11165 extra_cost = 1;
11166 type = "vector d-form offset";
11167 }
11168 }
11169
11170 /* Make sure the register class can handle offset addresses. */
11171 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11172 {
11173 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11174 {
11175 extra_cost = 1;
11176 type = "offset #2";
11177 }
11178 }
11179
11180 else
11181 {
11182 fail_msg = "bad PLUS";
11183 extra_cost = -1;
11184 }
11185
11186 break;
11187
11188 case LO_SUM:
11189 /* Quad offsets are restricted and can't handle normal addresses. */
11190 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11191 {
11192 extra_cost = -1;
11193 type = "vector d-form lo_sum";
11194 }
11195
11196 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11197 {
11198 fail_msg = "bad LO_SUM";
11199 extra_cost = -1;
11200 }
11201
11202 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11203 {
11204 extra_cost = 1;
11205 type = "lo_sum";
11206 }
11207 break;
11208
11209 /* Static addresses need to create a TOC entry. */
11210 case CONST:
11211 case SYMBOL_REF:
11212 case LABEL_REF:
11213 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11214 {
11215 extra_cost = -1;
11216 type = "vector d-form lo_sum #2";
11217 }
11218
11219 else
11220 {
11221 type = "address";
11222 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11223 }
11224 break;
11225
11226 /* TOC references look like offsettable memory. */
11227 case UNSPEC:
11228 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11229 {
11230 fail_msg = "bad UNSPEC";
11231 extra_cost = -1;
11232 }
11233
11234 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11235 {
11236 extra_cost = -1;
11237 type = "vector d-form lo_sum #3";
11238 }
11239
11240 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11241 {
11242 extra_cost = 1;
11243 type = "toc reference";
11244 }
11245 break;
11246
11247 default:
11248 {
11249 fail_msg = "bad address";
11250 extra_cost = -1;
11251 }
11252 }
11253
11254 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11255 {
11256 if (extra_cost < 0)
11257 fprintf (stderr,
11258 "rs6000_secondary_reload_memory error: mode = %s, "
11259 "class = %s, addr_mask = '%s', %s\n",
11260 GET_MODE_NAME (mode),
11261 reg_class_names[rclass],
11262 rs6000_debug_addr_mask (addr_mask, false),
11263 (fail_msg != NULL) ? fail_msg : "<bad address>");
11264
11265 else
11266 fprintf (stderr,
11267 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11268 "addr_mask = '%s', extra cost = %d, %s\n",
11269 GET_MODE_NAME (mode),
11270 reg_class_names[rclass],
11271 rs6000_debug_addr_mask (addr_mask, false),
11272 extra_cost,
11273 (type) ? type : "<none>");
11274
11275 debug_rtx (addr);
11276 }
11277
11278 return extra_cost;
11279 }
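
/* As an example of the costs above: an ALTIVEC_REGS access through
   (and (reg) (const_int -16)) needs no extra instructions, while the same
   AND form for another register class must be simulated and reports an
   extra cost of 1 (indirect inner address) or 2 (indexed inner
   address).  */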
11280
11281 /* Helper function for rs6000_secondary_reload to return true if a move to a
11282 different register class is really a simple move. */
11283
11284 static bool
11285 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11286 enum rs6000_reg_type from_type,
11287 machine_mode mode)
11288 {
11289 int size = GET_MODE_SIZE (mode);
11290
11291 /* Add support for various direct moves available. In this function, we only
11292 look at cases where we don't need any extra registers, and one or more
11293 simple move insns are issued. Small integers were originally not allowed
11294 in FPR/VSX registers. Single-precision binary floating point is not a
11295 simple move because we need to convert to the single-precision memory layout.
11296 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11297 need special direct move handling, which we do not support yet. */
11298 if (TARGET_DIRECT_MOVE
11299 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11300 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11301 {
11302 if (TARGET_POWERPC64)
11303 {
11304 /* ISA 2.07: MTVSRD or MVFVSRD. */
11305 if (size == 8)
11306 return true;
11307
11308 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11309 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11310 return true;
11311 }
11312
11313 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11314 if (TARGET_P8_VECTOR)
11315 {
11316 if (mode == SImode)
11317 return true;
11318
11319 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11320 return true;
11321 }
11322
11323 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11324 if (mode == SDmode)
11325 return true;
11326 }
11327
11328 /* Move to/from SPR. */
11329 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11330 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11331 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11332 return true;
11333
11334 return false;
11335 }
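
/* For instance, a DImode GPR <-> VSX move on a 64-bit ISA 2.07 target is
   a simple move (a single mtvsrd or mfvsrd), while a 128-bit move needs
   the multi-insn helpers handled by rs6000_secondary_reload_direct_move
   below.  */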
11336
11337 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
11338 special direct moves that involve allocating an extra register. Return
11339 true if there is such a helper, recording its insn code and extra cost
11340 in SRI; return false if not. */
11341
11342 static bool
11343 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11344 enum rs6000_reg_type from_type,
11345 machine_mode mode,
11346 secondary_reload_info *sri,
11347 bool altivec_p)
11348 {
11349 bool ret = false;
11350 enum insn_code icode = CODE_FOR_nothing;
11351 int cost = 0;
11352 int size = GET_MODE_SIZE (mode);
11353
11354 if (TARGET_POWERPC64 && size == 16)
11355 {
11356 /* Handle moving 128-bit values from GPRs to VSX registers on
11357 ISA 2.07 (power8, power9) when running in 64-bit mode using
11358 XXPERMDI to glue the two 64-bit values back together. */
11359 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11360 {
11361 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11362 icode = reg_addr[mode].reload_vsx_gpr;
11363 }
11364
11365 /* Handle moving 128-bit values from VSX registers to GPRs on
11366 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11367 bottom 64-bit value. */
11368 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11369 {
11370 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11371 icode = reg_addr[mode].reload_gpr_vsx;
11372 }
11373 }
11374
11375 else if (TARGET_POWERPC64 && mode == SFmode)
11376 {
11377 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11378 {
11379 cost = 3; /* xscvdpspn, mfvsrd, and. */
11380 icode = reg_addr[mode].reload_gpr_vsx;
11381 }
11382
11383 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11384 {
11385 cost = 2; /* mtvsrz, xscvspdpn. */
11386 icode = reg_addr[mode].reload_vsx_gpr;
11387 }
11388 }
11389
11390 else if (!TARGET_POWERPC64 && size == 8)
11391 {
11392 /* Handle moving 64-bit values from GPRs to floating point registers on
11393 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11394 32-bit values back together. Altivec register classes must be handled
11395 specially since a different instruction is used, and the secondary
11396 reload support requires a single instruction class in the scratch
11397 register constraint. However, right now TFmode is not allowed in
11398 Altivec registers, so the pattern will never match. */
11399 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11400 {
11401 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11402 icode = reg_addr[mode].reload_fpr_gpr;
11403 }
11404 }
11405
11406 if (icode != CODE_FOR_nothing)
11407 {
11408 ret = true;
11409 if (sri)
11410 {
11411 sri->icode = icode;
11412 sri->extra_cost = cost;
11413 }
11414 }
11415
11416 return ret;
11417 }
11418
11419 /* Return whether a move between two register classes can be done either
11420 directly (simple move) or via a pattern that uses a single extra temporary
11421 (using ISA 2.07's direct move in this case). */
11422
11423 static bool
11424 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11425 enum rs6000_reg_type from_type,
11426 machine_mode mode,
11427 secondary_reload_info *sri,
11428 bool altivec_p)
11429 {
11430 /* Fall back to load/store reloads if either type is not a register. */
11431 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11432 return false;
11433
11434 /* If we haven't allocated registers yet, assume the move can be done for the
11435 standard register types. */
11436 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11437 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11438 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11439 return true;
11440
11441 /* A move within the same set of registers is a simple move for non-specialized
11442 registers. */
11443 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11444 return true;
11445
11446 /* Check whether a simple move can be done directly. */
11447 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11448 {
11449 if (sri)
11450 {
11451 sri->icode = CODE_FOR_nothing;
11452 sri->extra_cost = 0;
11453 }
11454 return true;
11455 }
11456
11457 /* Now check if we can do it in a few steps. */
11458 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11459 altivec_p);
11460 }
11461
11462 /* Inform reload about cases where moving X with a mode MODE to a register in
11463 RCLASS requires an extra scratch or immediate register. Return the class
11464 needed for the immediate register.
11465
11466 For VSX and Altivec, we may need a register to convert sp+offset into
11467 reg+sp.
11468
11469 For misaligned 64-bit gpr loads and stores we need a register to
11470 convert an offset address to indirect. */
11471
11472 static reg_class_t
11473 rs6000_secondary_reload (bool in_p,
11474 rtx x,
11475 reg_class_t rclass_i,
11476 machine_mode mode,
11477 secondary_reload_info *sri)
11478 {
11479 enum reg_class rclass = (enum reg_class) rclass_i;
11480 reg_class_t ret = ALL_REGS;
11481 enum insn_code icode;
11482 bool default_p = false;
11483 bool done_p = false;
11484
11485 /* Allow subreg of memory before/during reload. */
11486 bool memory_p = (MEM_P (x)
11487 || (!reload_completed && SUBREG_P (x)
11488 && MEM_P (SUBREG_REG (x))));
11489
11490 sri->icode = CODE_FOR_nothing;
11491 sri->t_icode = CODE_FOR_nothing;
11492 sri->extra_cost = 0;
11493 icode = ((in_p)
11494 ? reg_addr[mode].reload_load
11495 : reg_addr[mode].reload_store);
11496
11497 if (REG_P (x) || register_operand (x, mode))
11498 {
11499 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11500 bool altivec_p = (rclass == ALTIVEC_REGS);
11501 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11502
11503 if (!in_p)
11504 std::swap (to_type, from_type);
11505
11506 /* Can we do a direct move of some sort? */
11507 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11508 altivec_p))
11509 {
11510 icode = (enum insn_code)sri->icode;
11511 default_p = false;
11512 done_p = true;
11513 ret = NO_REGS;
11514 }
11515 }
11516
11517 /* Make sure 0.0 is not reloaded or forced into memory. */
11518 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11519 {
11520 ret = NO_REGS;
11521 default_p = false;
11522 done_p = true;
11523 }
11524
11525 /* If this is a scalar floating point value and we want to load it into the
11526 traditional Altivec registers, do it by moving through a traditional floating
11527 point register, unless we have D-form addressing. Also make sure that
11528 non-zero constants use an FPR. */
11529 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11530 && !mode_supports_vmx_dform (mode)
11531 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11532 && (memory_p || CONST_DOUBLE_P (x)))
11533 {
11534 ret = FLOAT_REGS;
11535 default_p = false;
11536 done_p = true;
11537 }
11538
11539 /* Handle reload of load/stores if we have reload helper functions. */
11540 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11541 {
11542 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11543 mode);
11544
11545 if (extra_cost >= 0)
11546 {
11547 done_p = true;
11548 ret = NO_REGS;
11549 if (extra_cost > 0)
11550 {
11551 sri->extra_cost = extra_cost;
11552 sri->icode = icode;
11553 }
11554 }
11555 }
11556
11557 /* Handle unaligned loads and stores of integer registers. */
11558 if (!done_p && TARGET_POWERPC64
11559 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11560 && memory_p
11561 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11562 {
11563 rtx addr = XEXP (x, 0);
11564 rtx off = address_offset (addr);
11565
11566 if (off != NULL_RTX)
11567 {
11568 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11569 unsigned HOST_WIDE_INT offset = INTVAL (off);
11570
11571 /* We need a secondary reload when our legitimate_address_p
11572 says the address is good (as otherwise the entire address
11573 will be reloaded), and the offset is not a multiple of
11574 four or we have an address wrap. Address wrap will only
11575 occur for LO_SUMs since legitimate_offset_address_p
11576 rejects addresses for 16-byte mems that will wrap. */
11577 if (GET_CODE (addr) == LO_SUM
11578 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11579 && ((offset & 3) != 0
11580 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11581 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11582 && (offset & 3) != 0))
11583 {
11584 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11585 if (in_p)
11586 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11587 : CODE_FOR_reload_di_load);
11588 else
11589 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11590 : CODE_FOR_reload_di_store);
11591 sri->extra_cost = 2;
11592 ret = NO_REGS;
11593 done_p = true;
11594 }
11595 else
11596 default_p = true;
11597 }
11598 else
11599 default_p = true;
11600 }
11601
11602 if (!done_p && !TARGET_POWERPC64
11603 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11604 && memory_p
11605 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11606 {
11607 rtx addr = XEXP (x, 0);
11608 rtx off = address_offset (addr);
11609
11610 if (off != NULL_RTX)
11611 {
11612 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11613 unsigned HOST_WIDE_INT offset = INTVAL (off);
11614
11615 /* We need a secondary reload when our legitimate_address_p
11616 says the address is good (as otherwise the entire address
11617 will be reloaded), and we have a wrap.
11618
11619 legitimate_lo_sum_address_p allows LO_SUM addresses to
11620 have any offset so test for wrap in the low 16 bits.
11621
11622 legitimate_offset_address_p checks for the range
11623 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11624 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11625 [0x7ff4,0x7fff] respectively, so test for the
11626 intersection of these ranges, [0x7ffc,0x7fff] and
11627 [0x7ff4,0x7ff7] respectively.
11628
11629 Note that the address we see here may have been
11630 manipulated by legitimize_reload_address. */
11631 if (GET_CODE (addr) == LO_SUM
11632 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11633 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11634 {
11635 if (in_p)
11636 sri->icode = CODE_FOR_reload_si_load;
11637 else
11638 sri->icode = CODE_FOR_reload_si_store;
11639 sri->extra_cost = 2;
11640 ret = NO_REGS;
11641 done_p = true;
11642 }
11643 else
11644 default_p = true;
11645 }
11646 else
11647 default_p = true;
11648 }
11649
11650 if (!done_p)
11651 default_p = true;
11652
11653 if (default_p)
11654 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11655
11656 gcc_assert (ret != ALL_REGS);
11657
11658 if (TARGET_DEBUG_ADDR)
11659 {
11660 fprintf (stderr,
11661 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11662 "mode = %s",
11663 reg_class_names[ret],
11664 in_p ? "true" : "false",
11665 reg_class_names[rclass],
11666 GET_MODE_NAME (mode));
11667
11668 if (reload_completed)
11669 fputs (", after reload", stderr);
11670
11671 if (!done_p)
11672 fputs (", done_p not set", stderr);
11673
11674 if (default_p)
11675 fputs (", default secondary reload", stderr);
11676
11677 if (sri->icode != CODE_FOR_nothing)
11678 fprintf (stderr, ", reload func = %s, extra cost = %d",
11679 insn_data[sri->icode].name, sri->extra_cost);
11680
11681 else if (sri->extra_cost > 0)
11682 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11683
11684 fputs ("\n", stderr);
11685 debug_rtx (x);
11686 }
11687
11688 return ret;
11689 }
11690
11691 /* Better tracing for rs6000_secondary_reload_inner. */
11692
11693 static void
11694 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11695 bool store_p)
11696 {
11697 rtx set, clobber;
11698
11699 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11700
11701 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11702 store_p ? "store" : "load");
11703
11704 if (store_p)
11705 set = gen_rtx_SET (mem, reg);
11706 else
11707 set = gen_rtx_SET (reg, mem);
11708
11709 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11710 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11711 }
11712
11713 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11714 ATTRIBUTE_NORETURN;
11715
11716 static void
11717 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11718 bool store_p)
11719 {
11720 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11721 gcc_unreachable ();
11722 }
11723
11724 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11725 reload helper functions. These were identified in
11726 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11727 reload, it calls the insns:
11728 reload_<RELOAD:mode>_<P:mptrsize>_store
11729 reload_<RELOAD:mode>_<P:mptrsize>_load
11730
11731 which in turn calls this function, to do whatever is necessary to create
11732 valid addresses. */
11733
11734 void
11735 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11736 {
11737 int regno = true_regnum (reg);
11738 machine_mode mode = GET_MODE (reg);
11739 addr_mask_type addr_mask;
11740 rtx addr;
11741 rtx new_addr;
11742 rtx op_reg, op0, op1;
11743 rtx and_op;
11744 rtx cc_clobber;
11745 rtvec rv;
11746
11747 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11748 || !base_reg_operand (scratch, GET_MODE (scratch)))
11749 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11750
11751 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11752 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11753
11754 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11755 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11756
11757 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11758 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11759
11760 else
11761 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11762
11763 /* Make sure the mode is valid in this register class. */
11764 if ((addr_mask & RELOAD_REG_VALID) == 0)
11765 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11766
11767 if (TARGET_DEBUG_ADDR)
11768 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11769
11770 new_addr = addr = XEXP (mem, 0);
11771 switch (GET_CODE (addr))
11772 {
11773 /* Does the register class support auto update forms for this mode? If
11774 not, do the update now. We don't need a scratch register, since the
11775 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11776 case PRE_INC:
11777 case PRE_DEC:
11778 op_reg = XEXP (addr, 0);
11779 if (!base_reg_operand (op_reg, Pmode))
11780 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11781
11782 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11783 {
11784 int delta = GET_MODE_SIZE (mode);
11785 if (GET_CODE (addr) == PRE_DEC)
11786 delta = -delta;
11787 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11788 new_addr = op_reg;
11789 }
11790 break;
11791
11792 case PRE_MODIFY:
11793 op0 = XEXP (addr, 0);
11794 op1 = XEXP (addr, 1);
11795 if (!base_reg_operand (op0, Pmode)
11796 || GET_CODE (op1) != PLUS
11797 || !rtx_equal_p (op0, XEXP (op1, 0)))
11798 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11799
11800 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11801 {
11802 emit_insn (gen_rtx_SET (op0, op1));
11803 new_addr = op0;
11804 }
11805 break;
11806
11807 /* Do we need to simulate AND -16 to clear the bottom address bits used
11808 in VMX load/stores? */
11809 case AND:
11810 op0 = XEXP (addr, 0);
11811 op1 = XEXP (addr, 1);
11812 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11813 {
11814 if (REG_P (op0) || SUBREG_P (op0))
11815 op_reg = op0;
11816
11817 else if (GET_CODE (op1) == PLUS)
11818 {
11819 emit_insn (gen_rtx_SET (scratch, op1));
11820 op_reg = scratch;
11821 }
11822
11823 else
11824 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11825
11826 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11827 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11828 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11829 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11830 new_addr = scratch;
11831 }
11832 break;
11833
11834 /* If this is an indirect address, make sure it is a base register. */
11835 case REG:
11836 case SUBREG:
11837 if (!base_reg_operand (addr, GET_MODE (addr)))
11838 {
11839 emit_insn (gen_rtx_SET (scratch, addr));
11840 new_addr = scratch;
11841 }
11842 break;
11843
11844 /* If this is an indexed address, make sure the register class can handle
11845 indexed addresses for this mode. */
11846 case PLUS:
11847 op0 = XEXP (addr, 0);
11848 op1 = XEXP (addr, 1);
11849 if (!base_reg_operand (op0, Pmode))
11850 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11851
11852 else if (int_reg_operand (op1, Pmode))
11853 {
11854 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11855 {
11856 emit_insn (gen_rtx_SET (scratch, addr));
11857 new_addr = scratch;
11858 }
11859 }
11860
11861 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11862 {
11863 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11864 || !quad_address_p (addr, mode, false))
11865 {
11866 emit_insn (gen_rtx_SET (scratch, addr));
11867 new_addr = scratch;
11868 }
11869 }
11870
11871 /* Make sure the register class can handle offset addresses. */
11872 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11873 {
11874 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11875 {
11876 emit_insn (gen_rtx_SET (scratch, addr));
11877 new_addr = scratch;
11878 }
11879 }
11880
11881 else
11882 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11883
11884 break;
11885
11886 case LO_SUM:
11887 op0 = XEXP (addr, 0);
11888 op1 = XEXP (addr, 1);
11889 if (!base_reg_operand (op0, Pmode))
11890 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11891
11892 else if (int_reg_operand (op1, Pmode))
11893 {
11894 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11895 {
11896 emit_insn (gen_rtx_SET (scratch, addr));
11897 new_addr = scratch;
11898 }
11899 }
11900
11901 /* Quad offsets are restricted and can't handle normal addresses. */
11902 else if (mode_supports_dq_form (mode))
11903 {
11904 emit_insn (gen_rtx_SET (scratch, addr));
11905 new_addr = scratch;
11906 }
11907
11908 /* Make sure the register class can handle offset addresses. */
11909 else if (legitimate_lo_sum_address_p (mode, addr, false))
11910 {
11911 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11912 {
11913 emit_insn (gen_rtx_SET (scratch, addr));
11914 new_addr = scratch;
11915 }
11916 }
11917
11918 else
11919 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11920
11921 break;
11922
11923 case SYMBOL_REF:
11924 case CONST:
11925 case LABEL_REF:
11926 rs6000_emit_move (scratch, addr, Pmode);
11927 new_addr = scratch;
11928 break;
11929
11930 default:
11931 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11932 }
11933
11934 /* Adjust the address if it changed. */
11935 if (addr != new_addr)
11936 {
11937 mem = replace_equiv_address_nv (mem, new_addr);
11938 if (TARGET_DEBUG_ADDR)
11939 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11940 }
11941
11942 /* Now create the move. */
11943 if (store_p)
11944 emit_insn (gen_rtx_SET (mem, reg));
11945 else
11946 emit_insn (gen_rtx_SET (reg, mem));
11947
11948 return;
11949 }
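
/* For example, with a register class lacking auto-update support, a
   (pre_inc (reg R)) address is rewritten above into an explicit add of
   GET_MODE_SIZE (mode) to R (a subtract for PRE_DEC), followed by a plain
   (mem (reg R)) access.  */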
11950
11951 /* Convert reloads involving 64-bit gprs and misaligned offset
11952 addressing, or multiple 32-bit gprs and offsets that are too large,
11953 to use indirect addressing. */
11954
11955 void
11956 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11957 {
11958 int regno = true_regnum (reg);
11959 enum reg_class rclass;
11960 rtx addr;
11961 rtx scratch_or_premodify = scratch;
11962
11963 if (TARGET_DEBUG_ADDR)
11964 {
11965 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11966 store_p ? "store" : "load");
11967 fprintf (stderr, "reg:\n");
11968 debug_rtx (reg);
11969 fprintf (stderr, "mem:\n");
11970 debug_rtx (mem);
11971 fprintf (stderr, "scratch:\n");
11972 debug_rtx (scratch);
11973 }
11974
11975 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11976 gcc_assert (MEM_P (mem));
11977 rclass = REGNO_REG_CLASS (regno);
11978 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11979 addr = XEXP (mem, 0);
11980
11981 if (GET_CODE (addr) == PRE_MODIFY)
11982 {
11983 gcc_assert (REG_P (XEXP (addr, 0))
11984 && GET_CODE (XEXP (addr, 1)) == PLUS
11985 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11986 scratch_or_premodify = XEXP (addr, 0);
11987 addr = XEXP (addr, 1);
11988 }
11989 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11990
11991 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11992
11993 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11994
11995 /* Now create the move. */
11996 if (store_p)
11997 emit_insn (gen_rtx_SET (mem, reg));
11998 else
11999 emit_insn (gen_rtx_SET (reg, mem));
12000
12001 return;
12002 }
12003
12004 /* Given an rtx X being reloaded into a reg required to be
12005 in class CLASS, return the class of reg to actually use.
12006 In general this is just CLASS; but on some machines
12007 in some cases it is preferable to use a more restrictive class.
12008
12009 On the RS/6000, we have to return NO_REGS when we want to reload a
12010 floating-point CONST_DOUBLE to force it to be copied to memory.
12011
12012 We also don't want to reload integer values into floating-point
12013 registers if we can at all help it. In fact, this can
12014 cause reload to die, if it tries to generate a reload of CTR
12015 into a FP register and discovers it doesn't have the memory location
12016 required.
12017
12018 ??? Would it be a good idea to have reload do the converse, that is
12019 try to reload floating modes into FP registers if possible?
12020 */
12021
12022 static enum reg_class
12023 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12024 {
12025 machine_mode mode = GET_MODE (x);
12026 bool is_constant = CONSTANT_P (x);
12027
12028 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12029 reload class for it. */
12030 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12031 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12032 return NO_REGS;
12033
12034 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12035 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12036 return NO_REGS;
12037
12038 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12039 the reloading of address expressions using PLUS into floating point
12040 registers. */
12041 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12042 {
12043 if (is_constant)
12044 {
12045 /* Zero is always allowed in all VSX registers. */
12046 if (x == CONST0_RTX (mode))
12047 return rclass;
12048
12049 /* If this is a vector constant that can be formed with a few Altivec
12050 instructions, we want altivec registers. */
12051 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12052 return ALTIVEC_REGS;
12053
12054 /* If this is an integer constant that can easily be loaded into
12055 vector registers, allow it. */
12056 if (CONST_INT_P (x))
12057 {
12058 HOST_WIDE_INT value = INTVAL (x);
12059
12060 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12061 2.06 can generate it in the Altivec registers with
12062 VSPLTI<x>. */
12063 if (value == -1)
12064 {
12065 if (TARGET_P8_VECTOR)
12066 return rclass;
12067 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12068 return ALTIVEC_REGS;
12069 else
12070 return NO_REGS;
12071 }
12072
12073 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12074 a sign extend in the Altivec registers. */
12075 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12076 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12077 return ALTIVEC_REGS;
12078 }
12079
12080 /* Force constant to memory. */
12081 return NO_REGS;
12082 }
12083
12084 /* D-form addressing can easily reload the value. */
12085 if (mode_supports_vmx_dform (mode)
12086 || mode_supports_dq_form (mode))
12087 return rclass;
12088
12089 /* If this is a scalar floating point value and we don't have D-form
12090 addressing, prefer the traditional floating point registers so that we
12091 can use D-form (register+offset) addressing. */
12092 if (rclass == VSX_REGS
12093 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12094 return FLOAT_REGS;
12095
12096 /* Prefer the Altivec registers if Altivec is handling the vector
12097 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12098 loads. */
12099 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12100 || mode == V1TImode)
12101 return ALTIVEC_REGS;
12102
12103 return rclass;
12104 }
12105
12106 if (is_constant || GET_CODE (x) == PLUS)
12107 {
12108 if (reg_class_subset_p (GENERAL_REGS, rclass))
12109 return GENERAL_REGS;
12110 if (reg_class_subset_p (BASE_REGS, rclass))
12111 return BASE_REGS;
12112 return NO_REGS;
12113 }
12114
12115 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12116 return GENERAL_REGS;
12117
12118 return rclass;
12119 }
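
/* An example of the CONST_INT handling above: reloading (const_int -1)
   with rclass == VSX_REGS stays in VSX_REGS on an ISA 2.07 target (XXLORC
   can form -1 in any VSX register), narrows to ALTIVEC_REGS on ISA 2.06
   (VSPLTISW), and integer constants that fail all of the tests fall
   through to NO_REGS, forcing them to memory.  */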
12120
12121 /* Debug version of rs6000_preferred_reload_class. */
12122 static enum reg_class
12123 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12124 {
12125 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12126
12127 fprintf (stderr,
12128 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12129 "mode = %s, x:\n",
12130 reg_class_names[ret], reg_class_names[rclass],
12131 GET_MODE_NAME (GET_MODE (x)));
12132 debug_rtx (x);
12133
12134 return ret;
12135 }
12136
12137 /* If we are copying between FP or AltiVec registers and anything else, we need
12138 a memory location. The exception is when we are targeting ppc64 and the
12139 move instructions between fpr and gpr are available. Also, under VSX, you
12140 can copy vector registers from the FP register set to the Altivec register
12141 set and vice versa. */
12142
12143 static bool
12144 rs6000_secondary_memory_needed (machine_mode mode,
12145 reg_class_t from_class,
12146 reg_class_t to_class)
12147 {
12148 enum rs6000_reg_type from_type, to_type;
12149 bool altivec_p = ((from_class == ALTIVEC_REGS)
12150 || (to_class == ALTIVEC_REGS));
12151
12152 /* If a simple/direct move is available, we don't need secondary memory. */
12153 from_type = reg_class_to_reg_type[(int)from_class];
12154 to_type = reg_class_to_reg_type[(int)to_class];
12155
12156 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12157 (secondary_reload_info *)0, altivec_p))
12158 return false;
12159
12160 /* If we have a floating point or vector register class, we need to use
12161 memory to transfer the data. */
12162 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12163 return true;
12164
12165 return false;
12166 }
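
/* For example, copying SFmode between FLOAT_REGS and GENERAL_REGS needs a
   stack slot on targets without direct moves, whereas a 64-bit ISA 2.07
   target can do the transfer with the direct-move helpers and avoid
   memory entirely.  */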
12167
12168 /* Debug version of rs6000_secondary_memory_needed. */
12169 static bool
12170 rs6000_debug_secondary_memory_needed (machine_mode mode,
12171 reg_class_t from_class,
12172 reg_class_t to_class)
12173 {
12174 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12175
12176 fprintf (stderr,
12177 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12178 "to_class = %s, mode = %s\n",
12179 ret ? "true" : "false",
12180 reg_class_names[from_class],
12181 reg_class_names[to_class],
12182 GET_MODE_NAME (mode));
12183
12184 return ret;
12185 }
12186
12187 /* Return the register class of a scratch register needed to copy IN into
12188 or out of a register in RCLASS in MODE. If it can be done directly,
12189 NO_REGS is returned. */
12190
12191 static enum reg_class
12192 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12193 rtx in)
12194 {
12195 int regno;
12196
12197 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12198 #if TARGET_MACHO
12199 && MACHOPIC_INDIRECT
12200 #endif
12201 ))
12202 {
12203 /* We cannot copy a symbolic operand directly into anything
12204 other than BASE_REGS for TARGET_ELF. So indicate that a
12205 register from BASE_REGS is needed as an intermediate
12206 register.
12207
12208 On Darwin, pic addresses require a load from memory, which
12209 needs a base register. */
12210 if (rclass != BASE_REGS
12211 && (SYMBOL_REF_P (in)
12212 || GET_CODE (in) == HIGH
12213 || GET_CODE (in) == LABEL_REF
12214 || GET_CODE (in) == CONST))
12215 return BASE_REGS;
12216 }
12217
12218 if (REG_P (in))
12219 {
12220 regno = REGNO (in);
12221 if (!HARD_REGISTER_NUM_P (regno))
12222 {
12223 regno = true_regnum (in);
12224 if (!HARD_REGISTER_NUM_P (regno))
12225 regno = -1;
12226 }
12227 }
12228 else if (SUBREG_P (in))
12229 {
12230 regno = true_regnum (in);
12231 if (!HARD_REGISTER_NUM_P (regno))
12232 regno = -1;
12233 }
12234 else
12235 regno = -1;
12236
12237 /* If we have VSX register moves, prefer moving scalar values between
12238 Altivec registers and GPR by going via an FPR (and then via memory)
12239 instead of reloading the secondary memory address for Altivec moves. */
12240 if (TARGET_VSX
12241 && GET_MODE_SIZE (mode) < 16
12242 && !mode_supports_vmx_dform (mode)
12243 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12244 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12245 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12246 && (regno >= 0 && INT_REGNO_P (regno)))))
12247 return FLOAT_REGS;
12248
12249 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12250 into anything. */
12251 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12252 || (regno >= 0 && INT_REGNO_P (regno)))
12253 return NO_REGS;
12254
12255 /* Constants, memory, and VSX registers can go into VSX registers (both the
12256 traditional floating point and the altivec registers). */
12257 if (rclass == VSX_REGS
12258 && (regno == -1 || VSX_REGNO_P (regno)))
12259 return NO_REGS;
12260
12261 /* Constants, memory, and FP registers can go into FP registers. */
12262 if ((regno == -1 || FP_REGNO_P (regno))
12263 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12264 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12265
12266 /* Memory, and AltiVec registers can go into AltiVec registers. */
12267 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12268 && rclass == ALTIVEC_REGS)
12269 return NO_REGS;
12270
12271 /* We can copy among the CR registers. */
12272 if ((rclass == CR_REGS || rclass == CR0_REGS)
12273 && regno >= 0 && CR_REGNO_P (regno))
12274 return NO_REGS;
12275
12276 /* Otherwise, we need GENERAL_REGS. */
12277 return GENERAL_REGS;
12278 }
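/* Illustrative case of the via-FPR preference above, assuming -mvsx: if
   reload wants a DImode value that lives in an Altivec register moved into
   GENERAL_REGS, this returns FLOAT_REGS, so the value travels Altivec reg
   -> FPR -> memory -> GPR rather than forcing a reload of an Altivec
   memory address.  */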
12279
12280 /* Debug version of rs6000_secondary_reload_class. */
12281 static enum reg_class
12282 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12283 machine_mode mode, rtx in)
12284 {
12285 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12286 fprintf (stderr,
12287 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12288 "mode = %s, input rtx:\n",
12289 reg_class_names[ret], reg_class_names[rclass],
12290 GET_MODE_NAME (mode));
12291 debug_rtx (in);
12292
12293 return ret;
12294 }
12295
12296 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12297
12298 static bool
12299 rs6000_can_change_mode_class (machine_mode from,
12300 machine_mode to,
12301 reg_class_t rclass)
12302 {
12303 unsigned from_size = GET_MODE_SIZE (from);
12304 unsigned to_size = GET_MODE_SIZE (to);
12305
12306 if (from_size != to_size)
12307 {
12308 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12309
12310 if (reg_classes_intersect_p (xclass, rclass))
12311 {
12312 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12313 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12314 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12315 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12316
12317 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12318 single register under VSX because the scalar part of the register
12319 is in the upper 64-bits, and not the lower 64-bits. Types like
12320 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12321 IEEE floating point can't overlap, and neither can small
12322 values. */
12323
12324 if (to_float128_vector_p && from_float128_vector_p)
12325 return true;
12326
12327 else if (to_float128_vector_p || from_float128_vector_p)
12328 return false;
12329
12330 /* TDmode in floating-mode registers must always go into a register
12331 pair with the most significant word in the even-numbered register
12332 to match ISA requirements. In little-endian mode, this does not
12333 match subreg numbering, so we cannot allow subregs. */
12334 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12335 return false;
12336
12337 /* Allow SD<->DD changes, since SDmode values are stored in
12338 the low half of the DDmode, just like target-independent
12339 code expects. We need to allow at least SD->DD since
12340 rs6000_secondary_memory_needed_mode asks for that change
12341 to be made for SD reloads. */
12342 if ((to == DDmode && from == SDmode)
12343 || (to == SDmode && from == DDmode))
12344 return true;
12345
12346 if (from_size < 8 || to_size < 8)
12347 return false;
12348
12349 if (from_size == 8 && (8 * to_nregs) != to_size)
12350 return false;
12351
12352 if (to_size == 8 && (8 * from_nregs) != from_size)
12353 return false;
12354
12355 return true;
12356 }
12357 else
12358 return true;
12359 }
12360
12361 /* Since the VSX register set includes traditional floating point registers
12362 and altivec registers, just check for the size being different instead of
12363 trying to check whether the modes are vector modes. Otherwise it won't
12364 allow say DF and DI to change classes. For types like TFmode and TDmode
12365 that take 2 64-bit registers, rather than a single 128-bit register, don't
12366 allow subregs of those types to other 128 bit types. */
12367 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12368 {
12369 unsigned num_regs = (from_size + 15) / 16;
12370 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12371 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12372 return false;
12373
12374 return (from_size == 8 || from_size == 16);
12375 }
12376
12377 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12378 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12379 return false;
12380
12381 return true;
12382 }
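/* Example of the size-change restriction above: under VSX a DFmode scalar
   occupies the upper 64 bits of its 128-bit register, so a subreg such as
   (subreg:DF (reg:V2DF vs0) 0) would name the wrong half and the change is
   rejected, while V2DF <-> V4SF (both one 16-byte register) is allowed.
   This is a sketch of the intent, not an exhaustive list of mode pairs.  */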
12383
12384 /* Debug version of rs6000_can_change_mode_class. */
12385 static bool
12386 rs6000_debug_can_change_mode_class (machine_mode from,
12387 machine_mode to,
12388 reg_class_t rclass)
12389 {
12390 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12391
12392 fprintf (stderr,
12393 "rs6000_can_change_mode_class, return %s, from = %s, "
12394 "to = %s, rclass = %s\n",
12395 ret ? "true" : "false",
12396 GET_MODE_NAME (from), GET_MODE_NAME (to),
12397 reg_class_names[rclass]);
12398
12399 return ret;
12400 }
12401 \f
12402 /* Return a string to do a move operation of 128 bits of data. */
12403
12404 const char *
12405 rs6000_output_move_128bit (rtx operands[])
12406 {
12407 rtx dest = operands[0];
12408 rtx src = operands[1];
12409 machine_mode mode = GET_MODE (dest);
12410 int dest_regno;
12411 int src_regno;
12412 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12413 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12414
12415 if (REG_P (dest))
12416 {
12417 dest_regno = REGNO (dest);
12418 dest_gpr_p = INT_REGNO_P (dest_regno);
12419 dest_fp_p = FP_REGNO_P (dest_regno);
12420 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12421 dest_vsx_p = dest_fp_p | dest_vmx_p;
12422 }
12423 else
12424 {
12425 dest_regno = -1;
12426 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12427 }
12428
12429 if (REG_P (src))
12430 {
12431 src_regno = REGNO (src);
12432 src_gpr_p = INT_REGNO_P (src_regno);
12433 src_fp_p = FP_REGNO_P (src_regno);
12434 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12435 src_vsx_p = src_fp_p | src_vmx_p;
12436 }
12437 else
12438 {
12439 src_regno = -1;
12440 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12441 }
12442
12443 /* Register moves. */
12444 if (dest_regno >= 0 && src_regno >= 0)
12445 {
12446 if (dest_gpr_p)
12447 {
12448 if (src_gpr_p)
12449 return "#";
12450
12451 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12452 return (WORDS_BIG_ENDIAN
12453 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12454 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12455
12456 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12457 return "#";
12458 }
12459
12460 else if (TARGET_VSX && dest_vsx_p)
12461 {
12462 if (src_vsx_p)
12463 return "xxlor %x0,%x1,%x1";
12464
12465 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12466 return (WORDS_BIG_ENDIAN
12467 ? "mtvsrdd %x0,%1,%L1"
12468 : "mtvsrdd %x0,%L1,%1");
12469
12470 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12471 return "#";
12472 }
12473
12474 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12475 return "vor %0,%1,%1";
12476
12477 else if (dest_fp_p && src_fp_p)
12478 return "#";
12479 }
12480
12481 /* Loads. */
12482 else if (dest_regno >= 0 && MEM_P (src))
12483 {
12484 if (dest_gpr_p)
12485 {
12486 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12487 return "lq %0,%1";
12488 else
12489 return "#";
12490 }
12491
12492 else if (TARGET_ALTIVEC && dest_vmx_p
12493 && altivec_indexed_or_indirect_operand (src, mode))
12494 return "lvx %0,%y1";
12495
12496 else if (TARGET_VSX && dest_vsx_p)
12497 {
12498 if (mode_supports_dq_form (mode)
12499 && quad_address_p (XEXP (src, 0), mode, true))
12500 return "lxv %x0,%1";
12501
12502 else if (TARGET_P9_VECTOR)
12503 return "lxvx %x0,%y1";
12504
12505 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12506 return "lxvw4x %x0,%y1";
12507
12508 else
12509 return "lxvd2x %x0,%y1";
12510 }
12511
12512 else if (TARGET_ALTIVEC && dest_vmx_p)
12513 return "lvx %0,%y1";
12514
12515 else if (dest_fp_p)
12516 return "#";
12517 }
12518
12519 /* Stores. */
12520 else if (src_regno >= 0 && MEM_P (dest))
12521 {
12522 if (src_gpr_p)
12523 {
12524 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12525 return "stq %1,%0";
12526 else
12527 return "#";
12528 }
12529
12530 else if (TARGET_ALTIVEC && src_vmx_p
12531 && altivec_indexed_or_indirect_operand (dest, mode))
12532 return "stvx %1,%y0";
12533
12534 else if (TARGET_VSX && src_vsx_p)
12535 {
12536 if (mode_supports_dq_form (mode)
12537 && quad_address_p (XEXP (dest, 0), mode, true))
12538 return "stxv %x1,%0";
12539
12540 else if (TARGET_P9_VECTOR)
12541 return "stxvx %x1,%y0";
12542
12543 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12544 return "stxvw4x %x1,%y0";
12545
12546 else
12547 return "stxvd2x %x1,%y0";
12548 }
12549
12550 else if (TARGET_ALTIVEC && src_vmx_p)
12551 return "stvx %1,%y0";
12552
12553 else if (src_fp_p)
12554 return "#";
12555 }
12556
12557 /* Constants. */
12558 else if (dest_regno >= 0
12559 && (CONST_INT_P (src)
12560 || CONST_WIDE_INT_P (src)
12561 || CONST_DOUBLE_P (src)
12562 || GET_CODE (src) == CONST_VECTOR))
12563 {
12564 if (dest_gpr_p)
12565 return "#";
12566
12567 else if ((dest_vmx_p && TARGET_ALTIVEC)
12568 || (dest_vsx_p && TARGET_VSX))
12569 return output_vec_const_move (operands);
12570 }
12571
12572 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12573 }
12574
12575 /* Validate a 128-bit move. */
12576 bool
12577 rs6000_move_128bit_ok_p (rtx operands[])
12578 {
12579 machine_mode mode = GET_MODE (operands[0]);
12580 return (gpc_reg_operand (operands[0], mode)
12581 || gpc_reg_operand (operands[1], mode));
12582 }
12583
12584 /* Return true if a 128-bit move needs to be split. */
12585 bool
12586 rs6000_split_128bit_ok_p (rtx operands[])
12587 {
12588 if (!reload_completed)
12589 return false;
12590
12591 if (!gpr_or_gpr_p (operands[0], operands[1]))
12592 return false;
12593
12594 if (quad_load_store_p (operands[0], operands[1]))
12595 return false;
12596
12597 return true;
12598 }
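/* For example, a TImode GPR<->GPR or GPR<->memory move that cannot use
   lq/stq stays as a single "#" template until after reload and is then
   split into two doubleword moves (four word moves on 32-bit targets).  */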
12599
12600 \f
12601 /* Given a comparison operation, return the bit number in CCR to test. We
12602 know this is a valid comparison.
12603
12604 SCC_P is 1 if this is for an scc. That means that %D will have been
12605 used instead of %C, so the bits will be in different places.
12606
12607 Return -1 if OP isn't a valid comparison for some reason. */
12608
12609 int
12610 ccr_bit (rtx op, int scc_p)
12611 {
12612 enum rtx_code code = GET_CODE (op);
12613 machine_mode cc_mode;
12614 int cc_regnum;
12615 int base_bit;
12616 rtx reg;
12617
12618 if (!COMPARISON_P (op))
12619 return -1;
12620
12621 reg = XEXP (op, 0);
12622
12623 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12624 return -1;
12625
12626 cc_mode = GET_MODE (reg);
12627 cc_regnum = REGNO (reg);
12628 base_bit = 4 * (cc_regnum - CR0_REGNO);
12629
12630 validate_condition_mode (code, cc_mode);
12631
12632 /* When generating a sCOND operation, only positive conditions are
12633 allowed. */
12634 if (scc_p)
12635 switch (code)
12636 {
12637 case EQ:
12638 case GT:
12639 case LT:
12640 case UNORDERED:
12641 case GTU:
12642 case LTU:
12643 break;
12644 default:
12645 return -1;
12646 }
12647
12648 switch (code)
12649 {
12650 case NE:
12651 return scc_p ? base_bit + 3 : base_bit + 2;
12652 case EQ:
12653 return base_bit + 2;
12654 case GT: case GTU: case UNLE:
12655 return base_bit + 1;
12656 case LT: case LTU: case UNGE:
12657 return base_bit;
12658 case ORDERED: case UNORDERED:
12659 return base_bit + 3;
12660
12661 case GE: case GEU:
12662 /* If scc, we will have done a cror to put the bit in the
12663 unordered position. So test that bit. For integer, this is ! LT
12664 unless this is an scc insn. */
12665 return scc_p ? base_bit + 3 : base_bit;
12666
12667 case LE: case LEU:
12668 return scc_p ? base_bit + 3 : base_bit + 1;
12669
12670 default:
12671 return -1;
12672 }
12673 }
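/* Worked example: for (gt (reg:CC cr2) (const_int 0)), cc_regnum - CR0_REGNO
   is 2, so base_bit is 8 and ccr_bit returns 9, the GT bit of CR2.  An scc
   NE on the same register returns base_bit + 3 = 11, because the scc
   sequence leaves the result in the "unordered" bit position.  */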
12674 \f
12675 /* Return the GOT register. */
12676
12677 rtx
12678 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12679 {
12680 /* The second flow pass currently (June 1999) can't update
12681 regs_ever_live without disturbing other parts of the compiler, so
12682 update it here to make the prolog/epilogue code happy. */
12683 if (!can_create_pseudo_p ()
12684 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12685 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12686
12687 crtl->uses_pic_offset_table = 1;
12688
12689 return pic_offset_table_rtx;
12690 }
12691 \f
12692 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12693
12694 /* Write out a function code label. */
12695
12696 void
12697 rs6000_output_function_entry (FILE *file, const char *fname)
12698 {
12699 if (fname[0] != '.')
12700 {
12701 switch (DEFAULT_ABI)
12702 {
12703 default:
12704 gcc_unreachable ();
12705
12706 case ABI_AIX:
12707 if (DOT_SYMBOLS)
12708 putc ('.', file);
12709 else
12710 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12711 break;
12712
12713 case ABI_ELFv2:
12714 case ABI_V4:
12715 case ABI_DARWIN:
12716 break;
12717 }
12718 }
12719
12720 RS6000_OUTPUT_BASENAME (file, fname);
12721 }
12722
12723 /* Print an operand. Recognize special options, documented below. */
12724
12725 #if TARGET_ELF
12726 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12727 only introduced by the linker, when applying the sda21
12728 relocation. */
12729 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12730 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12731 #else
12732 #define SMALL_DATA_RELOC "sda21"
12733 #define SMALL_DATA_REG 0
12734 #endif
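/* For example, with -msdata=eabi a small-data reference prints as
   "var@sda21(0)" and the sda21 relocation is resolved by the linker, which
   substitutes the proper base register; otherwise it prints as
   "var@sdarel(13)", addressing the data directly off r13.  */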
12735
12736 void
12737 print_operand (FILE *file, rtx x, int code)
12738 {
12739 int i;
12740 unsigned HOST_WIDE_INT uval;
12741
12742 switch (code)
12743 {
12744 /* %a is output_address. */
12745
12746 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12747 output_operand. */
12748
12749 case 'D':
12750 /* Like 'J' but get to the GT bit only. */
12751 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12752 {
12753 output_operand_lossage ("invalid %%D value");
12754 return;
12755 }
12756
12757 /* Bit 1 is GT bit. */
12758 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12759
12760 /* Add one for shift count in rlinm for scc. */
12761 fprintf (file, "%d", i + 1);
12762 return;
12763
12764 case 'e':
12765 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12766 if (! INT_P (x))
12767 {
12768 output_operand_lossage ("invalid %%e value");
12769 return;
12770 }
12771
12772 uval = INTVAL (x);
12773 if ((uval & 0xffff) == 0 && uval != 0)
12774 putc ('s', file);
12775 return;
12776
12777 case 'E':
12778 /* X is a CR register. Print the number of the EQ bit of the CR. */
12779 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12780 output_operand_lossage ("invalid %%E value");
12781 else
12782 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12783 return;
12784
12785 case 'f':
12786 /* X is a CR register. Print the shift count needed to move it
12787 to the high-order four bits. */
12788 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12789 output_operand_lossage ("invalid %%f value");
12790 else
12791 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12792 return;
12793
12794 case 'F':
12795 /* Similar, but print the count for the rotate in the opposite
12796 direction. */
12797 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12798 output_operand_lossage ("invalid %%F value");
12799 else
12800 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12801 return;
12802
12803 case 'G':
12804 /* X is a constant integer. If it is negative, print "m",
12805 otherwise print "z". This is to make an aze or ame insn. */
12806 if (!CONST_INT_P (x))
12807 output_operand_lossage ("invalid %%G value");
12808 else if (INTVAL (x) >= 0)
12809 putc ('z', file);
12810 else
12811 putc ('m', file);
12812 return;
12813
12814 case 'h':
12815 /* If constant, output low-order five bits. Otherwise, write
12816 normally. */
12817 if (INT_P (x))
12818 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12819 else
12820 print_operand (file, x, 0);
12821 return;
12822
12823 case 'H':
12824 /* If constant, output low-order six bits. Otherwise, write
12825 normally. */
12826 if (INT_P (x))
12827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12828 else
12829 print_operand (file, x, 0);
12830 return;
12831
12832 case 'I':
12833 /* Print `i' if this is a constant, else nothing. */
12834 if (INT_P (x))
12835 putc ('i', file);
12836 return;
12837
12838 case 'j':
12839 /* Write the bit number in CCR for jump. */
12840 i = ccr_bit (x, 0);
12841 if (i == -1)
12842 output_operand_lossage ("invalid %%j code");
12843 else
12844 fprintf (file, "%d", i);
12845 return;
12846
12847 case 'J':
12848 /* Similar, but add one for shift count in rlinm for scc and pass
12849 scc flag to `ccr_bit'. */
12850 i = ccr_bit (x, 1);
12851 if (i == -1)
12852 output_operand_lossage ("invalid %%J code");
12853 else
12854 /* If we want bit 31, write a shift count of zero, not 32. */
12855 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12856 return;
12857
12858 case 'k':
12859 /* X must be a constant. Write the 1's complement of the
12860 constant. */
12861 if (! INT_P (x))
12862 output_operand_lossage ("invalid %%k value");
12863 else
12864 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12865 return;
12866
12867 case 'K':
12868 /* X must be a symbolic constant on ELF. Write an
12869 expression suitable for an 'addi' that adds in the low 16
12870 bits of the MEM. */
12871 if (GET_CODE (x) == CONST)
12872 {
12873 if (GET_CODE (XEXP (x, 0)) != PLUS
12874 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12875 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12876 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12877 output_operand_lossage ("invalid %%K value");
12878 }
12879 print_operand_address (file, x);
12880 fputs ("@l", file);
12881 return;
12882
12883 /* %l is output_asm_label. */
12884
12885 case 'L':
12886 /* Write second word of DImode or DFmode reference. Works on register
12887 or non-indexed memory only. */
12888 if (REG_P (x))
12889 fputs (reg_names[REGNO (x) + 1], file);
12890 else if (MEM_P (x))
12891 {
12892 machine_mode mode = GET_MODE (x);
12893 /* Handle possible auto-increment. Since it is pre-increment and
12894 we have already done it, we can just use an offset of one word. */
12895 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12896 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12897 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12898 UNITS_PER_WORD));
12899 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12900 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12901 UNITS_PER_WORD));
12902 else
12903 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12904 UNITS_PER_WORD),
12905 0));
12906
12907 if (small_data_operand (x, GET_MODE (x)))
12908 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12909 reg_names[SMALL_DATA_REG]);
12910 }
12911 return;
12912
12913 case 'N': /* Unused */
12914 /* Write the number of elements in the vector times 4. */
12915 if (GET_CODE (x) != PARALLEL)
12916 output_operand_lossage ("invalid %%N value");
12917 else
12918 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12919 return;
12920
12921 case 'O': /* Unused */
12922 /* Similar, but subtract 1 first. */
12923 if (GET_CODE (x) != PARALLEL)
12924 output_operand_lossage ("invalid %%O value");
12925 else
12926 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12927 return;
12928
12929 case 'p':
12930 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12931 if (! INT_P (x)
12932 || INTVAL (x) < 0
12933 || (i = exact_log2 (INTVAL (x))) < 0)
12934 output_operand_lossage ("invalid %%p value");
12935 else
12936 fprintf (file, "%d", i);
12937 return;
12938
12939 case 'P':
12940 /* The operand must be an indirect memory reference. The result
12941 is the register name. */
12942 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12943 || REGNO (XEXP (x, 0)) >= 32)
12944 output_operand_lossage ("invalid %%P value");
12945 else
12946 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12947 return;
12948
12949 case 'q':
12950 /* This outputs the logical code corresponding to a boolean
12951 expression. The expression may have one or both operands
12952 negated (if one, only the first one). For condition register
12953 logical operations, it will also treat the negated
12954 CR codes as NOTs, but not handle NOTs of them. */
12955 {
12956 const char *const *t = 0;
12957 const char *s;
12958 enum rtx_code code = GET_CODE (x);
12959 static const char * const tbl[3][3] = {
12960 { "and", "andc", "nor" },
12961 { "or", "orc", "nand" },
12962 { "xor", "eqv", "xor" } };
12963
12964 if (code == AND)
12965 t = tbl[0];
12966 else if (code == IOR)
12967 t = tbl[1];
12968 else if (code == XOR)
12969 t = tbl[2];
12970 else
12971 output_operand_lossage ("invalid %%q value");
12972
12973 if (GET_CODE (XEXP (x, 0)) != NOT)
12974 s = t[0];
12975 else
12976 {
12977 if (GET_CODE (XEXP (x, 1)) == NOT)
12978 s = t[2];
12979 else
12980 s = t[1];
12981 }
12982
12983 fputs (s, file);
12984 }
12985 return;
12986
12987 case 'Q':
12988 if (! TARGET_MFCRF)
12989 return;
12990 fputc (',', file);
12991 /* FALLTHRU */
12992
12993 case 'R':
12994 /* X is a CR register. Print the mask for `mtcrf'. */
12995 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12996 output_operand_lossage ("invalid %%R value");
12997 else
12998 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12999 return;
13000
13001 case 's':
13002 /* Low 5 bits of 32 - value */
13003 if (! INT_P (x))
13004 output_operand_lossage ("invalid %%s value");
13005 else
13006 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13007 return;
13008
13009 case 't':
13010 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13011 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13012 {
13013 output_operand_lossage ("invalid %%t value");
13014 return;
13015 }
13016
13017 /* Bit 3 is OV bit. */
13018 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13019
13020 /* If we want bit 31, write a shift count of zero, not 32. */
13021 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13022 return;
13023
13024 case 'T':
13025 /* Print the symbolic name of a branch target register. */
13026 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13027 x = XVECEXP (x, 0, 0);
13028 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13029 && REGNO (x) != CTR_REGNO))
13030 output_operand_lossage ("invalid %%T value");
13031 else if (REGNO (x) == LR_REGNO)
13032 fputs ("lr", file);
13033 else
13034 fputs ("ctr", file);
13035 return;
13036
13037 case 'u':
13038 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13039 for use in unsigned operand. */
13040 if (! INT_P (x))
13041 {
13042 output_operand_lossage ("invalid %%u value");
13043 return;
13044 }
13045
13046 uval = INTVAL (x);
13047 if ((uval & 0xffff) == 0)
13048 uval >>= 16;
13049
13050 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13051 return;
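/* Example for %u: 0x12340000 prints as 0x1234 (the high half, since the low
   half is zero), while 0x5678 prints as 0x5678.  */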
13052
13053 case 'v':
13054 /* High-order 16 bits of constant for use in signed operand. */
13055 if (! INT_P (x))
13056 output_operand_lossage ("invalid %%v value");
13057 else
13058 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13059 (INTVAL (x) >> 16) & 0xffff);
13060 return;
13061
13062 case 'U':
13063 /* Print `u' if this has an auto-increment or auto-decrement. */
13064 if (MEM_P (x)
13065 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13066 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13067 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13068 putc ('u', file);
13069 return;
13070
13071 case 'V':
13072 /* Print the trap code for this operand. */
13073 switch (GET_CODE (x))
13074 {
13075 case EQ:
13076 fputs ("eq", file); /* 4 */
13077 break;
13078 case NE:
13079 fputs ("ne", file); /* 24 */
13080 break;
13081 case LT:
13082 fputs ("lt", file); /* 16 */
13083 break;
13084 case LE:
13085 fputs ("le", file); /* 20 */
13086 break;
13087 case GT:
13088 fputs ("gt", file); /* 8 */
13089 break;
13090 case GE:
13091 fputs ("ge", file); /* 12 */
13092 break;
13093 case LTU:
13094 fputs ("llt", file); /* 2 */
13095 break;
13096 case LEU:
13097 fputs ("lle", file); /* 6 */
13098 break;
13099 case GTU:
13100 fputs ("lgt", file); /* 1 */
13101 break;
13102 case GEU:
13103 fputs ("lge", file); /* 5 */
13104 break;
13105 default:
13106 output_operand_lossage ("invalid %%V value");
13107 }
13108 break;
13109
13110 case 'w':
13111 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13112 normally. */
13113 if (INT_P (x))
13114 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13115 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13116 else
13117 print_operand (file, x, 0);
13118 return;
13119
13120 case 'x':
13121 /* X is a FPR or Altivec register used in a VSX context. */
13122 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13123 output_operand_lossage ("invalid %%x value");
13124 else
13125 {
13126 int reg = REGNO (x);
13127 int vsx_reg = (FP_REGNO_P (reg)
13128 ? reg - 32
13129 : reg - FIRST_ALTIVEC_REGNO + 32);
13130
13131 #ifdef TARGET_REGNAMES
13132 if (TARGET_REGNAMES)
13133 fprintf (file, "%%vs%d", vsx_reg);
13134 else
13135 #endif
13136 fprintf (file, "%d", vsx_reg);
13137 }
13138 return;
13139
13140 case 'X':
13141 if (MEM_P (x)
13142 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13143 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13144 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13145 putc ('x', file);
13146 return;
13147
13148 case 'Y':
13149 /* Like 'L', for the third word of TImode/PTImode. */
13150 if (REG_P (x))
13151 fputs (reg_names[REGNO (x) + 2], file);
13152 else if (MEM_P (x))
13153 {
13154 machine_mode mode = GET_MODE (x);
13155 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13156 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13157 output_address (mode, plus_constant (Pmode,
13158 XEXP (XEXP (x, 0), 0), 8));
13159 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13160 output_address (mode, plus_constant (Pmode,
13161 XEXP (XEXP (x, 0), 0), 8));
13162 else
13163 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13164 if (small_data_operand (x, GET_MODE (x)))
13165 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13166 reg_names[SMALL_DATA_REG]);
13167 }
13168 return;
13169
13170 case 'z':
13171 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13172 x = XVECEXP (x, 0, 1);
13173 /* X is a SYMBOL_REF. Write out the name preceded by a
13174 period and without any trailing data in brackets. Used for function
13175 names. If we are configured for System V (or the embedded ABI) on
13176 the PowerPC, do not emit the period, since those systems do not use
13177 TOCs and the like. */
13178 if (!SYMBOL_REF_P (x))
13179 {
13180 output_operand_lossage ("invalid %%z value");
13181 return;
13182 }
13183
13184 /* For macho, check to see if we need a stub. */
13185 if (TARGET_MACHO)
13186 {
13187 const char *name = XSTR (x, 0);
13188 #if TARGET_MACHO
13189 if (darwin_symbol_stubs
13190 && MACHOPIC_INDIRECT
13191 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13192 name = machopic_indirection_name (x, /*stub_p=*/true);
13193 #endif
13194 assemble_name (file, name);
13195 }
13196 else if (!DOT_SYMBOLS)
13197 assemble_name (file, XSTR (x, 0));
13198 else
13199 rs6000_output_function_entry (file, XSTR (x, 0));
13200 return;
13201
13202 case 'Z':
13203 /* Like 'L', for last word of TImode/PTImode. */
13204 if (REG_P (x))
13205 fputs (reg_names[REGNO (x) + 3], file);
13206 else if (MEM_P (x))
13207 {
13208 machine_mode mode = GET_MODE (x);
13209 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13210 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13211 output_address (mode, plus_constant (Pmode,
13212 XEXP (XEXP (x, 0), 0), 12));
13213 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13214 output_address (mode, plus_constant (Pmode,
13215 XEXP (XEXP (x, 0), 0), 12));
13216 else
13217 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13218 if (small_data_operand (x, GET_MODE (x)))
13219 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13220 reg_names[SMALL_DATA_REG]);
13221 }
13222 return;
13223
13224 /* Print AltiVec memory operand. */
13225 case 'y':
13226 {
13227 rtx tmp;
13228
13229 gcc_assert (MEM_P (x));
13230
13231 tmp = XEXP (x, 0);
13232
13233 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13234 && GET_CODE (tmp) == AND
13235 && CONST_INT_P (XEXP (tmp, 1))
13236 && INTVAL (XEXP (tmp, 1)) == -16)
13237 tmp = XEXP (tmp, 0);
13238 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13239 && GET_CODE (tmp) == PRE_MODIFY)
13240 tmp = XEXP (tmp, 1);
13241 if (REG_P (tmp))
13242 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13243 else
13244 {
13245 if (GET_CODE (tmp) != PLUS
13246 || !REG_P (XEXP (tmp, 0))
13247 || !REG_P (XEXP (tmp, 1)))
13248 {
13249 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13250 break;
13251 }
13252
13253 if (REGNO (XEXP (tmp, 0)) == 0)
13254 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13255 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13256 else
13257 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13258 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13259 }
13260 break;
13261 }
13262
13263 case 0:
13264 if (REG_P (x))
13265 fprintf (file, "%s", reg_names[REGNO (x)]);
13266 else if (MEM_P (x))
13267 {
13268 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13269 know the width from the mode. */
13270 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13271 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13272 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13273 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13274 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13275 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13276 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13277 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13278 else
13279 output_address (GET_MODE (x), XEXP (x, 0));
13280 }
13281 else if (toc_relative_expr_p (x, false,
13282 &tocrel_base_oac, &tocrel_offset_oac))
13283 /* This hack along with a corresponding hack in
13284 rs6000_output_addr_const_extra arranges to output addends
13285 where the assembler expects to find them. eg.
13286 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13287 without this hack would be output as "x@toc+4". We
13288 want "x+4@toc". */
13289 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13290 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13291 output_addr_const (file, XVECEXP (x, 0, 0));
13292 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13293 output_addr_const (file, XVECEXP (x, 0, 1));
13294 else
13295 output_addr_const (file, x);
13296 return;
13297
13298 case '&':
13299 if (const char *name = get_some_local_dynamic_name ())
13300 assemble_name (file, name);
13301 else
13302 output_operand_lossage ("'%%&' used without any "
13303 "local dynamic TLS references");
13304 return;
13305
13306 default:
13307 output_operand_lossage ("invalid %%xn code");
13308 }
13309 }
13310 \f
13311 /* Print the address of an operand. */
13312
13313 void
13314 print_operand_address (FILE *file, rtx x)
13315 {
13316 if (REG_P (x))
13317 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13318
13319 /* Is it a PC-relative address? */
13320 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13321 {
13322 HOST_WIDE_INT offset;
13323
13324 if (GET_CODE (x) == CONST)
13325 x = XEXP (x, 0);
13326
13327 if (GET_CODE (x) == PLUS)
13328 {
13329 offset = INTVAL (XEXP (x, 1));
13330 x = XEXP (x, 0);
13331 }
13332 else
13333 offset = 0;
13334
13335 output_addr_const (file, x);
13336
13337 if (offset)
13338 fprintf (file, "%+" PRId64, offset);
13339
13340 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13341 fprintf (file, "@got");
13342
13343 fprintf (file, "@pcrel");
13344 }
13345 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13346 || GET_CODE (x) == LABEL_REF)
13347 {
13348 output_addr_const (file, x);
13349 if (small_data_operand (x, GET_MODE (x)))
13350 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13351 reg_names[SMALL_DATA_REG]);
13352 else
13353 gcc_assert (!TARGET_TOC);
13354 }
13355 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13356 && REG_P (XEXP (x, 1)))
13357 {
13358 if (REGNO (XEXP (x, 0)) == 0)
13359 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13360 reg_names[ REGNO (XEXP (x, 0)) ]);
13361 else
13362 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13363 reg_names[ REGNO (XEXP (x, 1)) ]);
13364 }
13365 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13366 && CONST_INT_P (XEXP (x, 1)))
13367 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13368 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13369 #if TARGET_MACHO
13370 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13371 && CONSTANT_P (XEXP (x, 1)))
13372 {
13373 fprintf (file, "lo16(");
13374 output_addr_const (file, XEXP (x, 1));
13375 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13376 }
13377 #endif
13378 #if TARGET_ELF
13379 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13380 && CONSTANT_P (XEXP (x, 1)))
13381 {
13382 output_addr_const (file, XEXP (x, 1));
13383 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13384 }
13385 #endif
13386 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13387 {
13388 /* This hack along with a corresponding hack in
13389 rs6000_output_addr_const_extra arranges to output addends
13390 where the assembler expects to find them. eg.
13391 (lo_sum (reg 9)
13392 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13393 without this hack would be output as "x@toc+8@l(9)". We
13394 want "x+8@toc@l(9)". */
13395 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13396 if (GET_CODE (x) == LO_SUM)
13397 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13398 else
13399 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13400 }
13401 else
13402 output_addr_const (file, x);
13403 }
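/* Illustrative outputs for the address forms above (register numbers are
   examples only):

	(reg 9)				-> "0(9)"
	(plus (reg 9) (const_int 16))	-> "16(9)"
	(plus (reg 9) (reg 10))		-> "9,10"
	local pc-relative symbol	-> "sym@pcrel"
	non-local pc-relative symbol	-> "sym@got@pcrel"  */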
13404 \f
13405 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13406
13407 bool
13408 rs6000_output_addr_const_extra (FILE *file, rtx x)
13409 {
13410 if (GET_CODE (x) == UNSPEC)
13411 switch (XINT (x, 1))
13412 {
13413 case UNSPEC_TOCREL:
13414 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13415 && REG_P (XVECEXP (x, 0, 1))
13416 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13417 output_addr_const (file, XVECEXP (x, 0, 0));
13418 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13419 {
13420 if (INTVAL (tocrel_offset_oac) >= 0)
13421 fprintf (file, "+");
13422 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13423 }
13424 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13425 {
13426 putc ('-', file);
13427 assemble_name (file, toc_label_name);
13428 need_toc_init = 1;
13429 }
13430 else if (TARGET_ELF)
13431 fputs ("@toc", file);
13432 return true;
13433
13434 #if TARGET_MACHO
13435 case UNSPEC_MACHOPIC_OFFSET:
13436 output_addr_const (file, XVECEXP (x, 0, 0));
13437 putc ('-', file);
13438 machopic_output_function_base_name (file);
13439 return true;
13440 #endif
13441 }
13442 return false;
13443 }
13444 \f
13445 /* Target hook for assembling integer objects. The PowerPC version has
13446 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13447 is defined. It also needs to handle DI-mode objects on 64-bit
13448 targets. */
13449
13450 static bool
13451 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13452 {
13453 #ifdef RELOCATABLE_NEEDS_FIXUP
13454 /* Special handling for SI values. */
13455 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13456 {
13457 static int recurse = 0;
13458
13459 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13460 the .fixup section. Since the TOC section is already relocated, we
13461 don't need to mark it here. We used to skip the text section, but it
13462 should never be valid for relocated addresses to be placed in the text
13463 section. */
13464 if (DEFAULT_ABI == ABI_V4
13465 && (TARGET_RELOCATABLE || flag_pic > 1)
13466 && in_section != toc_section
13467 && !recurse
13468 && !CONST_SCALAR_INT_P (x)
13469 && CONSTANT_P (x))
13470 {
13471 char buf[256];
13472
13473 recurse = 1;
13474 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13475 fixuplabelno++;
13476 ASM_OUTPUT_LABEL (asm_out_file, buf);
13477 fprintf (asm_out_file, "\t.long\t(");
13478 output_addr_const (asm_out_file, x);
13479 fprintf (asm_out_file, ")@fixup\n");
13480 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13481 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13482 fprintf (asm_out_file, "\t.long\t");
13483 assemble_name (asm_out_file, buf);
13484 fprintf (asm_out_file, "\n\t.previous\n");
13485 recurse = 0;
13486 return true;
13487 }
13488 /* Remove initial .'s to turn a -mcall-aixdesc function
13489 address into the address of the descriptor, not the function
13490 itself. */
13491 else if (SYMBOL_REF_P (x)
13492 && XSTR (x, 0)[0] == '.'
13493 && DEFAULT_ABI == ABI_AIX)
13494 {
13495 const char *name = XSTR (x, 0);
13496 while (*name == '.')
13497 name++;
13498
13499 fprintf (asm_out_file, "\t.long\t%s\n", name);
13500 return true;
13501 }
13502 }
13503 #endif /* RELOCATABLE_NEEDS_FIXUP */
13504 return default_assemble_integer (x, size, aligned_p);
13505 }
13506
13507 /* Return a template string for assembly to emit when making an
13508 external call. FUNOP is the call mem argument operand number. */
13509
13510 static const char *
13511 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13512 {
13513 /* -Wformat-overflow workaround, without which gcc thinks that %u
13514 might produce 10 digits. */
13515 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13516
13517 char arg[12];
13518 arg[0] = 0;
13519 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13520 {
13521 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13522 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13523 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13524 sprintf (arg, "(%%&@tlsld)");
13525 }
13526
13527 /* The magic 32768 offset here corresponds to the offset of
13528 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13529 char z[11];
13530 sprintf (z, "%%z%u%s", funop,
13531 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13532 ? "+32768" : ""));
13533
13534 static char str[32]; /* 1 spare */
13535 if (rs6000_pcrel_p (cfun))
13536 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13537 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13538 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13539 sibcall ? "" : "\n\tnop");
13540 else if (DEFAULT_ABI == ABI_V4)
13541 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13542 flag_pic ? "@plt" : "");
13543 #if TARGET_MACHO
13544 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13545 else if (DEFAULT_ABI == ABI_DARWIN)
13546 {
13547 /* The cookie is in operand func+2. */
13548 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13549 int cookie = INTVAL (operands[funop + 2]);
13550 if (cookie & CALL_LONG)
13551 {
13552 tree funname = get_identifier (XSTR (operands[funop], 0));
13553 tree labelname = get_prev_label (funname);
13554 gcc_checking_assert (labelname && !sibcall);
13555
13556 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13557 instruction will reach 'foo', otherwise link as 'bl L42'".
13558 "L42" should be a 'branch island', that will do a far jump to
13559 'foo'. Branch islands are generated in
13560 macho_branch_islands(). */
13561 sprintf (str, "jbsr %%z%u,%.10s", funop,
13562 IDENTIFIER_POINTER (labelname));
13563 }
13564 else
13565 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13566 after the call. */
13567 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13568 }
13569 #endif
13570 else
13571 gcc_unreachable ();
13572 return str;
13573 }
13574
13575 const char *
13576 rs6000_call_template (rtx *operands, unsigned int funop)
13577 {
13578 return rs6000_call_template_1 (operands, funop, false);
13579 }
13580
13581 const char *
13582 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13583 {
13584 return rs6000_call_template_1 (operands, funop, true);
13585 }
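/* Typical strings produced by the template above for a plain direct call
   (no TLS argument, no secure-plt addend):

	pcrel:		"bl %z0@notoc"
	AIX/ELFv2:	"bl %z0\n\tnop"
	SysV -fPIC:	"bl %z0@plt"

   where %z later prints the function symbol itself.  */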
13586
13587 /* As above, for indirect calls. */
13588
13589 static const char *
13590 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13591 bool sibcall)
13592 {
13593 /* -Wformat-overflow workaround, without which gcc thinks that %u
13594 might produce 10 digits. Note that -Wformat-overflow will not
13595 currently warn here for str[], so do not rely on a warning to
13596 ensure str[] is correctly sized. */
13597 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13598
13599 /* Currently, funop is either 0 or 1. The maximum string is always
13600 a !speculate 64-bit __tls_get_addr call.
13601
13602 ABI_ELFv2, pcrel:
13603 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13604 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13605 . 9 crset 2\n\t
13606 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13607 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13608 . 8 beq%T1l-
13609 .---
13610 .142
13611
13612 ABI_AIX:
13613 . 9 ld 2,%3\n\t
13614 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13615 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13616 . 9 crset 2\n\t
13617 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13618 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13619 . 10 beq%T1l-\n\t
13620 . 10 ld 2,%4(1)
13621 .---
13622 .151
13623
13624 ABI_ELFv2:
13625 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13626 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13627 . 9 crset 2\n\t
13628 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13629 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13630 . 10 beq%T1l-\n\t
13631 . 10 ld 2,%3(1)
13632 .---
13633 .142
13634
13635 ABI_V4:
13636 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13637 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13638 . 9 crset 2\n\t
13639 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13640 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13641 . 8 beq%T1l-
13642 .---
13643 .141 */
13644 static char str[160]; /* 8 spare */
13645 char *s = str;
13646 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13647
13648 if (DEFAULT_ABI == ABI_AIX)
13649 s += sprintf (s,
13650 "l%s 2,%%%u\n\t",
13651 ptrload, funop + 3);
13652
13653 /* We don't need the extra code to stop indirect call speculation if
13654 calling via LR. */
13655 bool speculate = (TARGET_MACHO
13656 || rs6000_speculate_indirect_jumps
13657 || (REG_P (operands[funop])
13658 && REGNO (operands[funop]) == LR_REGNO));
13659
13660 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13661 {
13662 const char *rel64 = TARGET_64BIT ? "64" : "";
13663 char tls[29];
13664 tls[0] = 0;
13665 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13666 {
13667 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13668 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13669 rel64, funop + 1);
13670 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13671 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13672 rel64);
13673 }
13674
13675 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13676 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13677 && flag_pic == 2 ? "+32768" : "");
13678 if (!speculate)
13679 {
13680 s += sprintf (s,
13681 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13682 tls, rel64, notoc, funop, addend);
13683 s += sprintf (s, "crset 2\n\t");
13684 }
13685 s += sprintf (s,
13686 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13687 tls, rel64, notoc, funop, addend);
13688 }
13689 else if (!speculate)
13690 s += sprintf (s, "crset 2\n\t");
13691
13692 if (rs6000_pcrel_p (cfun))
13693 {
13694 if (speculate)
13695 sprintf (s, "b%%T%ul", funop);
13696 else
13697 sprintf (s, "beq%%T%ul-", funop);
13698 }
13699 else if (DEFAULT_ABI == ABI_AIX)
13700 {
13701 if (speculate)
13702 sprintf (s,
13703 "b%%T%ul\n\t"
13704 "l%s 2,%%%u(1)",
13705 funop, ptrload, funop + 4);
13706 else
13707 sprintf (s,
13708 "beq%%T%ul-\n\t"
13709 "l%s 2,%%%u(1)",
13710 funop, ptrload, funop + 4);
13711 }
13712 else if (DEFAULT_ABI == ABI_ELFv2)
13713 {
13714 if (speculate)
13715 sprintf (s,
13716 "b%%T%ul\n\t"
13717 "l%s 2,%%%u(1)",
13718 funop, ptrload, funop + 3);
13719 else
13720 sprintf (s,
13721 "beq%%T%ul-\n\t"
13722 "l%s 2,%%%u(1)",
13723 funop, ptrload, funop + 3);
13724 }
13725 else
13726 {
13727 if (speculate)
13728 sprintf (s,
13729 "b%%T%u%s",
13730 funop, sibcall ? "" : "l");
13731 else
13732 sprintf (s,
13733 "beq%%T%u%s-%s",
13734 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13735 }
13736 return str;
13737 }
13738
13739 const char *
13740 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13741 {
13742 return rs6000_indirect_call_template_1 (operands, funop, false);
13743 }
13744
13745 const char *
13746 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13747 {
13748 return rs6000_indirect_call_template_1 (operands, funop, true);
13749 }
13750
13751 #if HAVE_AS_PLTSEQ
13752 /* Output indirect call insns. WHICH identifies the type of sequence. */
13753 const char *
13754 rs6000_pltseq_template (rtx *operands, int which)
13755 {
13756 const char *rel64 = TARGET_64BIT ? "64" : "";
13757 char tls[30];
13758 tls[0] = 0;
13759 if (GET_CODE (operands[3]) == UNSPEC)
13760 {
13761 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13762 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13763 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13764 off, rel64);
13765 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13766 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13767 off, rel64);
13768 }
13769
13770 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13771 static char str[96]; /* 10 spare */
13772 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13773 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13774 && flag_pic == 2 ? "+32768" : "");
13775 switch (which)
13776 {
13777 case RS6000_PLTSEQ_TOCSAVE:
13778 sprintf (str,
13779 "st%s\n\t"
13780 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13781 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13782 tls, rel64);
13783 break;
13784 case RS6000_PLTSEQ_PLT16_HA:
13785 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13786 sprintf (str,
13787 "lis %%0,0\n\t"
13788 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13789 tls, off, rel64);
13790 else
13791 sprintf (str,
13792 "addis %%0,%%1,0\n\t"
13793 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13794 tls, off, rel64, addend);
13795 break;
13796 case RS6000_PLTSEQ_PLT16_LO:
13797 sprintf (str,
13798 "l%s %%0,0(%%1)\n\t"
13799 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13800 TARGET_64BIT ? "d" : "wz",
13801 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13802 break;
13803 case RS6000_PLTSEQ_MTCTR:
13804 sprintf (str,
13805 "mtctr %%1\n\t"
13806 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13807 tls, rel64, addend);
13808 break;
13809 case RS6000_PLTSEQ_PLT_PCREL34:
13810 sprintf (str,
13811 "pl%s %%0,0(0),1\n\t"
13812 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13813 TARGET_64BIT ? "d" : "wz",
13814 tls, rel64);
13815 break;
13816 default:
13817 gcc_unreachable ();
13818 }
13819 return str;
13820 }
13821 #endif
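/* Pieced together with the indirect call template above, the ELFv2 inline
   PLT sequence for a call to "foo" looks roughly like (relocations
   abbreviated):

	addis 9,2,0	# R_PPC64_PLT16_HA foo
	ld 9,0(9)	# R_PPC64_PLT16_LO_DS foo
	mtctr 9		# R_PPC64_PLTSEQ foo
	bctrl		# R_PPC64_PLTCALL foo

   This is a sketch only; a linker that resolves "foo" locally may relax
   the whole sequence to a direct "bl foo".  */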
13822 \f
13823 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13824 /* Emit an assembler directive to set symbol visibility for DECL to
13825 VISIBILITY_TYPE. */
13826
13827 static void
13828 rs6000_assemble_visibility (tree decl, int vis)
13829 {
13830 if (TARGET_XCOFF)
13831 return;
13832
13833 /* Functions need to have their entry point symbol visibility set as
13834 well as their descriptor symbol visibility. */
13835 if (DEFAULT_ABI == ABI_AIX
13836 && DOT_SYMBOLS
13837 && TREE_CODE (decl) == FUNCTION_DECL)
13838 {
13839 static const char * const visibility_types[] = {
13840 NULL, "protected", "hidden", "internal"
13841 };
13842
13843 const char *name, *type;
13844
13845 name = ((* targetm.strip_name_encoding)
13846 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13847 type = visibility_types[vis];
13848
13849 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13850 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13851 }
13852 else
13853 default_assemble_visibility (decl, vis);
13854 }
13855 #endif
13856 \f
13857 enum rtx_code
13858 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13859 {
13860 /* Reversal of FP compares needs care -- an ordered compare
13861 becomes an unordered compare and vice versa. */
13862 if (mode == CCFPmode
13863 && (!flag_finite_math_only
13864 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13865 || code == UNEQ || code == LTGT))
13866 return reverse_condition_maybe_unordered (code);
13867 else
13868 return reverse_condition (code);
13869 }
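/* Example: reversing GE in CCFPmode yields UNLT rather than LT, so a NaN
   operand still ends up on the reversed branch; in CCmode the plain
   reversal to LT is used.  */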
13870
13871 /* Generate a compare for CODE. Return a brand-new rtx that
13872 represents the result of the compare. */
13873
13874 static rtx
13875 rs6000_generate_compare (rtx cmp, machine_mode mode)
13876 {
13877 machine_mode comp_mode;
13878 rtx compare_result;
13879 enum rtx_code code = GET_CODE (cmp);
13880 rtx op0 = XEXP (cmp, 0);
13881 rtx op1 = XEXP (cmp, 1);
13882
13883 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13884 comp_mode = CCmode;
13885 else if (FLOAT_MODE_P (mode))
13886 comp_mode = CCFPmode;
13887 else if (code == GTU || code == LTU
13888 || code == GEU || code == LEU)
13889 comp_mode = CCUNSmode;
13890 else if ((code == EQ || code == NE)
13891 && unsigned_reg_p (op0)
13892 && (unsigned_reg_p (op1)
13893 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13894 /* These are unsigned values; perhaps there will be a later
13895 ordering compare that can be shared with this one. */
13896 comp_mode = CCUNSmode;
13897 else
13898 comp_mode = CCmode;
13899
13900 /* If we have an unsigned compare, make sure we don't have a signed value as
13901 an immediate. */
13902 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13903 && INTVAL (op1) < 0)
13904 {
13905 op0 = copy_rtx_if_shared (op0);
13906 op1 = force_reg (GET_MODE (op0), op1);
13907 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13908 }
13909
13910 /* First, the compare. */
13911 compare_result = gen_reg_rtx (comp_mode);
13912
13913 /* IEEE 128-bit support in VSX registers when we do not have hardware
13914 support. */
13915 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13916 {
13917 rtx libfunc = NULL_RTX;
13918 bool check_nan = false;
13919 rtx dest;
13920
13921 switch (code)
13922 {
13923 case EQ:
13924 case NE:
13925 libfunc = optab_libfunc (eq_optab, mode);
13926 break;
13927
13928 case GT:
13929 case GE:
13930 libfunc = optab_libfunc (ge_optab, mode);
13931 break;
13932
13933 case LT:
13934 case LE:
13935 libfunc = optab_libfunc (le_optab, mode);
13936 break;
13937
13938 case UNORDERED:
13939 case ORDERED:
13940 libfunc = optab_libfunc (unord_optab, mode);
13941 code = (code == UNORDERED) ? NE : EQ;
13942 break;
13943
13944 case UNGE:
13945 case UNGT:
13946 check_nan = true;
13947 libfunc = optab_libfunc (ge_optab, mode);
13948 code = (code == UNGE) ? GE : GT;
13949 break;
13950
13951 case UNLE:
13952 case UNLT:
13953 check_nan = true;
13954 libfunc = optab_libfunc (le_optab, mode);
13955 code = (code == UNLE) ? LE : LT;
13956 break;
13957
13958 case UNEQ:
13959 case LTGT:
13960 check_nan = true;
13961 libfunc = optab_libfunc (eq_optab, mode);
13962 code = (code == UNEQ) ? EQ : NE;
13963 break;
13964
13965 default:
13966 gcc_unreachable ();
13967 }
13968
13969 gcc_assert (libfunc);
13970
13971 if (!check_nan)
13972 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13973 SImode, op0, mode, op1, mode);
13974
13975 /* The library signals an exception for signalling NaNs, so we need to
13976 handle isgreater, etc. by first checking isordered. */
13977 else
13978 {
13979 rtx ne_rtx, normal_dest, unord_dest;
13980 rtx unord_func = optab_libfunc (unord_optab, mode);
13981 rtx join_label = gen_label_rtx ();
13982 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13983 rtx unord_cmp = gen_reg_rtx (comp_mode);
13984
13985
13986 /* Test for either value being a NaN. */
13987 gcc_assert (unord_func);
13988 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13989 SImode, op0, mode, op1, mode);
13990
13991 /* Set the result (1) if either value is a NaN, and jump to the join
13992 label. */
13993 dest = gen_reg_rtx (SImode);
13994 emit_move_insn (dest, const1_rtx);
13995 emit_insn (gen_rtx_SET (unord_cmp,
13996 gen_rtx_COMPARE (comp_mode, unord_dest,
13997 const0_rtx)));
13998
13999 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14000 emit_jump_insn (gen_rtx_SET (pc_rtx,
14001 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14002 join_ref,
14003 pc_rtx)));
14004
14005 /* Do the normal comparison, knowing that the values are not
14006 NaNs. */
14007 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14008 SImode, op0, mode, op1, mode);
14009
14010 emit_insn (gen_cstoresi4 (dest,
14011 gen_rtx_fmt_ee (code, SImode, normal_dest,
14012 const0_rtx),
14013 normal_dest, const0_rtx));
14014
14015 /* Join NaN and non-NaN paths. Compare dest against 0. */
14016 emit_label (join_label);
14017 code = NE;
14018 }
14019
14020 emit_insn (gen_rtx_SET (compare_result,
14021 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14022 }
14023
14024 else
14025 {
14026 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14027 CLOBBERs to match cmptf_internal2 pattern. */
14028 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14029 && FLOAT128_IBM_P (GET_MODE (op0))
14030 && TARGET_HARD_FLOAT)
14031 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14032 gen_rtvec (10,
14033 gen_rtx_SET (compare_result,
14034 gen_rtx_COMPARE (comp_mode, op0, op1)),
14035 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14036 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14037 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14038 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14039 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14040 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14041 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14042 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14043 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14044 else if (GET_CODE (op1) == UNSPEC
14045 && XINT (op1, 1) == UNSPEC_SP_TEST)
14046 {
14047 rtx op1b = XVECEXP (op1, 0, 0);
14048 comp_mode = CCEQmode;
14049 compare_result = gen_reg_rtx (CCEQmode);
14050 if (TARGET_64BIT)
14051 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14052 else
14053 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14054 }
14055 else
14056 emit_insn (gen_rtx_SET (compare_result,
14057 gen_rtx_COMPARE (comp_mode, op0, op1)));
14058 }
14059
14060 validate_condition_mode (code, GET_MODE (compare_result));
14061
14062 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14063 }
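/* Example of the mode selection above: (gtu (reg:DI 3) (reg:DI 4)) compares
   in CCUNSmode, a signed (lt ...) in CCmode, and a DFmode compare in
   CCFPmode.  In every case the caller receives
   (<code> (reg:<comp_mode> CRn) (const_int 0)) to branch or set on.  */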
14064
14065 \f
14066 /* Return the diagnostic message string if the binary operation OP is
14067 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14068
14069 static const char*
14070 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14071 const_tree type1,
14072 const_tree type2)
14073 {
14074 machine_mode mode1 = TYPE_MODE (type1);
14075 machine_mode mode2 = TYPE_MODE (type2);
14076
14077 /* For complex modes, use the inner type. */
14078 if (COMPLEX_MODE_P (mode1))
14079 mode1 = GET_MODE_INNER (mode1);
14080
14081 if (COMPLEX_MODE_P (mode2))
14082 mode2 = GET_MODE_INNER (mode2);
14083
14084 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14085 double to intermix unless -mfloat128-convert is in effect. */
14086 if (mode1 == mode2)
14087 return NULL;
14088
14089 if (!TARGET_FLOAT128_CVT)
14090 {
14091 if ((mode1 == KFmode && mode2 == IFmode)
14092 || (mode1 == IFmode && mode2 == KFmode))
14093 return N_("__float128 and __ibm128 cannot be used in the same "
14094 "expression");
14095
14096 if (TARGET_IEEEQUAD
14097 && ((mode1 == IFmode && mode2 == TFmode)
14098 || (mode1 == TFmode && mode2 == IFmode)))
14099 return N_("__ibm128 and long double cannot be used in the same "
14100 "expression");
14101
14102 if (!TARGET_IEEEQUAD
14103 && ((mode1 == KFmode && mode2 == TFmode)
14104 || (mode1 == TFmode && mode2 == KFmode)))
14105 return N_("__float128 and long double cannot be used in the same "
14106 "expression");
14107 }
14108
14109 return NULL;
14110 }
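
/* For example (illustrative): without -mfloat128-convert, an expression
   mixing a __float128 value with an __ibm128 value, such as "a + b" with
   "__float128 a; __ibm128 b;", is rejected with the first message above.  */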
14111
14112 \f
14113 /* Expand floating point conversion to/from __float128 and __ibm128. */
14114
14115 void
14116 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14117 {
14118 machine_mode dest_mode = GET_MODE (dest);
14119 machine_mode src_mode = GET_MODE (src);
14120 convert_optab cvt = unknown_optab;
14121 bool do_move = false;
14122 rtx libfunc = NULL_RTX;
14123 rtx dest2;
14124 typedef rtx (*rtx_2func_t) (rtx, rtx);
14125 rtx_2func_t hw_convert = (rtx_2func_t)0;
14126 size_t kf_or_tf;
14127
14128 struct hw_conv_t {
14129 rtx_2func_t from_df;
14130 rtx_2func_t from_sf;
14131 rtx_2func_t from_si_sign;
14132 rtx_2func_t from_si_uns;
14133 rtx_2func_t from_di_sign;
14134 rtx_2func_t from_di_uns;
14135 rtx_2func_t to_df;
14136 rtx_2func_t to_sf;
14137 rtx_2func_t to_si_sign;
14138 rtx_2func_t to_si_uns;
14139 rtx_2func_t to_di_sign;
14140 rtx_2func_t to_di_uns;
14141 } hw_conversions[2] = {
14142 /* conversions to/from KFmode */
14143 {
14144 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14145 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14146 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14147 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14148 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14149 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14150 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14151 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14152 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14153 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14154 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14155 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14156 },
14157
14158 /* conversions to/from TFmode */
14159 {
14160 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14161 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14162 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14163 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14164 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14165 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14166 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14167 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14168 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14169 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14170 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14171 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14172 },
14173 };
14174
14175 if (dest_mode == src_mode)
14176 gcc_unreachable ();
14177
14178 /* Eliminate memory operations. */
14179 if (MEM_P (src))
14180 src = force_reg (src_mode, src);
14181
14182 if (MEM_P (dest))
14183 {
14184 rtx tmp = gen_reg_rtx (dest_mode);
14185 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14186 rs6000_emit_move (dest, tmp, dest_mode);
14187 return;
14188 }
14189
14190 /* Convert to IEEE 128-bit floating point. */
14191 if (FLOAT128_IEEE_P (dest_mode))
14192 {
14193 if (dest_mode == KFmode)
14194 kf_or_tf = 0;
14195 else if (dest_mode == TFmode)
14196 kf_or_tf = 1;
14197 else
14198 gcc_unreachable ();
14199
14200 switch (src_mode)
14201 {
14202 case E_DFmode:
14203 cvt = sext_optab;
14204 hw_convert = hw_conversions[kf_or_tf].from_df;
14205 break;
14206
14207 case E_SFmode:
14208 cvt = sext_optab;
14209 hw_convert = hw_conversions[kf_or_tf].from_sf;
14210 break;
14211
14212 case E_KFmode:
14213 case E_IFmode:
14214 case E_TFmode:
14215 if (FLOAT128_IBM_P (src_mode))
14216 cvt = sext_optab;
14217 else
14218 do_move = true;
14219 break;
14220
14221 case E_SImode:
14222 if (unsigned_p)
14223 {
14224 cvt = ufloat_optab;
14225 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14226 }
14227 else
14228 {
14229 cvt = sfloat_optab;
14230 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14231 }
14232 break;
14233
14234 case E_DImode:
14235 if (unsigned_p)
14236 {
14237 cvt = ufloat_optab;
14238 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14239 }
14240 else
14241 {
14242 cvt = sfloat_optab;
14243 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14244 }
14245 break;
14246
14247 default:
14248 gcc_unreachable ();
14249 }
14250 }
14251
14252 /* Convert from IEEE 128-bit floating point. */
14253 else if (FLOAT128_IEEE_P (src_mode))
14254 {
14255 if (src_mode == KFmode)
14256 kf_or_tf = 0;
14257 else if (src_mode == TFmode)
14258 kf_or_tf = 1;
14259 else
14260 gcc_unreachable ();
14261
14262 switch (dest_mode)
14263 {
14264 case E_DFmode:
14265 cvt = trunc_optab;
14266 hw_convert = hw_conversions[kf_or_tf].to_df;
14267 break;
14268
14269 case E_SFmode:
14270 cvt = trunc_optab;
14271 hw_convert = hw_conversions[kf_or_tf].to_sf;
14272 break;
14273
14274 case E_KFmode:
14275 case E_IFmode:
14276 case E_TFmode:
14277 if (FLOAT128_IBM_P (dest_mode))
14278 cvt = trunc_optab;
14279 else
14280 do_move = true;
14281 break;
14282
14283 case E_SImode:
14284 if (unsigned_p)
14285 {
14286 cvt = ufix_optab;
14287 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14288 }
14289 else
14290 {
14291 cvt = sfix_optab;
14292 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14293 }
14294 break;
14295
14296 case E_DImode:
14297 if (unsigned_p)
14298 {
14299 cvt = ufix_optab;
14300 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14301 }
14302 else
14303 {
14304 cvt = sfix_optab;
14305 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14306 }
14307 break;
14308
14309 default:
14310 gcc_unreachable ();
14311 }
14312 }
14313
14314 /* Both IBM format. */
14315 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14316 do_move = true;
14317
14318 else
14319 gcc_unreachable ();
14320
14321 /* Handle conversion between TFmode/KFmode/IFmode. */
14322 if (do_move)
14323 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14324
14325 /* Handle conversion if we have hardware support. */
14326 else if (TARGET_FLOAT128_HW && hw_convert)
14327 emit_insn ((hw_convert) (dest, src));
14328
14329 /* Call an external function to do the conversion. */
14330 else if (cvt != unknown_optab)
14331 {
14332 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14333 gcc_assert (libfunc != NULL_RTX);
14334
14335 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14336 src, src_mode);
14337
14338 gcc_assert (dest2 != NULL_RTX);
14339 if (!rtx_equal_p (dest, dest2))
14340 emit_move_insn (dest, dest2);
14341 }
14342
14343 else
14344 gcc_unreachable ();
14345
14346 return;
14347 }
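
/* An illustrative sketch of the dispatch above: extending a double to
   __float128 with ISA 3.0 hardware support goes through
   hw_conversions[0].from_df and emits a single xscvdpqp instruction,
   while without -mfloat128-hardware the sext_optab libfunc
   (__extenddfkf2 in libgcc) is called instead.  */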
14348
14349 \f
14350 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14351 can be used as that dest register. Return the dest register. */
14352
14353 rtx
14354 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14355 {
14356 if (op2 == const0_rtx)
14357 return op1;
14358
14359 if (GET_CODE (scratch) == SCRATCH)
14360 scratch = gen_reg_rtx (mode);
14361
14362 if (logical_operand (op2, mode))
14363 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14364 else
14365 emit_insn (gen_rtx_SET (scratch,
14366 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14367
14368 return scratch;
14369 }
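
/* For example (illustrative): comparing OP1 against 0x4000, which
   satisfies logical_operand, emits "xori scratch,op1,0x4000", while a
   non-logical constant such as -5 emits "addi scratch,op1,5"; in both
   cases the caller then compares SCRATCH against zero.  */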
14370
14371 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14372 requires this. The result is in mode MODE. */
14373 rtx
14374 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14375 {
14376 rtx cond[2];
14377 int n = 0;
14378 if (code == LTGT || code == LE || code == UNLT)
14379 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14380 if (code == LTGT || code == GE || code == UNGT)
14381 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14382 if (code == LE || code == GE || code == UNEQ)
14383 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14384 if (code == UNLT || code == UNGT || code == UNEQ)
14385 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14386
14387 gcc_assert (n == 2);
14388
14389 rtx cc = gen_reg_rtx (CCEQmode);
14390 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14391 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14392
14393 return cc;
14394 }
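
/* A sketch of the result: for CODE == LE the two conditions above are LT
   and EQ, and gen_cceq_ior_compare emits a single cror that ORs the LT
   and EQ bits of the CR field holding X into the EQ bit of the fresh
   CCEQmode register, which the caller can then test with one branch.  */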
14395
14396 void
14397 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14398 {
14399 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14400 rtx_code cond_code = GET_CODE (condition_rtx);
14401
14402 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14403 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14404 ;
14405 else if (cond_code == NE
14406 || cond_code == GE || cond_code == LE
14407 || cond_code == GEU || cond_code == LEU
14408 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14409 {
14410 rtx not_result = gen_reg_rtx (CCEQmode);
14411 rtx not_op, rev_cond_rtx;
14412 machine_mode cc_mode;
14413
14414 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14415
14416 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14417 SImode, XEXP (condition_rtx, 0), const0_rtx);
14418 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14419 emit_insn (gen_rtx_SET (not_result, not_op));
14420 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14421 }
14422
14423 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14424 if (op_mode == VOIDmode)
14425 op_mode = GET_MODE (XEXP (operands[1], 1));
14426
14427 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14428 {
14429 PUT_MODE (condition_rtx, DImode);
14430 convert_move (operands[0], condition_rtx, 0);
14431 }
14432 else
14433 {
14434 PUT_MODE (condition_rtx, SImode);
14435 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14436 }
14437 }
14438
14439 /* Emit a conditional branch testing comparison OPERANDS[0] (whose arms are OPERANDS[1] and OPERANDS[2]) in mode MODE, jumping to the label in OPERANDS[3]. */
14440
14441 void
14442 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14443 {
14444 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14445 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14446 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14447 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14448 }
14449
14450 /* Return the string to output a conditional branch to LABEL, which is
14451 the operand template of the label, or NULL if the branch is really a
14452 conditional return.
14453
14454 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14455 condition code register and its mode specifies what kind of
14456 comparison we made.
14457
14458 REVERSED is nonzero if we should reverse the sense of the comparison.
14459
14460 INSN is the insn. */
14461
14462 char *
14463 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14464 {
14465 static char string[64];
14466 enum rtx_code code = GET_CODE (op);
14467 rtx cc_reg = XEXP (op, 0);
14468 machine_mode mode = GET_MODE (cc_reg);
14469 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14470 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14471 int really_reversed = reversed ^ need_longbranch;
14472 char *s = string;
14473 const char *ccode;
14474 const char *pred;
14475 rtx note;
14476
14477 validate_condition_mode (code, mode);
14478
14479 /* Work out which way this really branches. We could use
14480 reverse_condition_maybe_unordered here always but this
14481 makes the resulting assembler clearer. */
14482 if (really_reversed)
14483 {
14484 /* Reversal of FP compares takes care -- an ordered compare
14485 becomes an unordered compare and vice versa. */
14486 if (mode == CCFPmode)
14487 code = reverse_condition_maybe_unordered (code);
14488 else
14489 code = reverse_condition (code);
14490 }
14491
14492 switch (code)
14493 {
14494 /* Not all of these are actually distinct opcodes, but
14495 we distinguish them for clarity of the resulting assembler. */
14496 case NE: case LTGT:
14497 ccode = "ne"; break;
14498 case EQ: case UNEQ:
14499 ccode = "eq"; break;
14500 case GE: case GEU:
14501 ccode = "ge"; break;
14502 case GT: case GTU: case UNGT:
14503 ccode = "gt"; break;
14504 case LE: case LEU:
14505 ccode = "le"; break;
14506 case LT: case LTU: case UNLT:
14507 ccode = "lt"; break;
14508 case UNORDERED: ccode = "un"; break;
14509 case ORDERED: ccode = "nu"; break;
14510 case UNGE: ccode = "nl"; break;
14511 case UNLE: ccode = "ng"; break;
14512 default:
14513 gcc_unreachable ();
14514 }
14515
14516 /* Maybe we have a guess as to how likely the branch is. */
14517 pred = "";
14518 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14519 if (note != NULL_RTX)
14520 {
14521 /* PROB is the difference from 50%. */
14522 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14523 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14524
14525 /* Only hint for highly probable/improbable branches on newer cpus when
14526 we have real profile data, as static prediction overrides processor
14527 dynamic prediction. For older cpus we may as well always hint, but
14528 assume not taken for branches that are very close to 50% as a
14529 mispredicted taken branch is more expensive than a
14530 mispredicted not-taken branch. */
14531 if (rs6000_always_hint
14532 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14533 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14534 && br_prob_note_reliable_p (note)))
14535 {
14536 if (abs (prob) > REG_BR_PROB_BASE / 20
14537 && ((prob > 0) ^ need_longbranch))
14538 pred = "+";
14539 else
14540 pred = "-";
14541 }
14542 }
14543
14544 if (label == NULL)
14545 s += sprintf (s, "b%slr%s ", ccode, pred);
14546 else
14547 s += sprintf (s, "b%s%s ", ccode, pred);
14548
14549 /* We need to escape any '%' characters in the reg_names string.
14550 Assume they'd only be the first character.... */
14551 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14552 *s++ = '%';
14553 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14554
14555 if (label != NULL)
14556 {
14557 /* If the branch distance was too far, we may have to use an
14558 unconditional branch to go the distance. */
14559 if (need_longbranch)
14560 s += sprintf (s, ",$+8\n\tb %s", label);
14561 else
14562 s += sprintf (s, ",%s", label);
14563 }
14564
14565 return string;
14566 }
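
/* Illustrative outputs: a likely-taken NE branch on cr1 is printed as
   "bne+ 1,.L42"; if the label is out of range of the 16-bit conditional
   displacement, the condition is reversed and the long form
   "beq 1,$+8" followed by "b .L42" is used instead.  */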
14567
14568 /* Return insn for VSX or Altivec comparisons. */
14569
14570 static rtx
14571 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14572 {
14573 rtx mask;
14574 machine_mode mode = GET_MODE (op0);
14575
14576 switch (code)
14577 {
14578 default:
14579 break;
14580
14581 case GE:
14582 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14583 return NULL_RTX;
14584 /* FALLTHRU */
14585
14586 case EQ:
14587 case GT:
14588 case GTU:
14589 case ORDERED:
14590 case UNORDERED:
14591 case UNEQ:
14592 case LTGT:
14593 mask = gen_reg_rtx (mode);
14594 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14595 return mask;
14596 }
14597
14598 return NULL_RTX;
14599 }
14600
14601 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14602 DMODE is the expected destination mode. This is a recursive function. */
14603
14604 static rtx
14605 rs6000_emit_vector_compare (enum rtx_code rcode,
14606 rtx op0, rtx op1,
14607 machine_mode dmode)
14608 {
14609 rtx mask;
14610 bool swap_operands = false;
14611 bool try_again = false;
14612
14613 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14614 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14615
14616 /* See if the comparison works as is. */
14617 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14618 if (mask)
14619 return mask;
14620
14621 switch (rcode)
14622 {
14623 case LT:
14624 rcode = GT;
14625 swap_operands = true;
14626 try_again = true;
14627 break;
14628 case LTU:
14629 rcode = GTU;
14630 swap_operands = true;
14631 try_again = true;
14632 break;
14633 case NE:
14634 case UNLE:
14635 case UNLT:
14636 case UNGE:
14637 case UNGT:
14638 /* Invert condition and try again.
14639 e.g., A != B becomes ~(A==B). */
14640 {
14641 enum rtx_code rev_code;
14642 enum insn_code nor_code;
14643 rtx mask2;
14644
14645 rev_code = reverse_condition_maybe_unordered (rcode);
14646 if (rev_code == UNKNOWN)
14647 return NULL_RTX;
14648
14649 nor_code = optab_handler (one_cmpl_optab, dmode);
14650 if (nor_code == CODE_FOR_nothing)
14651 return NULL_RTX;
14652
14653 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14654 if (!mask2)
14655 return NULL_RTX;
14656
14657 mask = gen_reg_rtx (dmode);
14658 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14659 return mask;
14660 }
14661 break;
14662 case GE:
14663 case GEU:
14664 case LE:
14665 case LEU:
14666 /* Try GT/GTU/LT/LTU OR EQ */
14667 {
14668 rtx c_rtx, eq_rtx;
14669 enum insn_code ior_code;
14670 enum rtx_code new_code;
14671
14672 switch (rcode)
14673 {
14674 case GE:
14675 new_code = GT;
14676 break;
14677
14678 case GEU:
14679 new_code = GTU;
14680 break;
14681
14682 case LE:
14683 new_code = LT;
14684 break;
14685
14686 case LEU:
14687 new_code = LTU;
14688 break;
14689
14690 default:
14691 gcc_unreachable ();
14692 }
14693
14694 ior_code = optab_handler (ior_optab, dmode);
14695 if (ior_code == CODE_FOR_nothing)
14696 return NULL_RTX;
14697
14698 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14699 if (!c_rtx)
14700 return NULL_RTX;
14701
14702 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14703 if (!eq_rtx)
14704 return NULL_RTX;
14705
14706 mask = gen_reg_rtx (dmode);
14707 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14708 return mask;
14709 }
14710 break;
14711 default:
14712 return NULL_RTX;
14713 }
14714
14715 if (try_again)
14716 {
14717 if (swap_operands)
14718 std::swap (op0, op1);
14719
14720 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14721 if (mask)
14722 return mask;
14723 }
14724
14725 /* You only get two chances. */
14726 return NULL_RTX;
14727 }
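
/* Illustrative examples of the rewrites above: V4SI "a < b" is handled
   as "b > a" (vcmpgtsw with swapped operands), "a != b" becomes the
   complement of "a == b" (vcmpequw then vnor), and unsigned V16QI
   "a >= b" becomes "(a > b) | (a == b)" (vcmpgtub, vcmpequb, vor).  */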
14728
14729 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14730 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14731 operands for the relation operation COND. */
14732
14733 int
14734 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14735 rtx cond, rtx cc_op0, rtx cc_op1)
14736 {
14737 machine_mode dest_mode = GET_MODE (dest);
14738 machine_mode mask_mode = GET_MODE (cc_op0);
14739 enum rtx_code rcode = GET_CODE (cond);
14740 machine_mode cc_mode = CCmode;
14741 rtx mask;
14742 rtx cond2;
14743 bool invert_move = false;
14744
14745 if (VECTOR_UNIT_NONE_P (dest_mode))
14746 return 0;
14747
14748 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14749 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14750
14751 switch (rcode)
14752 {
14753 /* Swap operands if we can; otherwise do the operation as specified
14754 and use a NOR to invert the test. */
14755 case NE:
14756 case UNLE:
14757 case UNLT:
14758 case UNGE:
14759 case UNGT:
14760 /* Invert condition and try again.
14761 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14762 invert_move = true;
14763 rcode = reverse_condition_maybe_unordered (rcode);
14764 if (rcode == UNKNOWN)
14765 return 0;
14766 break;
14767
14768 case GE:
14769 case LE:
14770 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14771 {
14772 /* Invert condition to avoid compound test. */
14773 invert_move = true;
14774 rcode = reverse_condition (rcode);
14775 }
14776 break;
14777
14778 case GTU:
14779 case GEU:
14780 case LTU:
14781 case LEU:
14782 /* Mark unsigned tests with CCUNSmode. */
14783 cc_mode = CCUNSmode;
14784
14785 /* Invert condition to avoid compound test if necessary. */
14786 if (rcode == GEU || rcode == LEU)
14787 {
14788 invert_move = true;
14789 rcode = reverse_condition (rcode);
14790 }
14791 break;
14792
14793 default:
14794 break;
14795 }
14796
14797 /* Get the vector mask for the given relational operations. */
14798 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14799
14800 if (!mask)
14801 return 0;
14802
14803 if (invert_move)
14804 std::swap (op_true, op_false);
14805
14806 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
14807 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14808 && (GET_CODE (op_true) == CONST_VECTOR
14809 || GET_CODE (op_false) == CONST_VECTOR))
14810 {
14811 rtx constant_0 = CONST0_RTX (dest_mode);
14812 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14813
14814 if (op_true == constant_m1 && op_false == constant_0)
14815 {
14816 emit_move_insn (dest, mask);
14817 return 1;
14818 }
14819
14820 else if (op_true == constant_0 && op_false == constant_m1)
14821 {
14822 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14823 return 1;
14824 }
14825
14826 /* If we can't use the vector comparison directly, perhaps we can use
14827 the mask for the true or false fields, instead of loading up a
14828 constant. */
14829 if (op_true == constant_m1)
14830 op_true = mask;
14831
14832 if (op_false == constant_0)
14833 op_false = mask;
14834 }
14835
14836 if (!REG_P (op_true) && !SUBREG_P (op_true))
14837 op_true = force_reg (dest_mode, op_true);
14838
14839 if (!REG_P (op_false) && !SUBREG_P (op_false))
14840 op_false = force_reg (dest_mode, op_false);
14841
14842 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14843 CONST0_RTX (dest_mode));
14844 emit_insn (gen_rtx_SET (dest,
14845 gen_rtx_IF_THEN_ELSE (dest_mode,
14846 cond2,
14847 op_true,
14848 op_false)));
14849 return 1;
14850 }
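
/* The final if_then_else above matches the vector select patterns, so
   (illustratively) a V4SF "a > b ? c : d" becomes an xvcmpgtsp (or
   vcmpgtfp without VSX) producing the lane mask, followed by an
   xxsel/vsel that picks each lane from C or D.  */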
14851
14852 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP
14853 instruction for SF/DF scalars. Move TRUE_COND to DEST if OP applied to
14854 the operands of the last comparison is nonzero/true, FALSE_COND if it is
14855 zero/false. Return 0 if the hardware has no such operation. */
14856
14857 static int
14858 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14859 {
14860 enum rtx_code code = GET_CODE (op);
14861 rtx op0 = XEXP (op, 0);
14862 rtx op1 = XEXP (op, 1);
14863 machine_mode compare_mode = GET_MODE (op0);
14864 machine_mode result_mode = GET_MODE (dest);
14865 bool max_p = false;
14866
14867 if (result_mode != compare_mode)
14868 return 0;
14869
14870 if (code == GE || code == GT)
14871 max_p = true;
14872 else if (code == LE || code == LT)
14873 max_p = false;
14874 else
14875 return 0;
14876
14877 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14878 ;
14879
14880 /* Only when NaNs and signed zeros need not be honored can smax be
14881 used for `op0 < op1 ? op1 : op0`, and smin for
14882 `op0 > op1 ? op1 : op0`. */
14883 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
14884 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
14885 max_p = !max_p;
14886
14887 else
14888 return 0;
14889
14890 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14891 return 1;
14892 }
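
/* For instance (requires TARGET_P9_MINMAX): a DFmode "a > b ? a : b"
   passes the checks above and is emitted as a single xsmaxcdp, and with
   -ffast-math the swapped-arm form "a < b ? b : a" also becomes
   xsmaxcdp via the max_p inversion.  */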
14893
14894 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14895 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
14896 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
14897 zero/false. Return 0 if the hardware has no such operation. */
14898
14899 static int
14900 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14901 {
14902 enum rtx_code code = GET_CODE (op);
14903 rtx op0 = XEXP (op, 0);
14904 rtx op1 = XEXP (op, 1);
14905 machine_mode result_mode = GET_MODE (dest);
14906 rtx compare_rtx;
14907 rtx cmove_rtx;
14908 rtx clobber_rtx;
14909
14910 if (!can_create_pseudo_p ())
14911 return 0;
14912
14913 switch (code)
14914 {
14915 case EQ:
14916 case GE:
14917 case GT:
14918 break;
14919
14920 case NE:
14921 case LT:
14922 case LE:
14923 code = swap_condition (code);
14924 std::swap (op0, op1);
14925 break;
14926
14927 default:
14928 return 0;
14929 }
14930
14931 /* Generate: [(parallel [(set (dest)
14932 (if_then_else (op (cmp1) (cmp2))
14933 (true)
14934 (false)))
14935 (clobber (scratch))])]. */
14936
14937 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14938 cmove_rtx = gen_rtx_SET (dest,
14939 gen_rtx_IF_THEN_ELSE (result_mode,
14940 compare_rtx,
14941 true_cond,
14942 false_cond));
14943
14944 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14945 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14946 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14947
14948 return 1;
14949 }
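
/* A sketch of the expansion: the parallel above matches the power9
   "fpmask" patterns, so DFmode "a > b ? x : y" becomes an xscmpgtdp
   writing an all-ones/all-zeros mask into the scratch vector register,
   followed by an xxsel choosing between X and Y (illustrative).  */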
14950
14951 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
14952 operands of the last comparison is nonzero/true, FALSE_COND if it
14953 is zero/false. Return 0 if the hardware has no such operation. */
14954
14955 int
14956 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14957 {
14958 enum rtx_code code = GET_CODE (op);
14959 rtx op0 = XEXP (op, 0);
14960 rtx op1 = XEXP (op, 1);
14961 machine_mode compare_mode = GET_MODE (op0);
14962 machine_mode result_mode = GET_MODE (dest);
14963 rtx temp;
14964 bool is_against_zero;
14965
14966 /* These modes should always match. */
14967 if (GET_MODE (op1) != compare_mode
14968 /* In the isel case however, we can use a compare immediate, so
14969 op1 may be a small constant. */
14970 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14971 return 0;
14972 if (GET_MODE (true_cond) != result_mode)
14973 return 0;
14974 if (GET_MODE (false_cond) != result_mode)
14975 return 0;
14976
14977 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14978 if (TARGET_P9_MINMAX
14979 && (compare_mode == SFmode || compare_mode == DFmode)
14980 && (result_mode == SFmode || result_mode == DFmode))
14981 {
14982 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14983 return 1;
14984
14985 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14986 return 1;
14987 }
14988
14989 /* Don't allow using floating point comparisons for integer results for
14990 now. */
14991 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14992 return 0;
14993
14994 /* First, work out if the hardware can do this at all, or
14995 if it's too slow.... */
14996 if (!FLOAT_MODE_P (compare_mode))
14997 {
14998 if (TARGET_ISEL)
14999 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15000 return 0;
15001 }
15002
15003 is_against_zero = op1 == CONST0_RTX (compare_mode);
15004
15005 /* A floating-point subtract might overflow, underflow, or produce
15006 an inexact result, thus changing the floating-point flags, so it
15007 can't be generated if we care about that. It's safe if one side
15008 of the construct is zero, since then no subtract will be
15009 generated. */
15010 if (SCALAR_FLOAT_MODE_P (compare_mode)
15011 && flag_trapping_math && ! is_against_zero)
15012 return 0;
15013
15014 /* Eliminate half of the comparisons by switching operands; this
15015 makes the remaining code simpler. */
15016 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15017 || code == LTGT || code == LT || code == UNLE)
15018 {
15019 code = reverse_condition_maybe_unordered (code);
15020 temp = true_cond;
15021 true_cond = false_cond;
15022 false_cond = temp;
15023 }
15024
15025 /* UNEQ and LTGT take four instructions for a comparison with zero,
15026 so it'll probably be faster to use a branch here too. */
15027 if (code == UNEQ && HONOR_NANS (compare_mode))
15028 return 0;
15029
15030 /* We're going to try to implement comparisons by performing
15031 a subtract, then comparing against zero. Unfortunately,
15032 Inf - Inf is NaN which is not zero, and so if we don't
15033 know that the operand is finite and the comparison
15034 would treat EQ differently from UNORDERED, we can't do it. */
15035 if (HONOR_INFINITIES (compare_mode)
15036 && code != GT && code != UNGE
15037 && (!CONST_DOUBLE_P (op1)
15038 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15039 /* Constructs of the form (a OP b ? a : b) are safe. */
15040 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15041 || (! rtx_equal_p (op0, true_cond)
15042 && ! rtx_equal_p (op1, true_cond))))
15043 return 0;
15044
15045 /* At this point we know we can use fsel. */
15046
15047 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15048 is no fsel instruction. */
15049 if (compare_mode != SFmode && compare_mode != DFmode)
15050 return 0;
15051
15052 /* Reduce the comparison to a comparison against zero. */
15053 if (! is_against_zero)
15054 {
15055 temp = gen_reg_rtx (compare_mode);
15056 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15057 op0 = temp;
15058 op1 = CONST0_RTX (compare_mode);
15059 }
15060
15061 /* If we don't care about NaNs we can reduce some of the comparisons
15062 down to faster ones. */
15063 if (! HONOR_NANS (compare_mode))
15064 switch (code)
15065 {
15066 case GT:
15067 code = LE;
15068 temp = true_cond;
15069 true_cond = false_cond;
15070 false_cond = temp;
15071 break;
15072 case UNGE:
15073 code = GE;
15074 break;
15075 case UNEQ:
15076 code = EQ;
15077 break;
15078 default:
15079 break;
15080 }
15081
15082 /* Now, reduce everything down to a GE. */
15083 switch (code)
15084 {
15085 case GE:
15086 break;
15087
15088 case LE:
15089 temp = gen_reg_rtx (compare_mode);
15090 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15091 op0 = temp;
15092 break;
15093
15094 case ORDERED:
15095 temp = gen_reg_rtx (compare_mode);
15096 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15097 op0 = temp;
15098 break;
15099
15100 case EQ:
15101 temp = gen_reg_rtx (compare_mode);
15102 emit_insn (gen_rtx_SET (temp,
15103 gen_rtx_NEG (compare_mode,
15104 gen_rtx_ABS (compare_mode, op0))));
15105 op0 = temp;
15106 break;
15107
15108 case UNGE:
15109 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15110 temp = gen_reg_rtx (result_mode);
15111 emit_insn (gen_rtx_SET (temp,
15112 gen_rtx_IF_THEN_ELSE (result_mode,
15113 gen_rtx_GE (VOIDmode,
15114 op0, op1),
15115 true_cond, false_cond)));
15116 false_cond = true_cond;
15117 true_cond = temp;
15118
15119 temp = gen_reg_rtx (compare_mode);
15120 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15121 op0 = temp;
15122 break;
15123
15124 case GT:
15125 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15126 temp = gen_reg_rtx (result_mode);
15127 emit_insn (gen_rtx_SET (temp,
15128 gen_rtx_IF_THEN_ELSE (result_mode,
15129 gen_rtx_GE (VOIDmode,
15130 op0, op1),
15131 true_cond, false_cond)));
15132 true_cond = false_cond;
15133 false_cond = temp;
15134
15135 temp = gen_reg_rtx (compare_mode);
15136 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15137 op0 = temp;
15138 break;
15139
15140 default:
15141 gcc_unreachable ();
15142 }
15143
15144 emit_insn (gen_rtx_SET (dest,
15145 gen_rtx_IF_THEN_ELSE (result_mode,
15146 gen_rtx_GE (VOIDmode,
15147 op0, op1),
15148 true_cond, false_cond)));
15149 return 1;
15150 }
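
/* A sketch of the fsel path: with NaNs and trapping math disabled,
   DFmode "a >= b ? x : y" is reduced to a compare against zero by
   computing t = a - b and then emitting "fsel dest,t,x,y", which
   selects X when T is greater than or equal to zero.  */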
15151
15152 /* Same as above, but for ints (isel). */
15153
15154 int
15155 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15156 {
15157 rtx condition_rtx, cr;
15158 machine_mode mode = GET_MODE (dest);
15159 enum rtx_code cond_code;
15160 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15161 bool signedp;
15162
15163 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15164 return 0;
15165
15166 /* We still have to do the compare, because isel doesn't do a
15167 compare; it just looks at the CRx bits set by a previous compare
15168 instruction. */
15169 condition_rtx = rs6000_generate_compare (op, mode);
15170 cond_code = GET_CODE (condition_rtx);
15171 cr = XEXP (condition_rtx, 0);
15172 signedp = GET_MODE (cr) == CCmode;
15173
15174 isel_func = (mode == SImode
15175 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15176 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15177
15178 switch (cond_code)
15179 {
15180 case LT: case GT: case LTU: case GTU: case EQ:
15181 /* isel handles these directly. */
15182 break;
15183
15184 default:
15185 /* We need to swap the sense of the comparison. */
15186 {
15187 std::swap (false_cond, true_cond);
15188 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15189 }
15190 break;
15191 }
15192
15193 false_cond = force_reg (mode, false_cond);
15194 if (true_cond != const0_rtx)
15195 true_cond = force_reg (mode, true_cond);
15196
15197 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15198
15199 return 1;
15200 }
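
/* For example (illustrative): SImode "r = (a < b) ? x : y" emits a cmpw
   setting cr0 followed by "isel r,x,y,0", selecting X when the LT bit
   of cr0 is set; conditions isel cannot test directly, such as LE, are
   first reversed with X and Y swapped.  */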
15201
15202 void
15203 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15204 {
15205 machine_mode mode = GET_MODE (op0);
15206 enum rtx_code c;
15207 rtx target;
15208
15209 /* VSX/altivec have direct min/max insns. */
15210 if ((code == SMAX || code == SMIN)
15211 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15212 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15213 {
15214 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15215 return;
15216 }
15217
15218 if (code == SMAX || code == SMIN)
15219 c = GE;
15220 else
15221 c = GEU;
15222
15223 if (code == SMAX || code == UMAX)
15224 target = emit_conditional_move (dest, c, op0, op1, mode,
15225 op0, op1, mode, 0);
15226 else
15227 target = emit_conditional_move (dest, c, op0, op1, mode,
15228 op1, op0, mode, 0);
15229 gcc_assert (target);
15230 if (target != dest)
15231 emit_move_insn (dest, target);
15232 }
15233
15234 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15235 COND is true. Mark the jump as unlikely to be taken. */
15236
15237 static void
15238 emit_unlikely_jump (rtx cond, rtx label)
15239 {
15240 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15241 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15242 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15243 }
15244
15245 /* A subroutine of the atomic operation splitters. Emit a load-locked
15246 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15247 the zero_extend operation. */
15248
15249 static void
15250 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15251 {
15252 rtx (*fn) (rtx, rtx) = NULL;
15253
15254 switch (mode)
15255 {
15256 case E_QImode:
15257 fn = gen_load_lockedqi;
15258 break;
15259 case E_HImode:
15260 fn = gen_load_lockedhi;
15261 break;
15262 case E_SImode:
15263 if (GET_MODE (mem) == QImode)
15264 fn = gen_load_lockedqi_si;
15265 else if (GET_MODE (mem) == HImode)
15266 fn = gen_load_lockedhi_si;
15267 else
15268 fn = gen_load_lockedsi;
15269 break;
15270 case E_DImode:
15271 fn = gen_load_lockeddi;
15272 break;
15273 case E_TImode:
15274 fn = gen_load_lockedti;
15275 break;
15276 default:
15277 gcc_unreachable ();
15278 }
15279 emit_insn (fn (reg, mem));
15280 }
15281
15282 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15283 instruction in MODE. */
15284
15285 static void
15286 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15287 {
15288 rtx (*fn) (rtx, rtx, rtx) = NULL;
15289
15290 switch (mode)
15291 {
15292 case E_QImode:
15293 fn = gen_store_conditionalqi;
15294 break;
15295 case E_HImode:
15296 fn = gen_store_conditionalhi;
15297 break;
15298 case E_SImode:
15299 fn = gen_store_conditionalsi;
15300 break;
15301 case E_DImode:
15302 fn = gen_store_conditionaldi;
15303 break;
15304 case E_TImode:
15305 fn = gen_store_conditionalti;
15306 break;
15307 default:
15308 gcc_unreachable ();
15309 }
15310
15311 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
15312 if (PPC405_ERRATUM77)
15313 emit_insn (gen_hwsync ());
15314
15315 emit_insn (fn (res, mem, val));
15316 }
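
/* Together these two helpers bracket the usual reservation loop: for
   SImode, "lwarx" loads the word and establishes the reservation, and
   "stwcx." stores conditionally, leaving CR0.EQ set iff the
   reservation still held (illustrative).  */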
15317
15318 /* Expand barriers before and after a load_locked/store_cond sequence. */
15319
15320 static rtx
15321 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15322 {
15323 rtx addr = XEXP (mem, 0);
15324
15325 if (!legitimate_indirect_address_p (addr, reload_completed)
15326 && !legitimate_indexed_address_p (addr, reload_completed))
15327 {
15328 addr = force_reg (Pmode, addr);
15329 mem = replace_equiv_address_nv (mem, addr);
15330 }
15331
15332 switch (model)
15333 {
15334 case MEMMODEL_RELAXED:
15335 case MEMMODEL_CONSUME:
15336 case MEMMODEL_ACQUIRE:
15337 break;
15338 case MEMMODEL_RELEASE:
15339 case MEMMODEL_ACQ_REL:
15340 emit_insn (gen_lwsync ());
15341 break;
15342 case MEMMODEL_SEQ_CST:
15343 emit_insn (gen_hwsync ());
15344 break;
15345 default:
15346 gcc_unreachable ();
15347 }
15348 return mem;
15349 }
15350
15351 static void
15352 rs6000_post_atomic_barrier (enum memmodel model)
15353 {
15354 switch (model)
15355 {
15356 case MEMMODEL_RELAXED:
15357 case MEMMODEL_CONSUME:
15358 case MEMMODEL_RELEASE:
15359 break;
15360 case MEMMODEL_ACQUIRE:
15361 case MEMMODEL_ACQ_REL:
15362 case MEMMODEL_SEQ_CST:
15363 emit_insn (gen_isync ());
15364 break;
15365 default:
15366 gcc_unreachable ();
15367 }
15368 }
15369
15370 /* A subroutine of the various atomic expanders. For sub-word operations,
15371 we must adjust things to operate on SImode. Given the original MEM,
15372 return a new aligned memory. Also build and return the quantities by
15373 which to shift and mask. */
15374
15375 static rtx
15376 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15377 {
15378 rtx addr, align, shift, mask, mem;
15379 HOST_WIDE_INT shift_mask;
15380 machine_mode mode = GET_MODE (orig_mem);
15381
15382 /* For smaller modes, we have to implement this via SImode. */
15383 shift_mask = (mode == QImode ? 0x18 : 0x10);
15384
15385 addr = XEXP (orig_mem, 0);
15386 addr = force_reg (GET_MODE (addr), addr);
15387
15388 /* Aligned memory containing subword. Generate a new memory. We
15389 do not want any of the existing MEM_ATTR data, as we're now
15390 accessing memory outside the original object. */
15391 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15392 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15393 mem = gen_rtx_MEM (SImode, align);
15394 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15395 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15396 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15397
15398 /* Shift amount for subword relative to aligned word. */
15399 shift = gen_reg_rtx (SImode);
15400 addr = gen_lowpart (SImode, addr);
15401 rtx tmp = gen_reg_rtx (SImode);
15402 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15403 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15404 if (BYTES_BIG_ENDIAN)
15405 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15406 shift, 1, OPTAB_LIB_WIDEN);
15407 *pshift = shift;
15408
15409 /* Mask for insertion. */
15410 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15411 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15412 *pmask = mask;
15413
15414 return mem;
15415 }
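
/* For example (illustrative): a QImode atomic at address A is mapped to
   the SImode word at A & -4; the shift is (A & 3) * 8, XORed with 0x18
   on big-endian targets so that byte 0 lands in the most significant
   byte, and the mask is 0xff shifted into that position.  */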
15416
15417 /* A subroutine of the various atomic expanders. For sub-word operands,
15418 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15419
15420 static rtx
15421 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15422 {
15423 rtx x;
15424
15425 x = gen_reg_rtx (SImode);
15426 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15427 gen_rtx_NOT (SImode, mask),
15428 oldval)));
15429
15430 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15431
15432 return x;
15433 }
15434
15435 /* A subroutine of the various atomic expanders. For sub-word operands,
15436 extract WIDE to NARROW via SHIFT. */
15437
15438 static void
15439 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15440 {
15441 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15442 wide, 1, OPTAB_LIB_WIDEN);
15443 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15444 }
15445
15446 /* Expand an atomic compare and swap operation. */
15447
15448 void
15449 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15450 {
15451 rtx boolval, retval, mem, oldval, newval, cond;
15452 rtx label1, label2, x, mask, shift;
15453 machine_mode mode, orig_mode;
15454 enum memmodel mod_s, mod_f;
15455 bool is_weak;
15456
15457 boolval = operands[0];
15458 retval = operands[1];
15459 mem = operands[2];
15460 oldval = operands[3];
15461 newval = operands[4];
15462 is_weak = (INTVAL (operands[5]) != 0);
15463 mod_s = memmodel_base (INTVAL (operands[6]));
15464 mod_f = memmodel_base (INTVAL (operands[7]));
15465 orig_mode = mode = GET_MODE (mem);
15466
15467 mask = shift = NULL_RTX;
15468 if (mode == QImode || mode == HImode)
15469 {
15470 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15471 lwarx and shift/mask operations. With power8, we need to do the
15472 comparison in SImode, but the store is still done in QI/HImode. */
15473 oldval = convert_modes (SImode, mode, oldval, 1);
15474
15475 if (!TARGET_SYNC_HI_QI)
15476 {
15477 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15478
15479 /* Shift and mask OLDVAL into position within the word. */
15480 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15481 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15482
15483 /* Shift and mask NEWVAL into position within the word. */
15484 newval = convert_modes (SImode, mode, newval, 1);
15485 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15486 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15487 }
15488
15489 /* Prepare to adjust the return value. */
15490 retval = gen_reg_rtx (SImode);
15491 mode = SImode;
15492 }
15493 else if (reg_overlap_mentioned_p (retval, oldval))
15494 oldval = copy_to_reg (oldval);
15495
15496 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15497 oldval = copy_to_mode_reg (mode, oldval);
15498
15499 if (reg_overlap_mentioned_p (retval, newval))
15500 newval = copy_to_reg (newval);
15501
15502 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15503
15504 label1 = NULL_RTX;
15505 if (!is_weak)
15506 {
15507 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15508 emit_label (XEXP (label1, 0));
15509 }
15510 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15511
15512 emit_load_locked (mode, retval, mem);
15513
15514 x = retval;
15515 if (mask)
15516 x = expand_simple_binop (SImode, AND, retval, mask,
15517 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15518
15519 cond = gen_reg_rtx (CCmode);
15520 /* If we have TImode, synthesize a comparison. */
15521 if (mode != TImode)
15522 x = gen_rtx_COMPARE (CCmode, x, oldval);
15523 else
15524 {
15525 rtx xor1_result = gen_reg_rtx (DImode);
15526 rtx xor2_result = gen_reg_rtx (DImode);
15527 rtx or_result = gen_reg_rtx (DImode);
15528 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15529 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15530 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15531 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15532
15533 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15534 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15535 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15536 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15537 }
15538
15539 emit_insn (gen_rtx_SET (cond, x));
15540
15541 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15542 emit_unlikely_jump (x, label2);
15543
15544 x = newval;
15545 if (mask)
15546 x = rs6000_mask_atomic_subword (retval, newval, mask);
15547
15548 emit_store_conditional (orig_mode, cond, mem, x);
15549
15550 if (!is_weak)
15551 {
15552 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15553 emit_unlikely_jump (x, label1);
15554 }
15555
15556 if (!is_mm_relaxed (mod_f))
15557 emit_label (XEXP (label2, 0));
15558
15559 rs6000_post_atomic_barrier (mod_s);
15560
15561 if (is_mm_relaxed (mod_f))
15562 emit_label (XEXP (label2, 0));
15563
15564 if (shift)
15565 rs6000_finish_atomic_subword (operands[1], retval, shift);
15566 else if (mode != GET_MODE (operands[1]))
15567 convert_move (operands[1], retval, 1);
15568
15569 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15570 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15571 emit_insn (gen_rtx_SET (boolval, x));
15572 }
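
/* Putting the pieces together, a strong SImode compare-and-swap with
   seq_cst ordering expands to roughly (illustrative register numbers):

	sync
   .L1: lwarx 9,0,3
	cmpw 0,9,4
	bne- 0,.L2
	stwcx. 5,0,3
	bne- 0,.L1
   .L2: isync

   with CR0 holding EQ on success and NE on failure, as noted above.  */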
15573
15574 /* Expand an atomic exchange operation. */
15575
15576 void
15577 rs6000_expand_atomic_exchange (rtx operands[])
15578 {
15579 rtx retval, mem, val, cond;
15580 machine_mode mode;
15581 enum memmodel model;
15582 rtx label, x, mask, shift;
15583
15584 retval = operands[0];
15585 mem = operands[1];
15586 val = operands[2];
15587 model = memmodel_base (INTVAL (operands[3]));
15588 mode = GET_MODE (mem);
15589
15590 mask = shift = NULL_RTX;
15591 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15592 {
15593 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15594
15595 /* Shift and mask VAL into position within the word. */
15596 val = convert_modes (SImode, mode, val, 1);
15597 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15598 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15599
15600 /* Prepare to adjust the return value. */
15601 retval = gen_reg_rtx (SImode);
15602 mode = SImode;
15603 }
15604
15605 mem = rs6000_pre_atomic_barrier (mem, model);
15606
15607 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15608 emit_label (XEXP (label, 0));
15609
15610 emit_load_locked (mode, retval, mem);
15611
15612 x = val;
15613 if (mask)
15614 x = rs6000_mask_atomic_subword (retval, val, mask);
15615
15616 cond = gen_reg_rtx (CCmode);
15617 emit_store_conditional (mode, cond, mem, x);
15618
15619 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15620 emit_unlikely_jump (x, label);
15621
15622 rs6000_post_atomic_barrier (model);
15623
15624 if (shift)
15625 rs6000_finish_atomic_subword (operands[0], retval, shift);
15626 }
15627
15628 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15629 to perform. MEM is the memory on which to operate. VAL is the second
15630 operand of the binary operator. BEFORE and AFTER are optional locations to
15631 return the value of MEM either before or after the operation. MODEL_RTX
15632 is a CONST_INT containing the memory model to use. */
15633
15634 void
15635 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15636 rtx orig_before, rtx orig_after, rtx model_rtx)
15637 {
15638 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15639 machine_mode mode = GET_MODE (mem);
15640 machine_mode store_mode = mode;
15641 rtx label, x, cond, mask, shift;
15642 rtx before = orig_before, after = orig_after;
15643
15644 mask = shift = NULL_RTX;
15645 /* On power8, we want to use SImode for the operation. On previous systems,
15646 do the operation on a full SImode word and shift/mask to get the proper
15647 byte or halfword. */
15648 if (mode == QImode || mode == HImode)
15649 {
15650 if (TARGET_SYNC_HI_QI)
15651 {
15652 val = convert_modes (SImode, mode, val, 1);
15653
15654 /* Prepare to adjust the return value. */
15655 before = gen_reg_rtx (SImode);
15656 if (after)
15657 after = gen_reg_rtx (SImode);
15658 mode = SImode;
15659 }
15660 else
15661 {
15662 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15663
15664 /* Shift and mask VAL into position within the word. */
15665 val = convert_modes (SImode, mode, val, 1);
15666 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15668
15669 switch (code)
15670 {
15671 case IOR:
15672 case XOR:
15673 /* We've already zero-extended VAL. That is sufficient to
15674 make certain that it does not affect other bits. */
15675 mask = NULL;
15676 break;
15677
15678 case AND:
15679 /* If we make certain that all of the other bits in VAL are
15680 set, that will be sufficient to not affect other bits. */
15681 x = gen_rtx_NOT (SImode, mask);
15682 x = gen_rtx_IOR (SImode, x, val);
15683 emit_insn (gen_rtx_SET (val, x));
15684 mask = NULL;
15685 break;
15686
15687 case NOT:
15688 case PLUS:
15689 case MINUS:
15690 /* These will all affect bits outside the field and need
15691 adjustment via MASK within the loop. */
15692 break;
15693
15694 default:
15695 gcc_unreachable ();
15696 }
15697
15698 /* Prepare to adjust the return value. */
15699 before = gen_reg_rtx (SImode);
15700 if (after)
15701 after = gen_reg_rtx (SImode);
15702 store_mode = mode = SImode;
15703 }
15704 }
15705
15706 mem = rs6000_pre_atomic_barrier (mem, model);
15707
15708 label = gen_label_rtx ();
15709 emit_label (label);
15710 label = gen_rtx_LABEL_REF (VOIDmode, label);
15711
15712 if (before == NULL_RTX)
15713 before = gen_reg_rtx (mode);
15714
15715 emit_load_locked (mode, before, mem);
15716
15717 if (code == NOT)
15718 {
15719 x = expand_simple_binop (mode, AND, before, val,
15720 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15721 after = expand_simple_unop (mode, NOT, x, after, 1);
15722 }
15723 else
15724 {
15725 after = expand_simple_binop (mode, code, before, val,
15726 after, 1, OPTAB_LIB_WIDEN);
15727 }
15728
15729 x = after;
15730 if (mask)
15731 {
15732 x = expand_simple_binop (SImode, AND, after, mask,
15733 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15734 x = rs6000_mask_atomic_subword (before, x, mask);
15735 }
15736 else if (store_mode != mode)
15737 x = convert_modes (store_mode, mode, x, 1);
15738
15739 cond = gen_reg_rtx (CCmode);
15740 emit_store_conditional (store_mode, cond, mem, x);
15741
15742 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15743 emit_unlikely_jump (x, label);
15744
15745 rs6000_post_atomic_barrier (model);
15746
15747 if (shift)
15748 {
15749 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15750 then do the calculations in a SImode register. */
15751 if (orig_before)
15752 rs6000_finish_atomic_subword (orig_before, before, shift);
15753 if (orig_after)
15754 rs6000_finish_atomic_subword (orig_after, after, shift);
15755 }
15756 else if (store_mode != mode)
15757 {
15758 /* QImode/HImode on machines with lbarx/lharx where we do the native
15759 operation and then do the calculations in a SImode register. */
15760 if (orig_before)
15761 convert_move (orig_before, before, 1);
15762 if (orig_after)
15763 convert_move (orig_after, after, 1);
15764 }
15765 else if (orig_after && after != orig_after)
15766 emit_move_insn (orig_after, after);
15767 }
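
/* For example, a seq_cst __atomic_fetch_add of r4 into the int at r3
   expands along the lines of (illustrative):

	sync
   .L1: lwarx 9,0,3
	add 10,9,4
	stwcx. 10,0,3
	bne- 0,.L1
	isync

   with the old value in r9 available as BEFORE and the sum in r10 as
   AFTER.  */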
15768
15769 /* Emit instructions to move SRC to DST. Called by splitters for
15770 multi-register moves. It will emit at most one instruction for
15771 each register that is accessed; that is, it won't emit li/lis pairs
15772 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15773 register. */
15774
15775 void
15776 rs6000_split_multireg_move (rtx dst, rtx src)
15777 {
15778 /* The register number of the first register being moved. */
15779 int reg;
15780 /* The mode that is to be moved. */
15781 machine_mode mode;
15782 /* The mode that the move is being done in, and its size. */
15783 machine_mode reg_mode;
15784 int reg_mode_size;
15785 /* The number of registers that will be moved. */
15786 int nregs;
15787
15788 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15789 mode = GET_MODE (dst);
15790 nregs = hard_regno_nregs (reg, mode);
15791 if (FP_REGNO_P (reg))
15792 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15793 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15794 else if (ALTIVEC_REGNO_P (reg))
15795 reg_mode = V16QImode;
15796 else
15797 reg_mode = word_mode;
15798 reg_mode_size = GET_MODE_SIZE (reg_mode);
15799
15800 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15801
15802 /* TDmode residing in FP registers is special, since the ISA requires that
15803 the lower-numbered word of a register pair is always the most significant
15804 word, even in little-endian mode. This does not match the usual subreg
15805 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15806 the appropriate constituent registers "by hand" in little-endian mode.
15807
15808 Note we do not need to check for destructive overlap here since TDmode
15809 can only reside in even/odd register pairs. */
15810 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15811 {
15812 rtx p_src, p_dst;
15813 int i;
15814
15815 for (i = 0; i < nregs; i++)
15816 {
15817 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15818 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15819 else
15820 p_src = simplify_gen_subreg (reg_mode, src, mode,
15821 i * reg_mode_size);
15822
15823 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15824 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15825 else
15826 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15827 i * reg_mode_size);
15828
15829 emit_insn (gen_rtx_SET (p_dst, p_src));
15830 }
15831
15832 return;
15833 }
15834
15835 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15836 {
15837 /* Move register range backwards, if we might have destructive
15838 overlap. */
15839 int i;
15840 for (i = nregs - 1; i >= 0; i--)
15841 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15842 i * reg_mode_size),
15843 simplify_gen_subreg (reg_mode, src, mode,
15844 i * reg_mode_size)));
15845 }
15846 else
15847 {
15848 int i;
15849 int j = -1;
15850 bool used_update = false;
15851 rtx restore_basereg = NULL_RTX;
15852
15853 if (MEM_P (src) && INT_REGNO_P (reg))
15854 {
15855 rtx breg;
15856
15857 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15858 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15859 {
15860 rtx delta_rtx;
15861 breg = XEXP (XEXP (src, 0), 0);
15862 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15863 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15864 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15865 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15866 src = replace_equiv_address (src, breg);
15867 }
15868 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15869 {
15870 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15871 {
15872 rtx basereg = XEXP (XEXP (src, 0), 0);
15873 if (TARGET_UPDATE)
15874 {
15875 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15876 emit_insn (gen_rtx_SET (ndst,
15877 gen_rtx_MEM (reg_mode,
15878 XEXP (src, 0))));
15879 used_update = true;
15880 }
15881 else
15882 emit_insn (gen_rtx_SET (basereg,
15883 XEXP (XEXP (src, 0), 1)));
15884 src = replace_equiv_address (src, basereg);
15885 }
15886 else
15887 {
15888 rtx basereg = gen_rtx_REG (Pmode, reg);
15889 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15890 src = replace_equiv_address (src, basereg);
15891 }
15892 }
15893
15894 breg = XEXP (src, 0);
15895 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15896 breg = XEXP (breg, 0);
15897
15898 /* If the base register we are using to address memory is
15899 also a destination reg, then change that register last. */
15900 if (REG_P (breg)
15901 && REGNO (breg) >= REGNO (dst)
15902 && REGNO (breg) < REGNO (dst) + nregs)
15903 j = REGNO (breg) - REGNO (dst);
15904 }
15905 else if (MEM_P (dst) && INT_REGNO_P (reg))
15906 {
15907 rtx breg;
15908
15909 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15910 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15911 {
15912 rtx delta_rtx;
15913 breg = XEXP (XEXP (dst, 0), 0);
15914 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15915 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15916 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15917
15918 /* We have to update the breg before doing the store.
15919 Use store with update, if available. */
15920
15921 if (TARGET_UPDATE)
15922 {
15923 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15924 emit_insn (TARGET_32BIT
15925 ? (TARGET_POWERPC64
15926 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15927 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15928 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15929 used_update = true;
15930 }
15931 else
15932 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15933 dst = replace_equiv_address (dst, breg);
15934 }
15935 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15936 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15937 {
15938 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15939 {
15940 rtx basereg = XEXP (XEXP (dst, 0), 0);
15941 if (TARGET_UPDATE)
15942 {
15943 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15944 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15945 XEXP (dst, 0)),
15946 nsrc));
15947 used_update = true;
15948 }
15949 else
15950 emit_insn (gen_rtx_SET (basereg,
15951 XEXP (XEXP (dst, 0), 1)));
15952 dst = replace_equiv_address (dst, basereg);
15953 }
15954 else
15955 {
15956 rtx basereg = XEXP (XEXP (dst, 0), 0);
15957 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15958 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15959 && REG_P (basereg)
15960 && REG_P (offsetreg)
15961 && REGNO (basereg) != REGNO (offsetreg));
15962 if (REGNO (basereg) == 0)
15963 {
15964 rtx tmp = offsetreg;
15965 offsetreg = basereg;
15966 basereg = tmp;
15967 }
15968 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15969 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15970 dst = replace_equiv_address (dst, basereg);
15971 }
15972 }
15973 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15974 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15975 }
15976
15977 for (i = 0; i < nregs; i++)
15978 {
15979 /* Calculate index to next subword. */
15980 ++j;
15981 if (j == nregs)
15982 j = 0;
15983
15984 /* If the compiler already emitted a move of the first word by
15985 store with update, there is no need to do anything. */
15986 if (j == 0 && used_update)
15987 continue;
15988
15989 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15990 j * reg_mode_size),
15991 simplify_gen_subreg (reg_mode, src, mode,
15992 j * reg_mode_size)));
15993 }
15994 if (restore_basereg != NULL_RTX)
15995 emit_insn (restore_basereg);
15996 }
15997 }
15998
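/* Alias set for memory references to the TOC, allocated lazily on
   first use by get_TOC_alias_set below.  */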
15999 static GTY(()) alias_set_type TOC_alias_set = -1;
16000
16001 alias_set_type
16002 get_TOC_alias_set (void)
16003 {
16004 if (TOC_alias_set == -1)
16005 TOC_alias_set = new_alias_set ();
16006 return TOC_alias_set;
16007 }
16008
16009 /* The mode the ABI uses for a word. This is not the same as word_mode
16010 for -m32 -mpowerpc64. This is used to implement various target hooks. */
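/* (With -m32 -mpowerpc64 the general purpose registers are 64 bits
   wide, so word_mode is DImode, while the 32-bit ABI still lays out
   words as SImode; hence the distinction below.)  */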
16011
16012 static scalar_int_mode
16013 rs6000_abi_word_mode (void)
16014 {
16015 return TARGET_32BIT ? SImode : DImode;
16016 }
16017
16018 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16019 static char *
16020 rs6000_offload_options (void)
16021 {
16022 if (TARGET_64BIT)
16023 return xstrdup ("-foffload-abi=lp64");
16024 else
16025 return xstrdup ("-foffload-abi=ilp32");
16026 }
16027
16028 \f
16029 /* A quick summary of the various types of 'constant-pool tables'
16030 under PowerPC:
16031
16032 Target     Flags           Name             One table per
16033 AIX        (none)          AIX TOC          object file
16034 AIX        -mfull-toc      AIX TOC          object file
16035 AIX        -mminimal-toc   AIX minimal TOC  translation unit
16036 SVR4/EABI  (none)          SVR4 SDATA       object file
16037 SVR4/EABI  -fpic           SVR4 pic         object file
16038 SVR4/EABI  -fPIC           SVR4 PIC         translation unit
16039 SVR4/EABI  -mrelocatable   EABI TOC         function
16040 SVR4/EABI  -maix           AIX TOC          object file
16041 SVR4/EABI  -maix -mminimal-toc
16042                            AIX minimal TOC  translation unit
16043
16044 Name             Reg.  Set by   entries   contains:
16045                                 made by   addrs?   fp?      sum?
16046
16047 AIX TOC           2    crt0     as        Y        option   option
16048 AIX minimal TOC  30    prolog   gcc       Y        Y        option
16049 SVR4 SDATA       13    crt0     gcc       N        Y        N
16050 SVR4 pic         30    prolog   ld        Y        not yet  N
16051 SVR4 PIC         30    prolog   gcc       Y        option   option
16052 EABI TOC         30    prolog   gcc       Y        option   option
16053
16054 */
16055
16056 /* Hash functions for the hash table. */
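/* The fixed multipliers 613 and 1231 used below are small primes;
   multiplying the running hash by a prime before folding in each
   field is a simple way to spread the fields across the result.  */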
16057
16058 static unsigned
16059 rs6000_hash_constant (rtx k)
16060 {
16061 enum rtx_code code = GET_CODE (k);
16062 machine_mode mode = GET_MODE (k);
16063 unsigned result = (code << 3) ^ mode;
16064 const char *format;
16065 int flen, fidx;
16066
16067 format = GET_RTX_FORMAT (code);
16068 flen = strlen (format);
16069 fidx = 0;
16070
16071 switch (code)
16072 {
16073 case LABEL_REF:
16074 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16075
16076 case CONST_WIDE_INT:
16077 {
16078 int i;
16079 flen = CONST_WIDE_INT_NUNITS (k);
16080 for (i = 0; i < flen; i++)
16081 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16082 return result;
16083 }
16084
16085 case CONST_DOUBLE:
16086 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16087
16088 case CODE_LABEL:
16089 fidx = 3;
16090 break;
16091
16092 default:
16093 break;
16094 }
16095
16096 for (; fidx < flen; fidx++)
16097 switch (format[fidx])
16098 {
16099 case 's':
16100 {
16101 unsigned i, len;
16102 const char *str = XSTR (k, fidx);
16103 len = strlen (str);
16104 result = result * 613 + len;
16105 for (i = 0; i < len; i++)
16106 result = result * 613 + (unsigned) str[i];
16107 break;
16108 }
16109 case 'u':
16110 case 'e':
16111 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16112 break;
16113 case 'i':
16114 case 'n':
16115 result = result * 613 + (unsigned) XINT (k, fidx);
16116 break;
16117 case 'w':
16118 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16119 result = result * 613 + (unsigned) XWINT (k, fidx);
16120 else
16121 {
16122 size_t i;
16123 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16124 result = result * 613 + (unsigned) (XWINT (k, fidx)
16125 >> CHAR_BIT * i);
16126 }
16127 break;
16128 case '0':
16129 break;
16130 default:
16131 gcc_unreachable ();
16132 }
16133
16134 return result;
16135 }
16136
16137 hashval_t
16138 toc_hasher::hash (toc_hash_struct *thc)
16139 {
16140 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16141 }
16142
16143 /* Compare H1 and H2 for equivalence. */
16144
16145 bool
16146 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16147 {
16148 rtx r1 = h1->key;
16149 rtx r2 = h2->key;
16150
16151 if (h1->key_mode != h2->key_mode)
16152 return false;
16153
16154 return rtx_equal_p (r1, r2);
16155 }
16156
16157 /* These are the names given by the C++ front-end to vtables, and
16158 vtable-like objects. Ideally, this logic should not be here;
16159 instead, there should be some programmatic way of inquiring as
16160 to whether or not an object is a vtable. */
16161
16162 #define VTABLE_NAME_P(NAME) \
16163 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16164 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16165 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16166 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16167 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
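/* The "_ZT*" prefixes follow the Itanium C++ ABI mangling: "_ZTV" is
   a vtable (e.g. "_ZTV7MyClass" for "vtable for MyClass"), "_ZTT" a
   VTT, "_ZTI" a typeinfo object, and "_ZTC" a construction vtable;
   "_vt." is the old GNU C++ (v2 ABI) vtable prefix.  */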
16168
16169 #ifdef NO_DOLLAR_IN_LABEL
16170 /* Return a GGC-allocated character string translating dollar signs in
16171 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
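/* For example, "foo$bar$baz" is returned as "foo_bar_baz".  A name
   with no '$', or one whose first character is '$', is returned
   unchanged.  */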
16172
16173 const char *
16174 rs6000_xcoff_strip_dollar (const char *name)
16175 {
16176 char *strip, *p;
16177 const char *q;
16178 size_t len;
16179
16180 q = (const char *) strchr (name, '$');
16181
16182 if (q == 0 || q == name)
16183 return name;
16184
16185 len = strlen (name);
16186 strip = XALLOCAVEC (char, len + 1);
16187 strcpy (strip, name);
16188 p = strip + (q - name);
16189 while (p)
16190 {
16191 *p = '_';
16192 p = strchr (p + 1, '$');
16193 }
16194
16195 return ggc_alloc_string (strip, len);
16196 }
16197 #endif
16198
16199 void
16200 rs6000_output_symbol_ref (FILE *file, rtx x)
16201 {
16202 const char *name = XSTR (x, 0);
16203
16204 /* Currently, C++ TOC references to vtables can be emitted before
16205 it is decided whether the vtable is public or private. If this
16206 is the case, the linker will eventually complain that there is
16207 a reference to an unknown section. Thus, for vtables only, we
16208 emit the TOC reference against the identifier and not the
16209 symbol. */
16210 if (VTABLE_NAME_P (name))
16211 {
16212 RS6000_OUTPUT_BASENAME (file, name);
16213 }
16214 else
16215 assemble_name (file, name);
16216 }
16217
16218 /* Output a TOC entry. We derive the entry name from what is being
16219 written. */
16220
16221 void
16222 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16223 {
16224 char buf[256];
16225 const char *name = buf;
16226 rtx base = x;
16227 HOST_WIDE_INT offset = 0;
16228
16229 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16230
16231 /* When the linker won't eliminate them, don't output duplicate
16232 TOC entries (this happens on AIX if there is any kind of TOC,
16233 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16234 CODE_LABELs. */
16235 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16236 {
16237 struct toc_hash_struct *h;
16238
16239 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16240 time because GGC is not initialized at that point. */
16241 if (toc_hash_table == NULL)
16242 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16243
16244 h = ggc_alloc<toc_hash_struct> ();
16245 h->key = x;
16246 h->key_mode = mode;
16247 h->labelno = labelno;
16248
16249 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16250 if (*found == NULL)
16251 *found = h;
16252 else /* This is indeed a duplicate.
16253 Set this label equal to that label. */
16254 {
16255 fputs ("\t.set ", file);
16256 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16257 fprintf (file, "%d,", labelno);
16258 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16259 fprintf (file, "%d\n", ((*found)->labelno));
16260
16261 #ifdef HAVE_AS_TLS
16262 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16263 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16264 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16265 {
16266 fputs ("\t.set ", file);
16267 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16268 fprintf (file, "%d,", labelno);
16269 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16270 fprintf (file, "%d\n", ((*found)->labelno));
16271 }
16272 #endif
16273 return;
16274 }
16275 }
16276
16277 /* If we're going to put a double constant in the TOC, make sure it's
16278 aligned properly when strict alignment is on. */
16279 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16280 && STRICT_ALIGNMENT
16281 && GET_MODE_BITSIZE (mode) >= 64
16282 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16283 ASM_OUTPUT_ALIGN (file, 3);
16285
16286 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16287
16288 /* Handle FP constants specially. Note that if we have a minimal
16289 TOC, things we put here aren't actually in the TOC, so we can allow
16290 FP constants. */
16291 if (CONST_DOUBLE_P (x)
16292 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16293 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16294 {
16295 long k[4];
16296
16297 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16298 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16299 else
16300 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16301
16302 if (TARGET_64BIT)
16303 {
16304 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16305 fputs (DOUBLE_INT_ASM_OP, file);
16306 else
16307 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16308 k[0] & 0xffffffff, k[1] & 0xffffffff,
16309 k[2] & 0xffffffff, k[3] & 0xffffffff);
16310 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16311 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16312 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16313 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16314 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16315 return;
16316 }
16317 else
16318 {
16319 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16320 fputs ("\t.long ", file);
16321 else
16322 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16323 k[0] & 0xffffffff, k[1] & 0xffffffff,
16324 k[2] & 0xffffffff, k[3] & 0xffffffff);
16325 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16326 k[0] & 0xffffffff, k[1] & 0xffffffff,
16327 k[2] & 0xffffffff, k[3] & 0xffffffff);
16328 return;
16329 }
16330 }
16331 else if (CONST_DOUBLE_P (x)
16332 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16333 {
16334 long k[2];
16335
16336 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16337 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16338 else
16339 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16340
16341 if (TARGET_64BIT)
16342 {
16343 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16344 fputs (DOUBLE_INT_ASM_OP, file);
16345 else
16346 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16347 k[0] & 0xffffffff, k[1] & 0xffffffff);
16348 fprintf (file, "0x%lx%08lx\n",
16349 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16350 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16351 return;
16352 }
16353 else
16354 {
16355 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16356 fputs ("\t.long ", file);
16357 else
16358 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16359 k[0] & 0xffffffff, k[1] & 0xffffffff);
16360 fprintf (file, "0x%lx,0x%lx\n",
16361 k[0] & 0xffffffff, k[1] & 0xffffffff);
16362 return;
16363 }
16364 }
16365 else if (CONST_DOUBLE_P (x)
16366 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16367 {
16368 long l;
16369
16370 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16371 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16372 else
16373 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16374
16375 if (TARGET_64BIT)
16376 {
16377 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16378 fputs (DOUBLE_INT_ASM_OP, file);
16379 else
16380 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16381 if (WORDS_BIG_ENDIAN)
16382 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16383 else
16384 fprintf (file, "0x%lx\n", l & 0xffffffff);
16385 return;
16386 }
16387 else
16388 {
16389 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16390 fputs ("\t.long ", file);
16391 else
16392 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16393 fprintf (file, "0x%lx\n", l & 0xffffffff);
16394 return;
16395 }
16396 }
16397 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16398 {
16399 unsigned HOST_WIDE_INT low;
16400 HOST_WIDE_INT high;
16401
16402 low = INTVAL (x) & 0xffffffff;
16403 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16404
16405 /* TOC entries are always Pmode-sized, so when big-endian
16406 smaller integer constants in the TOC need to be padded.
16407 (This is still a win over putting the constants in
16408 a separate constant pool, because then we'd have
16409 to have both a TOC entry _and_ the actual constant.)
16410
16411 For a 32-bit target, CONST_INT values are loaded and shifted
16412 entirely within `low' and can be stored in one TOC entry. */
16413
16414 /* It would be easy to make this work, but it doesn't now. */
16415 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16416
16417 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16418 {
16419 low |= high << 32;
16420 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16421 high = (HOST_WIDE_INT) low >> 32;
16422 low &= 0xffffffff;
16423 }
16424
16425 if (TARGET_64BIT)
16426 {
16427 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16428 fputs (DOUBLE_INT_ASM_OP, file);
16429 else
16430 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16431 (long) high & 0xffffffff, (long) low & 0xffffffff);
16432 fprintf (file, "0x%lx%08lx\n",
16433 (long) high & 0xffffffff, (long) low & 0xffffffff);
16434 return;
16435 }
16436 else
16437 {
16438 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16439 {
16440 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16441 fputs ("\t.long ", file);
16442 else
16443 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16444 (long) high & 0xffffffff, (long) low & 0xffffffff);
16445 fprintf (file, "0x%lx,0x%lx\n",
16446 (long) high & 0xffffffff, (long) low & 0xffffffff);
16447 }
16448 else
16449 {
16450 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16451 fputs ("\t.long ", file);
16452 else
16453 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16454 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16455 }
16456 return;
16457 }
16458 }
16459
16460 if (GET_CODE (x) == CONST)
16461 {
16462 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16463 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16464
16465 base = XEXP (XEXP (x, 0), 0);
16466 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16467 }
16468
16469 switch (GET_CODE (base))
16470 {
16471 case SYMBOL_REF:
16472 name = XSTR (base, 0);
16473 break;
16474
16475 case LABEL_REF:
16476 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16477 CODE_LABEL_NUMBER (XEXP (base, 0)));
16478 break;
16479
16480 case CODE_LABEL:
16481 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16482 break;
16483
16484 default:
16485 gcc_unreachable ();
16486 }
16487
16488 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16489 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16490 else
16491 {
16492 fputs ("\t.tc ", file);
16493 RS6000_OUTPUT_BASENAME (file, name);
16494
16495 if (offset < 0)
16496 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16497 else if (offset)
16498 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16499
16500 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16501 after other TOC symbols, reducing overflow of small TOC access
16502 to [TC] symbols. */
16503 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16504 ? "[TE]," : "[TC],", file);
16505 }
16506
16507 /* Currently, C++ TOC references to vtables can be emitted before
16508 it is decided whether the vtable is public or private. If this
16509 is the case, the linker will eventually complain that there is
16510 a TOC reference to an unknown section. Thus, for vtables only,
16511 we emit the TOC reference against the symbol and not the
16512 section. */
16513 if (VTABLE_NAME_P (name))
16514 {
16515 RS6000_OUTPUT_BASENAME (file, name);
16516 if (offset < 0)
16517 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16518 else if (offset > 0)
16519 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16520 }
16521 else
16522 output_addr_const (file, x);
16523
16524 #if HAVE_AS_TLS
16525 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16526 {
16527 switch (SYMBOL_REF_TLS_MODEL (base))
16528 {
16529 case 0:
16530 break;
16531 case TLS_MODEL_LOCAL_EXEC:
16532 fputs ("@le", file);
16533 break;
16534 case TLS_MODEL_INITIAL_EXEC:
16535 fputs ("@ie", file);
16536 break;
16537 /* Use global-dynamic for local-dynamic. */
16538 case TLS_MODEL_GLOBAL_DYNAMIC:
16539 case TLS_MODEL_LOCAL_DYNAMIC:
16540 putc ('\n', file);
16541 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16542 fputs ("\t.tc .", file);
16543 RS6000_OUTPUT_BASENAME (file, name);
16544 fputs ("[TC],", file);
16545 output_addr_const (file, x);
16546 fputs ("@m", file);
16547 break;
16548 default:
16549 gcc_unreachable ();
16550 }
16551 }
16552 #endif
16553
16554 putc ('\n', file);
16555 }
16556 \f
16557 /* Output an assembler pseudo-op to write an ASCII string of N characters
16558 starting at P to FILE.
16559
16560 On the RS/6000, we have to do this using the .byte operation and
16561 write out special characters outside the quoted string.
16562 Also, the assembler is broken; very long strings are truncated,
16563 so we must artificially break them up early. */
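/* For example, the three input bytes "Hi\n" are emitted as

	.byte "Hi"
	.byte 10

   with the newline written as its decimal value outside the quoted
   string.  */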
16564
16565 void
16566 output_ascii (FILE *file, const char *p, int n)
16567 {
16568 char c;
16569 int i, count_string;
16570 const char *for_string = "\t.byte \"";
16571 const char *for_decimal = "\t.byte ";
16572 const char *to_close = NULL;
16573
16574 count_string = 0;
16575 for (i = 0; i < n; i++)
16576 {
16577 c = *p++;
16578 if (c >= ' ' && c < 0177)
16579 {
16580 if (for_string)
16581 fputs (for_string, file);
16582 putc (c, file);
16583
16584 /* Write two quotes to get one. */
16585 if (c == '"')
16586 {
16587 putc (c, file);
16588 ++count_string;
16589 }
16590
16591 for_string = NULL;
16592 for_decimal = "\"\n\t.byte ";
16593 to_close = "\"\n";
16594 ++count_string;
16595
16596 if (count_string >= 512)
16597 {
16598 fputs (to_close, file);
16599
16600 for_string = "\t.byte \"";
16601 for_decimal = "\t.byte ";
16602 to_close = NULL;
16603 count_string = 0;
16604 }
16605 }
16606 else
16607 {
16608 if (for_decimal)
16609 fputs (for_decimal, file);
16610 fprintf (file, "%d", c);
16611
16612 for_string = "\n\t.byte \"";
16613 for_decimal = ", ";
16614 to_close = "\n";
16615 count_string = 0;
16616 }
16617 }
16618
16619 /* Now close the string if we have written one. Then end the line. */
16620 if (to_close)
16621 fputs (to_close, file);
16622 }
16623 \f
16624 /* Generate a unique section name for FILENAME for a section type
16625 represented by SECTION_DESC. Output goes into BUF.
16626
16627 SECTION_DESC can be any string, as long as it is different for each
16628 possible section type.
16629
16630 We name the section in the same manner as xlc. The name begins with an
16631 underscore followed by the filename (after stripping any leading directory
16632 names) with the last period replaced by the string SECTION_DESC. If
16633 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16634 the name. */
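/* For example, FILENAME "dir/foo.c" with a SECTION_DESC of ".bss_"
   (an illustrative value) yields "_foo.bss_": the directory prefix
   is stripped and SECTION_DESC takes the place of the final period,
   dropping anything after it.  */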
16635
16636 void
16637 rs6000_gen_section_name (char **buf, const char *filename,
16638 const char *section_desc)
16639 {
16640 const char *q, *after_last_slash, *last_period = 0;
16641 char *p;
16642 int len;
16643
16644 after_last_slash = filename;
16645 for (q = filename; *q; q++)
16646 {
16647 if (*q == '/')
16648 after_last_slash = q + 1;
16649 else if (*q == '.')
16650 last_period = q;
16651 }
16652
16653 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16654 *buf = (char *) xmalloc (len);
16655
16656 p = *buf;
16657 *p++ = '_';
16658
16659 for (q = after_last_slash; *q; q++)
16660 {
16661 if (q == last_period)
16662 {
16663 strcpy (p, section_desc);
16664 p += strlen (section_desc);
16665 break;
16666 }
16667
16668 else if (ISALNUM (*q))
16669 *p++ = *q;
16670 }
16671
16672 if (last_period == 0)
16673 strcpy (p, section_desc);
16674 else
16675 *p = '\0';
16676 }
16677 \f
16678 /* Emit profile function. */
16679
16680 void
16681 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16682 {
16683 /* Non-standard profiling for kernels, which just saves LR then calls
16684 _mcount without worrying about arg saves. The idea is to change
16685 the function prologue as little as possible as it isn't easy to
16686 account for arg save/restore code added just for _mcount. */
16687 if (TARGET_PROFILE_KERNEL)
16688 return;
16689
16690 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16691 {
16692 #ifndef NO_PROFILE_COUNTERS
16693 # define NO_PROFILE_COUNTERS 0
16694 #endif
16695 if (NO_PROFILE_COUNTERS)
16696 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16697 LCT_NORMAL, VOIDmode);
16698 else
16699 {
16700 char buf[30];
16701 const char *label_name;
16702 rtx fun;
16703
16704 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16705 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16706 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16707
16708 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16709 LCT_NORMAL, VOIDmode, fun, Pmode);
16710 }
16711 }
16712 else if (DEFAULT_ABI == ABI_DARWIN)
16713 {
16714 const char *mcount_name = RS6000_MCOUNT;
16715 int caller_addr_regno = LR_REGNO;
16716
16717 /* Be conservative and always set this, at least for now. */
16718 crtl->uses_pic_offset_table = 1;
16719
16720 #if TARGET_MACHO
16721 /* For PIC code, set up a stub and collect the caller's address
16722 from r0, which is where the prologue puts it. */
16723 if (MACHOPIC_INDIRECT
16724 && crtl->uses_pic_offset_table)
16725 caller_addr_regno = 0;
16726 #endif
16727 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16728 LCT_NORMAL, VOIDmode,
16729 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16730 }
16731 }
16732
16733 /* Write function profiler code. */
16734
16735 void
16736 output_function_profiler (FILE *file, int labelno)
16737 {
16738 char buf[100];
16739
16740 switch (DEFAULT_ABI)
16741 {
16742 default:
16743 gcc_unreachable ();
16744
16745 case ABI_V4:
16746 if (!TARGET_32BIT)
16747 {
16748 warning (0, "no profiling of 64-bit code for this ABI");
16749 return;
16750 }
16751 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16752 fprintf (file, "\tmflr %s\n", reg_names[0]);
16753 if (NO_PROFILE_COUNTERS)
16754 {
16755 asm_fprintf (file, "\tstw %s,4(%s)\n",
16756 reg_names[0], reg_names[1]);
16757 }
16758 else if (TARGET_SECURE_PLT && flag_pic)
16759 {
16760 if (TARGET_LINK_STACK)
16761 {
16762 char name[32];
16763 get_ppc476_thunk_name (name);
16764 asm_fprintf (file, "\tbl %s\n", name);
16765 }
16766 else
16767 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16768 asm_fprintf (file, "\tstw %s,4(%s)\n",
16769 reg_names[0], reg_names[1]);
16770 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16771 asm_fprintf (file, "\taddis %s,%s,",
16772 reg_names[12], reg_names[12]);
16773 assemble_name (file, buf);
16774 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16775 assemble_name (file, buf);
16776 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16777 }
16778 else if (flag_pic == 1)
16779 {
16780 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16781 asm_fprintf (file, "\tstw %s,4(%s)\n",
16782 reg_names[0], reg_names[1]);
16783 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16784 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16785 assemble_name (file, buf);
16786 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16787 }
16788 else if (flag_pic > 1)
16789 {
16790 asm_fprintf (file, "\tstw %s,4(%s)\n",
16791 reg_names[0], reg_names[1]);
16792 /* Now, we need to get the address of the label. */
16793 if (TARGET_LINK_STACK)
16794 {
16795 char name[32];
16796 get_ppc476_thunk_name (name);
16797 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16798 assemble_name (file, buf);
16799 fputs ("-.\n1:", file);
16800 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16801 asm_fprintf (file, "\taddi %s,%s,4\n",
16802 reg_names[11], reg_names[11]);
16803 }
16804 else
16805 {
16806 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16807 assemble_name (file, buf);
16808 fputs ("-.\n1:", file);
16809 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16810 }
16811 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16812 reg_names[0], reg_names[11]);
16813 asm_fprintf (file, "\tadd %s,%s,%s\n",
16814 reg_names[0], reg_names[0], reg_names[11]);
16815 }
16816 else
16817 {
16818 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16819 assemble_name (file, buf);
16820 fputs ("@ha\n", file);
16821 asm_fprintf (file, "\tstw %s,4(%s)\n",
16822 reg_names[0], reg_names[1]);
16823 asm_fprintf (file, "\tla %s,", reg_names[0]);
16824 assemble_name (file, buf);
16825 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16826 }
16827
16828 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16829 fprintf (file, "\tbl %s%s\n",
16830 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16831 break;
16832
16833 case ABI_AIX:
16834 case ABI_ELFv2:
16835 case ABI_DARWIN:
16836 /* Don't do anything, done in output_profile_hook (). */
16837 break;
16838 }
16839 }
16840
16841 \f
16842
16843 /* The following variable holds the last issued insn. */
16844
16845 static rtx_insn *last_scheduled_insn;
16846
16847 /* The following variable helps to balance issuing of load and
16848 store instructions. */
16849
16850 static int load_store_pendulum;
16851
16852 /* The following variable helps pair divide insns during scheduling. */
16853 static int divide_cnt;
16854 /* The following variable helps pair and alternate vector and vector load
16855 insns during scheduling. */
16856 static int vec_pairing;
16857
16858
16859 /* Power4 load update and store update instructions are cracked into a
16860 load or store and an integer insn which are executed in the same cycle.
16861 Branches have their own dispatch slot which does not count against the
16862 GCC issue rate, but it changes the program flow so there are no other
16863 instructions to issue in this cycle. */
16864
16865 static int
16866 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16867 {
16868 last_scheduled_insn = insn;
16869 if (GET_CODE (PATTERN (insn)) == USE
16870 || GET_CODE (PATTERN (insn)) == CLOBBER)
16871 {
16872 cached_can_issue_more = more;
16873 return cached_can_issue_more;
16874 }
16875
16876 if (insn_terminates_group_p (insn, current_group))
16877 {
16878 cached_can_issue_more = 0;
16879 return cached_can_issue_more;
16880 }
16881
16882 /* If the insn has no reservation but we reach here, leave MORE unchanged. */
16883 if (recog_memoized (insn) < 0)
16884 return more;
16885
16886 if (rs6000_sched_groups)
16887 {
16888 if (is_microcoded_insn (insn))
16889 cached_can_issue_more = 0;
16890 else if (is_cracked_insn (insn))
16891 cached_can_issue_more = more > 2 ? more - 2 : 0;
16892 else
16893 cached_can_issue_more = more - 1;
16894
16895 return cached_can_issue_more;
16896 }
16897
16898 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16899 return 0;
16900
16901 cached_can_issue_more = more - 1;
16902 return cached_can_issue_more;
16903 }
16904
16905 static int
16906 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16907 {
16908 int r = rs6000_variable_issue_1 (insn, more);
16909 if (verbose)
16910 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16911 return r;
16912 }
16913
16914 /* Adjust the cost of a scheduling dependency. Return the new cost of
16915 a dependency of type DEP_TYPE between INSN and DEP_INSN. COST is the current cost. */
16916
16917 static int
16918 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16919 unsigned int)
16920 {
16921 enum attr_type attr_type;
16922
16923 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16924 return cost;
16925
16926 switch (dep_type)
16927 {
16928 case REG_DEP_TRUE:
16929 {
16930 /* Data dependency; DEP_INSN writes a register that INSN reads
16931 some cycles later. */
16932
16933 /* Separate a load from a narrower, dependent store. */
16934 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16935 || rs6000_tune == PROCESSOR_FUTURE)
16936 && GET_CODE (PATTERN (insn)) == SET
16937 && GET_CODE (PATTERN (dep_insn)) == SET
16938 && MEM_P (XEXP (PATTERN (insn), 1))
16939 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16940 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16941 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16942 return cost + 14;
16943
16944 attr_type = get_attr_type (insn);
16945
16946 switch (attr_type)
16947 {
16948 case TYPE_JMPREG:
16949 /* Tell the first scheduling pass about the latency between
16950 a mtctr and bctr (and mtlr and br/blr). The first
16951 scheduling pass will not know about this latency since
16952 the mtctr instruction, which has the latency associated
16953 to it, will be generated by reload. */
16954 return 4;
16955 case TYPE_BRANCH:
16956 /* Leave some extra cycles between a compare and its
16957 dependent branch, to inhibit expensive mispredicts. */
16958 if ((rs6000_tune == PROCESSOR_PPC603
16959 || rs6000_tune == PROCESSOR_PPC604
16960 || rs6000_tune == PROCESSOR_PPC604e
16961 || rs6000_tune == PROCESSOR_PPC620
16962 || rs6000_tune == PROCESSOR_PPC630
16963 || rs6000_tune == PROCESSOR_PPC750
16964 || rs6000_tune == PROCESSOR_PPC7400
16965 || rs6000_tune == PROCESSOR_PPC7450
16966 || rs6000_tune == PROCESSOR_PPCE5500
16967 || rs6000_tune == PROCESSOR_PPCE6500
16968 || rs6000_tune == PROCESSOR_POWER4
16969 || rs6000_tune == PROCESSOR_POWER5
16970 || rs6000_tune == PROCESSOR_POWER7
16971 || rs6000_tune == PROCESSOR_POWER8
16972 || rs6000_tune == PROCESSOR_POWER9
16973 || rs6000_tune == PROCESSOR_FUTURE
16974 || rs6000_tune == PROCESSOR_CELL)
16975 && recog_memoized (dep_insn)
16976 && (INSN_CODE (dep_insn) >= 0))
16977
16978 switch (get_attr_type (dep_insn))
16979 {
16980 case TYPE_CMP:
16981 case TYPE_FPCOMPARE:
16982 case TYPE_CR_LOGICAL:
16983 return cost + 2;
16984 case TYPE_EXTS:
16985 case TYPE_MUL:
16986 if (get_attr_dot (dep_insn) == DOT_YES)
16987 return cost + 2;
16988 else
16989 break;
16990 case TYPE_SHIFT:
16991 if (get_attr_dot (dep_insn) == DOT_YES
16992 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16993 return cost + 2;
16994 else
16995 break;
16996 default:
16997 break;
16998 }
16999 break;
17000
17001 case TYPE_STORE:
17002 case TYPE_FPSTORE:
17003 if ((rs6000_tune == PROCESSOR_POWER6)
17004 && recog_memoized (dep_insn)
17005 && (INSN_CODE (dep_insn) >= 0))
17006 {
17007
17008 if (GET_CODE (PATTERN (insn)) != SET)
17009 /* If this happens, we have to extend this to schedule
17010 optimally. Return default for now. */
17011 return cost;
17012
17013 /* Adjust the cost for the case where the value written
17014 by a fixed point operation is used as the address
17015 gen value on a store. */
17016 switch (get_attr_type (dep_insn))
17017 {
17018 case TYPE_LOAD:
17019 case TYPE_CNTLZ:
17020 {
17021 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17022 return get_attr_sign_extend (dep_insn)
17023 == SIGN_EXTEND_YES ? 6 : 4;
17024 break;
17025 }
17026 case TYPE_SHIFT:
17027 {
17028 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17029 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17030 6 : 3;
17031 break;
17032 }
17033 case TYPE_INTEGER:
17034 case TYPE_ADD:
17035 case TYPE_LOGICAL:
17036 case TYPE_EXTS:
17037 case TYPE_INSERT:
17038 {
17039 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17040 return 3;
17041 break;
17042 }
17043 case TYPE_STORE:
17044 case TYPE_FPLOAD:
17045 case TYPE_FPSTORE:
17046 {
17047 if (get_attr_update (dep_insn) == UPDATE_YES
17048 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17049 return 3;
17050 break;
17051 }
17052 case TYPE_MUL:
17053 {
17054 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17055 return 17;
17056 break;
17057 }
17058 case TYPE_DIV:
17059 {
17060 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17061 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17062 break;
17063 }
17064 default:
17065 break;
17066 }
17067 }
17068 break;
17069
17070 case TYPE_LOAD:
17071 if ((rs6000_tune == PROCESSOR_POWER6)
17072 && recog_memoized (dep_insn)
17073 && (INSN_CODE (dep_insn) >= 0))
17074 {
17075
17076 /* Adjust the cost for the case where the value written
17077 by a fixed point instruction is used within the address
17078 gen portion of a subsequent load(u)(x) */
17079 switch (get_attr_type (dep_insn))
17080 {
17081 case TYPE_LOAD:
17082 case TYPE_CNTLZ:
17083 {
17084 if (set_to_load_agen (dep_insn, insn))
17085 return get_attr_sign_extend (dep_insn)
17086 == SIGN_EXTEND_YES ? 6 : 4;
17087 break;
17088 }
17089 case TYPE_SHIFT:
17090 {
17091 if (set_to_load_agen (dep_insn, insn))
17092 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17093 6 : 3;
17094 break;
17095 }
17096 case TYPE_INTEGER:
17097 case TYPE_ADD:
17098 case TYPE_LOGICAL:
17099 case TYPE_EXTS:
17100 case TYPE_INSERT:
17101 {
17102 if (set_to_load_agen (dep_insn, insn))
17103 return 3;
17104 break;
17105 }
17106 case TYPE_STORE:
17107 case TYPE_FPLOAD:
17108 case TYPE_FPSTORE:
17109 {
17110 if (get_attr_update (dep_insn) == UPDATE_YES
17111 && set_to_load_agen (dep_insn, insn))
17112 return 3;
17113 break;
17114 }
17115 case TYPE_MUL:
17116 {
17117 if (set_to_load_agen (dep_insn, insn))
17118 return 17;
17119 break;
17120 }
17121 case TYPE_DIV:
17122 {
17123 if (set_to_load_agen (dep_insn, insn))
17124 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17125 break;
17126 }
17127 default:
17128 break;
17129 }
17130 }
17131 break;
17132
17133 case TYPE_FPLOAD:
17134 if ((rs6000_tune == PROCESSOR_POWER6)
17135 && get_attr_update (insn) == UPDATE_NO
17136 && recog_memoized (dep_insn)
17137 && (INSN_CODE (dep_insn) >= 0)
17138 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17139 return 2;
17140
17141 default:
17142 break;
17143 }
17144
17145 /* Fall out to return default cost. */
17146 }
17147 break;
17148
17149 case REG_DEP_OUTPUT:
17150 /* Output dependency; DEP_INSN writes a register that INSN writes some
17151 cycles later. */
17152 if ((rs6000_tune == PROCESSOR_POWER6)
17153 && recog_memoized (dep_insn)
17154 && (INSN_CODE (dep_insn) >= 0))
17155 {
17156 attr_type = get_attr_type (insn);
17157
17158 switch (attr_type)
17159 {
17160 case TYPE_FP:
17161 case TYPE_FPSIMPLE:
17162 if (get_attr_type (dep_insn) == TYPE_FP
17163 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17164 return 1;
17165 break;
17166 case TYPE_FPLOAD:
17167 if (get_attr_update (insn) == UPDATE_NO
17168 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17169 return 2;
17170 break;
17171 default:
17172 break;
17173 }
17174 }
17175 /* Fall through, no cost for output dependency. */
17176 /* FALLTHRU */
17177
17178 case REG_DEP_ANTI:
17179 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17180 cycles later. */
17181 return 0;
17182
17183 default:
17184 gcc_unreachable ();
17185 }
17186
17187 return cost;
17188 }
17189
17190 /* Debug version of rs6000_adjust_cost. */
17191
17192 static int
17193 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17194 int cost, unsigned int dw)
17195 {
17196 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17197
17198 if (ret != cost)
17199 {
17200 const char *dep;
17201
17202 switch (dep_type)
17203 {
17204 default: dep = "unknown dependency"; break;
17205 case REG_DEP_TRUE: dep = "data dependency"; break;
17206 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17207 case REG_DEP_ANTI: dep = "anti dependency"; break;
17208 }
17209
17210 fprintf (stderr,
17211 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17212 "%s, insn:\n", ret, cost, dep);
17213
17214 debug_rtx (insn);
17215 }
17216
17217 return ret;
17218 }
17219
17220 /* The function returns true if INSN is microcoded.
17221 Return false otherwise. */
17222
17223 static bool
17224 is_microcoded_insn (rtx_insn *insn)
17225 {
17226 if (!insn || !NONDEBUG_INSN_P (insn)
17227 || GET_CODE (PATTERN (insn)) == USE
17228 || GET_CODE (PATTERN (insn)) == CLOBBER)
17229 return false;
17230
17231 if (rs6000_tune == PROCESSOR_CELL)
17232 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17233
17234 if (rs6000_sched_groups
17235 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17236 {
17237 enum attr_type type = get_attr_type (insn);
17238 if ((type == TYPE_LOAD
17239 && get_attr_update (insn) == UPDATE_YES
17240 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17241 || ((type == TYPE_LOAD || type == TYPE_STORE)
17242 && get_attr_update (insn) == UPDATE_YES
17243 && get_attr_indexed (insn) == INDEXED_YES)
17244 || type == TYPE_MFCR)
17245 return true;
17246 }
17247
17248 return false;
17249 }
17250
17251 /* The function returns true if INSN is cracked into 2 instructions
17252 by the processor (and therefore occupies 2 issue slots). */
17253
17254 static bool
17255 is_cracked_insn (rtx_insn *insn)
17256 {
17257 if (!insn || !NONDEBUG_INSN_P (insn)
17258 || GET_CODE (PATTERN (insn)) == USE
17259 || GET_CODE (PATTERN (insn)) == CLOBBER)
17260 return false;
17261
17262 if (rs6000_sched_groups
17263 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17264 {
17265 enum attr_type type = get_attr_type (insn);
17266 if ((type == TYPE_LOAD
17267 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17268 && get_attr_update (insn) == UPDATE_NO)
17269 || (type == TYPE_LOAD
17270 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17271 && get_attr_update (insn) == UPDATE_YES
17272 && get_attr_indexed (insn) == INDEXED_NO)
17273 || (type == TYPE_STORE
17274 && get_attr_update (insn) == UPDATE_YES
17275 && get_attr_indexed (insn) == INDEXED_NO)
17276 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17277 && get_attr_update (insn) == UPDATE_YES)
17278 || (type == TYPE_CR_LOGICAL
17279 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17280 || (type == TYPE_EXTS
17281 && get_attr_dot (insn) == DOT_YES)
17282 || (type == TYPE_SHIFT
17283 && get_attr_dot (insn) == DOT_YES
17284 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17285 || (type == TYPE_MUL
17286 && get_attr_dot (insn) == DOT_YES)
17287 || type == TYPE_DIV
17288 || (type == TYPE_INSERT
17289 && get_attr_size (insn) == SIZE_32))
17290 return true;
17291 }
17292
17293 return false;
17294 }
17295
17296 /* The function returns true if INSN can be issued only from
17297 the branch slot. */
17298
17299 static bool
17300 is_branch_slot_insn (rtx_insn *insn)
17301 {
17302 if (!insn || !NONDEBUG_INSN_P (insn)
17303 || GET_CODE (PATTERN (insn)) == USE
17304 || GET_CODE (PATTERN (insn)) == CLOBBER)
17305 return false;
17306
17307 if (rs6000_sched_groups)
17308 {
17309 enum attr_type type = get_attr_type (insn);
17310 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17311 return true;
17312 return false;
17313 }
17314
17315 return false;
17316 }
17317
17318 /* The function returns true if OUT_INSN sets a value that is
17319 used in the address generation computation of IN_INSN. */
17320 static bool
17321 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17322 {
17323 rtx out_set, in_set;
17324
17325 /* For performance reasons, only handle the simple case where
17326 both insns are a single_set. */
17327 out_set = single_set (out_insn);
17328 if (out_set)
17329 {
17330 in_set = single_set (in_insn);
17331 if (in_set)
17332 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17333 }
17334
17335 return false;
17336 }
17337
17338 /* Try to determine base/offset/size parts of the given MEM.
17339 Return true if successful, false if the values cannot all be
17340 determined.
17341
17342 This function only looks for REG or REG+CONST address forms;
17343 a REG+REG address form will return false. */
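/* For example, given (mem:DI (plus:DI (reg:DI 9) (const_int 16)))
   with a known MEM_SIZE of 8, this sets *BASE to r9, *OFFSET to 16
   and *SIZE to 8.  */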
17344
17345 static bool
17346 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17347 HOST_WIDE_INT *size)
17348 {
17349 rtx addr_rtx;
17350 if (MEM_SIZE_KNOWN_P (mem))
17351 *size = MEM_SIZE (mem);
17352 else
17353 return false;
17354
17355 addr_rtx = (XEXP (mem, 0));
17356 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17357 addr_rtx = XEXP (addr_rtx, 1);
17358
17359 *offset = 0;
17360 while (GET_CODE (addr_rtx) == PLUS
17361 && CONST_INT_P (XEXP (addr_rtx, 1)))
17362 {
17363 *offset += INTVAL (XEXP (addr_rtx, 1));
17364 addr_rtx = XEXP (addr_rtx, 0);
17365 }
17366 if (!REG_P (addr_rtx))
17367 return false;
17368
17369 *base = addr_rtx;
17370 return true;
17371 }
17372
17373 /* Return true if the target storage location of MEM1 is adjacent
17374 to the target storage location of MEM2. */
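/* For example, an 8-byte access at r9+0 and another at r9+8 are
   adjacent; accesses based on different registers never are, since
   only the base REGNO and the offsets are compared.  */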
17376
17377 static bool
17378 adjacent_mem_locations (rtx mem1, rtx mem2)
17379 {
17380 rtx reg1, reg2;
17381 HOST_WIDE_INT off1, size1, off2, size2;
17382
17383 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17384 && get_memref_parts (mem2, &reg2, &off2, &size2))
17385 return ((REGNO (reg1) == REGNO (reg2))
17386 && ((off1 + size1 == off2)
17387 || (off2 + size2 == off1)));
17388
17389 return false;
17390 }
17391
17392 /* This function returns true if it can be determined that the two MEM
17393 locations overlap by at least 1 byte based on base reg/offset/size. */
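/* In interval terms: with a common base register, the half-open
   ranges [OFF1, OFF1 + SIZE1) and [OFF2, OFF2 + SIZE2) must
   intersect.  */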
17394
17395 static bool
17396 mem_locations_overlap (rtx mem1, rtx mem2)
17397 {
17398 rtx reg1, reg2;
17399 HOST_WIDE_INT off1, size1, off2, size2;
17400
17401 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17402 && get_memref_parts (mem2, &reg2, &off2, &size2))
17403 return ((REGNO (reg1) == REGNO (reg2))
17404 && (((off1 <= off2) && (off1 + size1 > off2))
17405 || ((off2 <= off1) && (off2 + size2 > off1))));
17406
17407 return false;
17408 }
17409
17410 /* A C statement (sans semicolon) to update the integer scheduling
17411 priority INSN_PRIORITY (INSN). Increase the priority to execute
17412 INSN earlier; reduce the priority to execute INSN later. Do not
17413 define this macro if you do not need to adjust the scheduling
17414 priorities of insns. */
17415
17416 static int
17417 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17418 {
17419 rtx load_mem, str_mem;
17420 /* On machines (like the 750) which have asymmetric integer units,
17421 where one integer unit can do multiply and divides and the other
17422 can't, reduce the priority of multiply/divide so it is scheduled
17423 before other integer operations. */
17424
17425 #if 0
17426 if (! INSN_P (insn))
17427 return priority;
17428
17429 if (GET_CODE (PATTERN (insn)) == USE)
17430 return priority;
17431
17432 switch (rs6000_tune) {
17433 case PROCESSOR_PPC750:
17434 switch (get_attr_type (insn))
17435 {
17436 default:
17437 break;
17438
17439 case TYPE_MUL:
17440 case TYPE_DIV:
17441 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17442 priority, priority);
17443 if (priority >= 0 && priority < 0x01000000)
17444 priority >>= 3;
17445 break;
17446 }
17447 }
17448 #endif
17449
17450 if (insn_must_be_first_in_group (insn)
17451 && reload_completed
17452 && current_sched_info->sched_max_insns_priority
17453 && rs6000_sched_restricted_insns_priority)
17454 {
17455
17456 /* Prioritize insns that can be dispatched only in the first
17457 dispatch slot. */
17458 if (rs6000_sched_restricted_insns_priority == 1)
17459 /* Attach highest priority to insn. This means that in
17460 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17461 precede 'priority' (critical path) considerations. */
17462 return current_sched_info->sched_max_insns_priority;
17463 else if (rs6000_sched_restricted_insns_priority == 2)
17464 /* Increase priority of insn by a minimal amount. This means that in
17465 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17466 considerations precede dispatch-slot restriction considerations. */
17467 return (priority + 1);
17468 }
17469
17470 if (rs6000_tune == PROCESSOR_POWER6
17471 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17472 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17473 /* Attach highest priority to insn if the scheduler has just issued two
17474 stores and this instruction is a load, or two loads and this instruction
17475 is a store. Power6 wants loads and stores scheduled alternately
17476 when possible. */
17477 return current_sched_info->sched_max_insns_priority;
17478
17479 return priority;
17480 }
17481
17482 /* Return true if the instruction is nonpipelined on the Cell. */
17483 static bool
17484 is_nonpipeline_insn (rtx_insn *insn)
17485 {
17486 enum attr_type type;
17487 if (!insn || !NONDEBUG_INSN_P (insn)
17488 || GET_CODE (PATTERN (insn)) == USE
17489 || GET_CODE (PATTERN (insn)) == CLOBBER)
17490 return false;
17491
17492 type = get_attr_type (insn);
17493 if (type == TYPE_MUL
17494 || type == TYPE_DIV
17495 || type == TYPE_SDIV
17496 || type == TYPE_DDIV
17497 || type == TYPE_SSQRT
17498 || type == TYPE_DSQRT
17499 || type == TYPE_MFCR
17500 || type == TYPE_MFCRF
17501 || type == TYPE_MFJMPR)
17502 {
17503 return true;
17504 }
17505 return false;
17506 }
17507
17508
17509 /* Return how many instructions the machine can issue per cycle. */
17510
17511 static int
17512 rs6000_issue_rate (void)
17513 {
17514 /* Unless scheduling for register pressure, use an issue rate of 1
17515 for the first scheduling pass to decrease degradation. */
17516 if (!reload_completed && !flag_sched_pressure)
17517 return 1;
17518
17519 switch (rs6000_tune) {
17520 case PROCESSOR_RS64A:
17521 case PROCESSOR_PPC601: /* ? */
17522 case PROCESSOR_PPC7450:
17523 return 3;
17524 case PROCESSOR_PPC440:
17525 case PROCESSOR_PPC603:
17526 case PROCESSOR_PPC750:
17527 case PROCESSOR_PPC7400:
17528 case PROCESSOR_PPC8540:
17529 case PROCESSOR_PPC8548:
17530 case PROCESSOR_CELL:
17531 case PROCESSOR_PPCE300C2:
17532 case PROCESSOR_PPCE300C3:
17533 case PROCESSOR_PPCE500MC:
17534 case PROCESSOR_PPCE500MC64:
17535 case PROCESSOR_PPCE5500:
17536 case PROCESSOR_PPCE6500:
17537 case PROCESSOR_TITAN:
17538 return 2;
17539 case PROCESSOR_PPC476:
17540 case PROCESSOR_PPC604:
17541 case PROCESSOR_PPC604e:
17542 case PROCESSOR_PPC620:
17543 case PROCESSOR_PPC630:
17544 return 4;
17545 case PROCESSOR_POWER4:
17546 case PROCESSOR_POWER5:
17547 case PROCESSOR_POWER6:
17548 case PROCESSOR_POWER7:
17549 return 5;
17550 case PROCESSOR_POWER8:
17551 return 7;
17552 case PROCESSOR_POWER9:
17553 case PROCESSOR_FUTURE:
17554 return 6;
17555 default:
17556 return 1;
17557 }
17558 }
17559
17560 /* Return how many instructions to look ahead for better insn
17561 scheduling. */
17562
17563 static int
17564 rs6000_use_sched_lookahead (void)
17565 {
17566 switch (rs6000_tune)
17567 {
17568 case PROCESSOR_PPC8540:
17569 case PROCESSOR_PPC8548:
17570 return 4;
17571
17572 case PROCESSOR_CELL:
17573 return (reload_completed ? 8 : 0);
17574
17575 default:
17576 return 0;
17577 }
17578 }
17579
17580 /* We are choosing an insn from the ready queue. Return zero if INSN
17581 can be chosen. */
17582 static int
17583 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17584 {
17585 if (ready_index == 0)
17586 return 0;
17587
17588 if (rs6000_tune != PROCESSOR_CELL)
17589 return 0;
17590
17591 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17592
17593 if (!reload_completed
17594 || is_nonpipeline_insn (insn)
17595 || is_microcoded_insn (insn))
17596 return 1;
17597
17598 return 0;
17599 }
17600
17601 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17602 and return true. */
17603
17604 static bool
17605 find_mem_ref (rtx pat, rtx *mem_ref)
17606 {
17607 const char * fmt;
17608 int i, j;
17609
17610 /* stack_tie does not produce any real memory traffic. */
17611 if (tie_operand (pat, VOIDmode))
17612 return false;
17613
17614 if (MEM_P (pat))
17615 {
17616 *mem_ref = pat;
17617 return true;
17618 }
17619
17620 /* Recursively process the pattern. */
17621 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17622
17623 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17624 {
17625 if (fmt[i] == 'e')
17626 {
17627 if (find_mem_ref (XEXP (pat, i), mem_ref))
17628 return true;
17629 }
17630 else if (fmt[i] == 'E')
17631 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17632 {
17633 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17634 return true;
17635 }
17636 }
17637
17638 return false;
17639 }
17640
17641 /* Determine if PAT is a PATTERN of a load insn. */
17642
17643 static bool
17644 is_load_insn1 (rtx pat, rtx *load_mem)
17645 {
17646 if (!pat)
17647 return false;
17648
17649 if (GET_CODE (pat) == SET)
17650 return find_mem_ref (SET_SRC (pat), load_mem);
17651
17652 if (GET_CODE (pat) == PARALLEL)
17653 {
17654 int i;
17655
17656 for (i = 0; i < XVECLEN (pat, 0); i++)
17657 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17658 return true;
17659 }
17660
17661 return false;
17662 }
17663
17664 /* Determine if INSN loads from memory. */
17665
17666 static bool
17667 is_load_insn (rtx insn, rtx *load_mem)
17668 {
17669 if (!insn || !INSN_P (insn))
17670 return false;
17671
17672 if (CALL_P (insn))
17673 return false;
17674
17675 return is_load_insn1 (PATTERN (insn), load_mem);
17676 }
17677
17678 /* Determine if PAT is a PATTERN of a store insn. */
17679
17680 static bool
17681 is_store_insn1 (rtx pat, rtx *str_mem)
17682 {
17683 if (!pat)
17684 return false;
17685
17686 if (GET_CODE (pat) == SET)
17687 return find_mem_ref (SET_DEST (pat), str_mem);
17688
17689 if (GET_CODE (pat) == PARALLEL)
17690 {
17691 int i;
17692
17693 for (i = 0; i < XVECLEN (pat, 0); i++)
17694 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17695 return true;
17696 }
17697
17698 return false;
17699 }
17700
17701 /* Determine if INSN stores to memory. */
17702
17703 static bool
17704 is_store_insn (rtx insn, rtx *str_mem)
17705 {
17706 if (!insn || !INSN_P (insn))
17707 return false;
17708
17709 return is_store_insn1 (PATTERN (insn), str_mem);
17710 }
17711
17712 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17713
17714 static bool
17715 is_power9_pairable_vec_type (enum attr_type type)
17716 {
17717 switch (type)
17718 {
17719 case TYPE_VECSIMPLE:
17720 case TYPE_VECCOMPLEX:
17721 case TYPE_VECDIV:
17722 case TYPE_VECCMP:
17723 case TYPE_VECPERM:
17724 case TYPE_VECFLOAT:
17725 case TYPE_VECFDIV:
17726 case TYPE_VECDOUBLE:
17727 return true;
17728 default:
17729 break;
17730 }
17731 return false;
17732 }
17733
17734 /* Returns whether the dependence between INSN and NEXT is considered
17735 costly by the given target. */
17736
17737 static bool
17738 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17739 {
17740 rtx insn;
17741 rtx next;
17742 rtx load_mem, str_mem;
17743
17744 /* If the flag is not enabled - no dependence is considered costly;
17745 allow all dependent insns in the same group.
17746 This is the most aggressive option. */
17747 if (rs6000_sched_costly_dep == no_dep_costly)
17748 return false;
17749
17750 /* If the flag is set to 1 - a dependence is always considered costly;
17751 do not allow dependent instructions in the same group.
17752 This is the most conservative option. */
17753 if (rs6000_sched_costly_dep == all_deps_costly)
17754 return true;
17755
17756 insn = DEP_PRO (dep);
17757 next = DEP_CON (dep);
17758
17759 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17760 && is_load_insn (next, &load_mem)
17761 && is_store_insn (insn, &str_mem))
17762 /* Prevent load after store in the same group. */
17763 return true;
17764
17765 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17766 && is_load_insn (next, &load_mem)
17767 && is_store_insn (insn, &str_mem)
17768 && DEP_TYPE (dep) == REG_DEP_TRUE
17769 && mem_locations_overlap(str_mem, load_mem))
17770 /* Prevent load after store in the same group if it is a true
17771 dependence. */
17772 return true;
17773
17774 /* The flag is set to X; dependences with latency >= X are considered costly,
17775 and will not be scheduled in the same group. */
17776 if (rs6000_sched_costly_dep <= max_dep_latency
17777 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17778 return true;
17779
17780 return false;
17781 }
17782
17783 /* Return the next insn after INSN that is found before TAIL is reached,
17784 skipping any "non-active" insns - insns that will not actually occupy
17785 an issue slot. Return NULL_RTX if such an insn is not found. */
17786
17787 static rtx_insn *
17788 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17789 {
17790 if (insn == NULL_RTX || insn == tail)
17791 return NULL;
17792
17793 while (1)
17794 {
17795 insn = NEXT_INSN (insn);
17796 if (insn == NULL_RTX || insn == tail)
17797 return NULL;
17798
17799 if (CALL_P (insn)
17800 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17801 || (NONJUMP_INSN_P (insn)
17802 && GET_CODE (PATTERN (insn)) != USE
17803 && GET_CODE (PATTERN (insn)) != CLOBBER
17804 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17805 break;
17806 }
17807 return insn;
17808 }
17809
17810 /* Move instruction at POS to the end of the READY list. */
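/* For example, with READY = {A, B, C, D}, POS = 1 and LASTPOS = 3,
   the list becomes {A, C, D, B}.  */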
17811
17812 static void
17813 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17814 {
17815 rtx_insn *tmp;
17816 int i;
17817
17818 tmp = ready[pos];
17819 for (i = pos; i < lastpos; i++)
17820 ready[i] = ready[i + 1];
17821 ready[lastpos] = tmp;
17822 }
17823
17824 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17825
17826 static int
17827 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17828 {
17829 /* For Power6, we need to handle some special cases to try and keep the
17830 store queue from overflowing and triggering expensive flushes.
17831
17832 This code monitors how load and store instructions are being issued
17833 and skews the ready list one way or the other to increase the likelihood
17834 that a desired instruction is issued at the proper time.
17835
17836 A couple of things are done. First, we maintain a "load_store_pendulum"
17837 to track the current state of load/store issue.
17838
17839 - If the pendulum is at zero, then no loads or stores have been
17840 issued in the current cycle so we do nothing.
17841
17842 - If the pendulum is 1, then a single load has been issued in this
17843 cycle and we attempt to locate another load in the ready list to
17844 issue with it.
17845
17846 - If the pendulum is -2, then two stores have already been
17847 issued in this cycle, so we increase the priority of the first load
17848 in the ready list to increase its likelihood of being chosen first
17849 in the next cycle.
17850
17851 - If the pendulum is -1, then a single store has been issued in this
17852 cycle and we attempt to locate another store in the ready list to
17853 issue with it, preferring a store to an adjacent memory location to
17854 facilitate store pairing in the store queue.
17855
17856 - If the pendulum is 2, then two loads have already been
17857 issued in this cycle, so we increase the priority of the first store
17858 in the ready list to increase its likelihood of being chosen first
17859 in the next cycle.
17860
17861 - If the pendulum < -2 or > 2, then do nothing.
17862
17863 Note: This code covers the most common scenarios. There exist
17864 non-load/store instructions which make use of the LSU and which
17865 would need to be accounted for to strictly model the behavior
17866 of the machine. Those instructions are currently unaccounted
17867 for to help minimize compile time overhead of this code.
17868 */
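/* Worked example: a load issuing into an empty cycle moves the
   pendulum from 0 to 1, and we then try to pair a second load with
   it; if the pendulum instead reaches 2 (two loads issued), we boost
   the priority of the first store on the ready list so the next
   cycle is likely to begin with a store.  */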
17869 int pos;
17870 rtx load_mem, str_mem;
17871
17872 if (is_store_insn (last_scheduled_insn, &str_mem))
17873 /* Issuing a store, swing the load_store_pendulum to the left */
17874 load_store_pendulum--;
17875 else if (is_load_insn (last_scheduled_insn, &load_mem))
17876 /* Issuing a load, swing the load_store_pendulum to the right */
17877 load_store_pendulum++;
17878 else
17879 return cached_can_issue_more;
17880
17881 /* If the pendulum is balanced, or there is only one instruction on
17882 the ready list, then all is well, so return. */
17883 if ((load_store_pendulum == 0) || (lastpos <= 0))
17884 return cached_can_issue_more;
17885
17886 if (load_store_pendulum == 1)
17887 {
17888 /* A load has been issued in this cycle. Scan the ready list
17889 for another load to issue with it */
17890 pos = lastpos;
17891
17892 while (pos >= 0)
17893 {
17894 if (is_load_insn (ready[pos], &load_mem))
17895 {
17896 /* Found a load. Move it to the head of the ready list,
17897 and adjust its priority so that it is more likely to
17898 stay there */
17899 move_to_end_of_ready (ready, pos, lastpos);
17900
17901 if (!sel_sched_p ()
17902 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17903 INSN_PRIORITY (ready[lastpos])++;
17904 break;
17905 }
17906 pos--;
17907 }
17908 }
17909 else if (load_store_pendulum == -2)
17910 {
17911 /* Two stores have been issued in this cycle. Increase the
17912 priority of the first load in the ready list to favor it for
17913 issuing in the next cycle. */
17914 pos = lastpos;
17915
17916 while (pos >= 0)
17917 {
17918 if (is_load_insn (ready[pos], &load_mem)
17919 && !sel_sched_p ()
17920 && INSN_PRIORITY_KNOWN (ready[pos]))
17921 {
17922 INSN_PRIORITY (ready[pos])++;
17923
17924 /* Adjust the pendulum to account for the fact that a load
17925 was found and increased in priority. This is to prevent
17926 increasing the priority of multiple loads */
17927 load_store_pendulum--;
17928
17929 break;
17930 }
17931 pos--;
17932 }
17933 }
17934 else if (load_store_pendulum == -1)
17935 {
17936 /* A store has been issued in this cycle. Scan the ready list for
17937 another store to issue with it, preferring a store to an adjacent
17938 memory location */
17939 int first_store_pos = -1;
17940
17941 pos = lastpos;
17942
17943 while (pos >= 0)
17944 {
17945 if (is_store_insn (ready[pos], &str_mem))
17946 {
17947 rtx str_mem2;
17948 /* Maintain the index of the first store found on the
17949 list */
17950 if (first_store_pos == -1)
17951 first_store_pos = pos;
17952
17953 if (is_store_insn (last_scheduled_insn, &str_mem2)
17954 && adjacent_mem_locations (str_mem, str_mem2))
17955 {
17956 /* Found an adjacent store. Move it to the head of the
17957 ready list, and adjust its priority so that it is
17958 more likely to stay there */
17959 move_to_end_of_ready (ready, pos, lastpos);
17960
17961 if (!sel_sched_p ()
17962 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17963 INSN_PRIORITY (ready[lastpos])++;
17964
17965 first_store_pos = -1;
17966
17967 break;
17968 }
17969 }
17970 pos--;
17971 }
17972
17973 if (first_store_pos >= 0)
17974 {
17975 /* An adjacent store wasn't found, but a non-adjacent store was,
17976 so move the non-adjacent store to the front of the ready
17977 list, and adjust its priority so that it is more likely to
17978 stay there. */
17979 move_to_end_of_ready (ready, first_store_pos, lastpos);
17980 if (!sel_sched_p ()
17981 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17982 INSN_PRIORITY (ready[lastpos])++;
17983 }
17984 }
17985 else if (load_store_pendulum == 2)
17986 {
17987 /* Two loads have been issued in this cycle. Increase the priority
17988 of the first store in the ready list to favor it for issuing in
17989 the next cycle. */
17990 pos = lastpos;
17991
17992 while (pos >= 0)
17993 {
17994 if (is_store_insn (ready[pos], &str_mem)
17995 && !sel_sched_p ()
17996 && INSN_PRIORITY_KNOWN (ready[pos]))
17997 {
17998 INSN_PRIORITY (ready[pos])++;
17999
18000 /* Adjust the pendulum to account for the fact that a store
18001 was found and increased in priority. This is to prevent
18002 increasing the priority of multiple stores */
18003 load_store_pendulum++;
18004
18005 break;
18006 }
18007 pos--;
18008 }
18009 }
18010
18011 return cached_can_issue_more;
18012 }
18013
18014 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18015
18016 static int
18017 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18018 {
18019 int pos;
18020 enum attr_type type, type2;
18021
18022 type = get_attr_type (last_scheduled_insn);
18023
18024 /* Try to issue fixed point divides back-to-back in pairs so they will be
18025 routed to separate execution units and execute in parallel. */
18026 if (type == TYPE_DIV && divide_cnt == 0)
18027 {
18028 /* First divide has been scheduled. */
18029 divide_cnt = 1;
18030
18031 /* Scan the ready list looking for another divide, if found move it
18032 to the end of the list so it is chosen next. */
18033 pos = lastpos;
18034 while (pos >= 0)
18035 {
18036 if (recog_memoized (ready[pos]) >= 0
18037 && get_attr_type (ready[pos]) == TYPE_DIV)
18038 {
18039 move_to_end_of_ready (ready, pos, lastpos);
18040 break;
18041 }
18042 pos--;
18043 }
18044 }
18045 else
18046 {
18047 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18048 divide_cnt = 0;
18049
18050 /* The best dispatch throughput for vector and vector load insns can be
18051 achieved by interleaving a vector and vector load such that they'll
18052 dispatch to the same superslice. If this pairing cannot be achieved
18053 then it is best to pair vector insns together and vector load insns
18054 together.
18055
18056 To aid in this pairing, vec_pairing maintains the current state with
18057 the following values:
18058
18059 0 : Initial state, no vecload/vector pairing has been started.
18060
18061 1 : A vecload or vector insn has been issued and a candidate for
18062 pairing has been found and moved to the end of the ready
18063 list. */
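	 /* For instance (hypothetical ready list): a vecload issues while
	    vec_pairing == 0 and a pairable vector insn is found, so that
	    insn is moved to the end of the ready list and vec_pairing
	    becomes 1; on the next call the pair is complete and the
	    state is reset to 0 below.  */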
18064 if (type == TYPE_VECLOAD)
18065 {
18066 /* Issued a vecload. */
18067 if (vec_pairing == 0)
18068 {
18069 int vecload_pos = -1;
18070 /* We issued a single vecload, look for a vector insn to pair it
18071 with. If one isn't found, try to pair another vecload. */
18072 pos = lastpos;
18073 while (pos >= 0)
18074 {
18075 if (recog_memoized (ready[pos]) >= 0)
18076 {
18077 type2 = get_attr_type (ready[pos]);
18078 if (is_power9_pairable_vec_type (type2))
18079 {
18080 /* Found a vector insn to pair with, move it to the
18081 end of the ready list so it is scheduled next. */
18082 move_to_end_of_ready (ready, pos, lastpos);
18083 vec_pairing = 1;
18084 return cached_can_issue_more;
18085 }
18086 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18087 /* Remember position of first vecload seen. */
18088 vecload_pos = pos;
18089 }
18090 pos--;
18091 }
18092 if (vecload_pos >= 0)
18093 {
18094 /* Didn't find a vector to pair with but did find a vecload,
18095 move it to the end of the ready list. */
18096 move_to_end_of_ready (ready, vecload_pos, lastpos);
18097 vec_pairing = 1;
18098 return cached_can_issue_more;
18099 }
18100 }
18101 }
18102 else if (is_power9_pairable_vec_type (type))
18103 {
18104 /* Issued a vector operation. */
18105 if (vec_pairing == 0)
18106 {
18107 int vec_pos = -1;
18108 /* We issued a single vector insn, look for a vecload to pair it
18109 with. If one isn't found, try to pair another vector. */
18110 pos = lastpos;
18111 while (pos >= 0)
18112 {
18113 if (recog_memoized (ready[pos]) >= 0)
18114 {
18115 type2 = get_attr_type (ready[pos]);
18116 if (type2 == TYPE_VECLOAD)
18117 {
18118 /* Found a vecload insn to pair with, move it to the
18119 end of the ready list so it is scheduled next. */
18120 move_to_end_of_ready (ready, pos, lastpos);
18121 vec_pairing = 1;
18122 return cached_can_issue_more;
18123 }
18124 else if (is_power9_pairable_vec_type (type2)
18125 && vec_pos == -1)
18126 /* Remember position of first vector insn seen. */
18127 vec_pos = pos;
18128 }
18129 pos--;
18130 }
18131 if (vec_pos >= 0)
18132 {
18133 /* Didn't find a vecload to pair with but did find a vector
18134 insn, move it to the end of the ready list. */
18135 move_to_end_of_ready (ready, vec_pos, lastpos);
18136 vec_pairing = 1;
18137 return cached_can_issue_more;
18138 }
18139 }
18140 }
18141
18142 /* We've either finished a vec/vecload pair, couldn't find an insn to
18143 continue the current pair, or the last insn had nothing to do with
18144 pairing. In any case, reset the state. */
18145 vec_pairing = 0;
18146 }
18147
18148 return cached_can_issue_more;
18149 }
18150
18151 /* We are about to begin issuing insns for this clock cycle. */
18152
18153 static int
18154 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18155 rtx_insn **ready ATTRIBUTE_UNUSED,
18156 int *pn_ready ATTRIBUTE_UNUSED,
18157 int clock_var ATTRIBUTE_UNUSED)
18158 {
18159 int n_ready = *pn_ready;
18160
18161 if (sched_verbose)
18162 fprintf (dump, "// rs6000_sched_reorder :\n");
18163
18164 /* Reorder the ready list, if the second to last ready insn
18165 is a nonpipeline insn. */
18166 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18167 {
18168 if (is_nonpipeline_insn (ready[n_ready - 1])
18169 && (recog_memoized (ready[n_ready - 2]) > 0))
18170 /* Simply swap first two insns. */
18171 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18172 }
18173
18174 if (rs6000_tune == PROCESSOR_POWER6)
18175 load_store_pendulum = 0;
18176
18177 return rs6000_issue_rate ();
18178 }
18179
18180 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18181
18182 static int
18183 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18184 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18185 {
18186 if (sched_verbose)
18187 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18188
18189 /* Do Power6 dependent reordering if necessary. */
18190 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18191 return power6_sched_reorder2 (ready, *pn_ready - 1);
18192
18193 /* Do Power9 dependent reordering if necessary. */
18194 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18195 && recog_memoized (last_scheduled_insn) >= 0)
18196 return power9_sched_reorder2 (ready, *pn_ready - 1);
18197
18198 return cached_can_issue_more;
18199 }
18200
18201 /* Return whether the presence of INSN causes a dispatch group termination
18202 of group WHICH_GROUP.
18203
18204 If WHICH_GROUP == current_group, this function will return true if INSN
18205 causes the termination of the current group (i.e., the dispatch group to
18206 which INSN belongs). This means that INSN will be the last insn in the
18207 group it belongs to.
18208
18209 If WHICH_GROUP == previous_group, this function will return true if INSN
18210 causes the termination of the previous group (i.e., the dispatch group that
18211 precedes the group to which INSN belongs). This means that INSN will be
18212 the first insn in the group it belongs to. */
18213
18214 static bool
18215 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18216 {
18217 bool first, last;
18218
18219 if (! insn)
18220 return false;
18221
18222 first = insn_must_be_first_in_group (insn);
18223 last = insn_must_be_last_in_group (insn);
18224
18225 if (first && last)
18226 return true;
18227
18228 if (which_group == current_group)
18229 return last;
18230 else if (which_group == previous_group)
18231 return first;
18232
18233 return false;
18234 }
18235
18236
18237 static bool
18238 insn_must_be_first_in_group (rtx_insn *insn)
18239 {
18240 enum attr_type type;
18241
18242 if (!insn
18243 || NOTE_P (insn)
18244 || DEBUG_INSN_P (insn)
18245 || GET_CODE (PATTERN (insn)) == USE
18246 || GET_CODE (PATTERN (insn)) == CLOBBER)
18247 return false;
18248
18249 switch (rs6000_tune)
18250 {
18251 case PROCESSOR_POWER5:
18252 if (is_cracked_insn (insn))
18253 return true;
18254 /* FALLTHRU */
18255 case PROCESSOR_POWER4:
18256 if (is_microcoded_insn (insn))
18257 return true;
18258
18259 if (!rs6000_sched_groups)
18260 return false;
18261
18262 type = get_attr_type (insn);
18263
18264 switch (type)
18265 {
18266 case TYPE_MFCR:
18267 case TYPE_MFCRF:
18268 case TYPE_MTCR:
18269 case TYPE_CR_LOGICAL:
18270 case TYPE_MTJMPR:
18271 case TYPE_MFJMPR:
18272 case TYPE_DIV:
18273 case TYPE_LOAD_L:
18274 case TYPE_STORE_C:
18275 case TYPE_ISYNC:
18276 case TYPE_SYNC:
18277 return true;
18278 default:
18279 break;
18280 }
18281 break;
18282 case PROCESSOR_POWER6:
18283 type = get_attr_type (insn);
18284
18285 switch (type)
18286 {
18287 case TYPE_EXTS:
18288 case TYPE_CNTLZ:
18289 case TYPE_TRAP:
18290 case TYPE_MUL:
18291 case TYPE_INSERT:
18292 case TYPE_FPCOMPARE:
18293 case TYPE_MFCR:
18294 case TYPE_MTCR:
18295 case TYPE_MFJMPR:
18296 case TYPE_MTJMPR:
18297 case TYPE_ISYNC:
18298 case TYPE_SYNC:
18299 case TYPE_LOAD_L:
18300 case TYPE_STORE_C:
18301 return true;
18302 case TYPE_SHIFT:
18303 if (get_attr_dot (insn) == DOT_NO
18304 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18305 return true;
18306 else
18307 break;
18308 case TYPE_DIV:
18309 if (get_attr_size (insn) == SIZE_32)
18310 return true;
18311 else
18312 break;
18313 case TYPE_LOAD:
18314 case TYPE_STORE:
18315 case TYPE_FPLOAD:
18316 case TYPE_FPSTORE:
18317 if (get_attr_update (insn) == UPDATE_YES)
18318 return true;
18319 else
18320 break;
18321 default:
18322 break;
18323 }
18324 break;
18325 case PROCESSOR_POWER7:
18326 type = get_attr_type (insn);
18327
18328 switch (type)
18329 {
18330 case TYPE_CR_LOGICAL:
18331 case TYPE_MFCR:
18332 case TYPE_MFCRF:
18333 case TYPE_MTCR:
18334 case TYPE_DIV:
18335 case TYPE_ISYNC:
18336 case TYPE_LOAD_L:
18337 case TYPE_STORE_C:
18338 case TYPE_MFJMPR:
18339 case TYPE_MTJMPR:
18340 return true;
18341 case TYPE_MUL:
18342 case TYPE_SHIFT:
18343 case TYPE_EXTS:
18344 if (get_attr_dot (insn) == DOT_YES)
18345 return true;
18346 else
18347 break;
18348 case TYPE_LOAD:
18349 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18350 || get_attr_update (insn) == UPDATE_YES)
18351 return true;
18352 else
18353 break;
18354 case TYPE_STORE:
18355 case TYPE_FPLOAD:
18356 case TYPE_FPSTORE:
18357 if (get_attr_update (insn) == UPDATE_YES)
18358 return true;
18359 else
18360 break;
18361 default:
18362 break;
18363 }
18364 break;
18365 case PROCESSOR_POWER8:
18366 type = get_attr_type (insn);
18367
18368 switch (type)
18369 {
18370 case TYPE_CR_LOGICAL:
18371 case TYPE_MFCR:
18372 case TYPE_MFCRF:
18373 case TYPE_MTCR:
18374 case TYPE_SYNC:
18375 case TYPE_ISYNC:
18376 case TYPE_LOAD_L:
18377 case TYPE_STORE_C:
18378 case TYPE_VECSTORE:
18379 case TYPE_MFJMPR:
18380 case TYPE_MTJMPR:
18381 return true;
18382 case TYPE_SHIFT:
18383 case TYPE_EXTS:
18384 case TYPE_MUL:
18385 if (get_attr_dot (insn) == DOT_YES)
18386 return true;
18387 else
18388 break;
18389 case TYPE_LOAD:
18390 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18391 || get_attr_update (insn) == UPDATE_YES)
18392 return true;
18393 else
18394 break;
18395 case TYPE_STORE:
18396 if (get_attr_update (insn) == UPDATE_YES
18397 && get_attr_indexed (insn) == INDEXED_YES)
18398 return true;
18399 else
18400 break;
18401 default:
18402 break;
18403 }
18404 break;
18405 default:
18406 break;
18407 }
18408
18409 return false;
18410 }
18411
18412 static bool
18413 insn_must_be_last_in_group (rtx_insn *insn)
18414 {
18415 enum attr_type type;
18416
18417 if (!insn
18418 || NOTE_P (insn)
18419 || DEBUG_INSN_P (insn)
18420 || GET_CODE (PATTERN (insn)) == USE
18421 || GET_CODE (PATTERN (insn)) == CLOBBER)
18422 return false;
18423
18424 switch (rs6000_tune)
    {
18425 case PROCESSOR_POWER4:
18426 case PROCESSOR_POWER5:
18427 if (is_microcoded_insn (insn))
18428 return true;
18429
18430 if (is_branch_slot_insn (insn))
18431 return true;
18432
18433 break;
18434 case PROCESSOR_POWER6:
18435 type = get_attr_type (insn);
18436
18437 switch (type)
18438 {
18439 case TYPE_EXTS:
18440 case TYPE_CNTLZ:
18441 case TYPE_TRAP:
18442 case TYPE_MUL:
18443 case TYPE_FPCOMPARE:
18444 case TYPE_MFCR:
18445 case TYPE_MTCR:
18446 case TYPE_MFJMPR:
18447 case TYPE_MTJMPR:
18448 case TYPE_ISYNC:
18449 case TYPE_SYNC:
18450 case TYPE_LOAD_L:
18451 case TYPE_STORE_C:
18452 return true;
18453 case TYPE_SHIFT:
18454 if (get_attr_dot (insn) == DOT_NO
18455 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18456 return true;
18457 else
18458 break;
18459 case TYPE_DIV:
18460 if (get_attr_size (insn) == SIZE_32)
18461 return true;
18462 else
18463 break;
18464 default:
18465 break;
18466 }
18467 break;
18468 case PROCESSOR_POWER7:
18469 type = get_attr_type (insn);
18470
18471 switch (type)
18472 {
18473 case TYPE_ISYNC:
18474 case TYPE_SYNC:
18475 case TYPE_LOAD_L:
18476 case TYPE_STORE_C:
18477 return true;
18478 case TYPE_LOAD:
18479 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18480 && get_attr_update (insn) == UPDATE_YES)
18481 return true;
18482 else
18483 break;
18484 case TYPE_STORE:
18485 if (get_attr_update (insn) == UPDATE_YES
18486 && get_attr_indexed (insn) == INDEXED_YES)
18487 return true;
18488 else
18489 break;
18490 default:
18491 break;
18492 }
18493 break;
18494 case PROCESSOR_POWER8:
18495 type = get_attr_type (insn);
18496
18497 switch (type)
18498 {
18499 case TYPE_MFCR:
18500 case TYPE_MTCR:
18501 case TYPE_ISYNC:
18502 case TYPE_SYNC:
18503 case TYPE_LOAD_L:
18504 case TYPE_STORE_C:
18505 return true;
18506 case TYPE_LOAD:
18507 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18508 && get_attr_update (insn) == UPDATE_YES)
18509 return true;
18510 else
18511 break;
18512 case TYPE_STORE:
18513 if (get_attr_update (insn) == UPDATE_YES
18514 && get_attr_indexed (insn) == INDEXED_YES)
18515 return true;
18516 else
18517 break;
18518 default:
18519 break;
18520 }
18521 break;
18522 default:
18523 break;
18524 }
18525
18526 return false;
18527 }
18528
18529 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18530 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18531
18532 static bool
18533 is_costly_group (rtx *group_insns, rtx next_insn)
18534 {
18535 int i;
18536 int issue_rate = rs6000_issue_rate ();
18537
18538 for (i = 0; i < issue_rate; i++)
18539 {
18540 sd_iterator_def sd_it;
18541 dep_t dep;
18542 rtx insn = group_insns[i];
18543
18544 if (!insn)
18545 continue;
18546
18547 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18548 {
18549 rtx next = DEP_CON (dep);
18550
18551 if (next == next_insn
18552 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18553 return true;
18554 }
18555 }
18556
18557 return false;
18558 }
18559
18560 /* Utility of the function redefine_groups.
18561 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18562 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18563 to keep it "far" (in a separate group) from GROUP_INSNS, following
18564 one of the following schemes, depending on the value of the flag
18565 -minsert-sched-nops = X:
18566 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18567 in order to force NEXT_INSN into a separate group.
18568 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18569 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18570 insertion (has a group just ended, how many vacant issue slots remain in the
18571 last group, and how many dispatch groups were encountered so far). */
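/* For example, under scheme (2) above, -minsert-sched-nops=3 emits
   exactly three nops before NEXT_INSN whenever NEXT_INSN has a costly
   dependence on the current group, regardless of how many issue slots
   remain vacant.  */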
18572
18573 static int
18574 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18575 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18576 int *group_count)
18577 {
18578 rtx nop;
18579 bool force;
18580 int issue_rate = rs6000_issue_rate ();
18581 bool end = *group_end;
18582 int i;
18583
18584 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18585 return can_issue_more;
18586
18587 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18588 return can_issue_more;
18589
18590 force = is_costly_group (group_insns, next_insn);
18591 if (!force)
18592 return can_issue_more;
18593
18594 if (sched_verbose > 6)
18595 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
18596 *group_count ,can_issue_more);
18597
18598 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18599 {
18600 if (*group_end)
18601 can_issue_more = 0;
18602
18603 /* Since only a branch can be issued in the last issue_slot, it is
18604 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18605 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18606 in this case the last nop will start a new group and the branch
18607 will be forced to the new group. */
18608 if (can_issue_more && !is_branch_slot_insn (next_insn))
18609 can_issue_more--;
18610
18611 /* Do we have a special group ending nop? */
18612 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18613 || rs6000_tune == PROCESSOR_POWER8)
18614 {
18615 nop = gen_group_ending_nop ();
18616 emit_insn_before (nop, next_insn);
18617 can_issue_more = 0;
18618 }
18619 else
18620 while (can_issue_more > 0)
18621 {
18622 nop = gen_nop ();
18623 emit_insn_before (nop, next_insn);
18624 can_issue_more--;
18625 }
18626
18627 *group_end = true;
18628 return 0;
18629 }
18630
18631 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18632 {
18633 int n_nops = rs6000_sched_insert_nops;
18634
18635 /* Nops can't be issued from the branch slot, so the effective
18636 issue_rate for nops is 'issue_rate - 1'. */
18637 if (can_issue_more == 0)
18638 can_issue_more = issue_rate;
18639 can_issue_more--;
18640 if (can_issue_more == 0)
18641 {
18642 can_issue_more = issue_rate - 1;
18643 (*group_count)++;
18644 end = true;
18645 for (i = 0; i < issue_rate; i++)
18646 {
18647 group_insns[i] = 0;
18648 }
18649 }
18650
18651 while (n_nops > 0)
18652 {
18653 nop = gen_nop ();
18654 emit_insn_before (nop, next_insn);
18655 if (can_issue_more == issue_rate - 1) /* new group begins */
18656 end = false;
18657 can_issue_more--;
18658 if (can_issue_more == 0)
18659 {
18660 can_issue_more = issue_rate - 1;
18661 (*group_count)++;
18662 end = true;
18663 for (i = 0; i < issue_rate; i++)
18664 {
18665 group_insns[i] = 0;
18666 }
18667 }
18668 n_nops--;
18669 }
18670
18671 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18672 can_issue_more++;
18673
18674 /* Is next_insn going to start a new group? */
18675 *group_end
18676 = (end
18677 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18678 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18679 || (can_issue_more < issue_rate &&
18680 insn_terminates_group_p (next_insn, previous_group)));
18681 if (*group_end && end)
18682 (*group_count)--;
18683
18684 if (sched_verbose > 6)
18685 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18686 *group_count, can_issue_more);
18687 return can_issue_more;
18688 }
18689
18690 return can_issue_more;
18691 }
18692
18693 /* This function tries to synch the dispatch groups that the compiler "sees"
18694 with the dispatch groups that the processor dispatcher is expected to
18695 form in practice. It tries to achieve this synchronization by forcing the
18696 estimated processor grouping on the compiler (as opposed to the function
18697 'pad_groups' which tries to force the scheduler's grouping on the processor).
18698
18699 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18700 examines the (estimated) dispatch groups that will be formed by the processor
18701 dispatcher. It marks these group boundaries to reflect the estimated
18702 processor grouping, overriding the grouping that the scheduler had marked.
18703 Depending on the value of the flag '-minsert-sched-nops' this function can
18704 force certain insns into separate groups or force a certain distance between
18705 them by inserting nops, for example, if there exists a "costly dependence"
18706 between the insns.
18707
18708 The function estimates the group boundaries that the processor will form as
18709 follows: It keeps track of how many vacant issue slots are available after
18710 each insn. A subsequent insn will start a new group if one of the following
18711 4 cases applies:
18712 - no more vacant issue slots remain in the current dispatch group.
18713 - only the last issue slot, which is the branch slot, is vacant, but the next
18714 insn is not a branch.
18715 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18716 which means that a cracked insn (which occupies two issue slots) can't be
18717 issued in this group.
18718 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18719 start a new group. */
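/* For example, if only the branch slot is still vacant and the next
   insn is an add rather than a branch, the add starts a new group
   (second case above); a cracked insn needs two slots, so it starts a
   new group whenever two or fewer slots remain (third case).  */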
18720
18721 static int
18722 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18723 rtx_insn *tail)
18724 {
18725 rtx_insn *insn, *next_insn;
18726 int issue_rate;
18727 int can_issue_more;
18728 int slot, i;
18729 bool group_end;
18730 int group_count = 0;
18731 rtx *group_insns;
18732
18733 /* Initialize. */
18734 issue_rate = rs6000_issue_rate ();
18735 group_insns = XALLOCAVEC (rtx, issue_rate);
18736 for (i = 0; i < issue_rate; i++)
18737 {
18738 group_insns[i] = 0;
18739 }
18740 can_issue_more = issue_rate;
18741 slot = 0;
18742 insn = get_next_active_insn (prev_head_insn, tail);
18743 group_end = false;
18744
18745 while (insn != NULL_RTX)
18746 {
18747 slot = (issue_rate - can_issue_more);
18748 group_insns[slot] = insn;
18749 can_issue_more =
18750 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18751 if (insn_terminates_group_p (insn, current_group))
18752 can_issue_more = 0;
18753
18754 next_insn = get_next_active_insn (insn, tail);
18755 if (next_insn == NULL_RTX)
18756 return group_count + 1;
18757
18758 /* Is next_insn going to start a new group? */
18759 group_end
18760 = (can_issue_more == 0
18761 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18762 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18763 || (can_issue_more < issue_rate &&
18764 insn_terminates_group_p (next_insn, previous_group)));
18765
18766 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18767 next_insn, &group_end, can_issue_more,
18768 &group_count);
18769
18770 if (group_end)
18771 {
18772 group_count++;
18773 can_issue_more = 0;
18774 for (i = 0; i < issue_rate; i++)
18775 {
18776 group_insns[i] = 0;
18777 }
18778 }
18779
18780 if (GET_MODE (next_insn) == TImode && can_issue_more)
18781 PUT_MODE (next_insn, VOIDmode);
18782 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18783 PUT_MODE (next_insn, TImode);
18784
18785 insn = next_insn;
18786 if (can_issue_more == 0)
18787 can_issue_more = issue_rate;
18788 } /* while */
18789
18790 return group_count;
18791 }
18792
18793 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18794 dispatch group boundaries that the scheduler had marked. Pad with nops
18795 any dispatch groups which have vacant issue slots, in order to force the
18796 scheduler's grouping on the processor dispatcher. The function
18797 returns the number of dispatch groups found. */
18798
18799 static int
18800 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18801 rtx_insn *tail)
18802 {
18803 rtx_insn *insn, *next_insn;
18804 rtx nop;
18805 int issue_rate;
18806 int can_issue_more;
18807 int group_end;
18808 int group_count = 0;
18809
18810 /* Initialize issue_rate. */
18811 issue_rate = rs6000_issue_rate ();
18812 can_issue_more = issue_rate;
18813
18814 insn = get_next_active_insn (prev_head_insn, tail);
18815 next_insn = get_next_active_insn (insn, tail);
18816
18817 while (insn != NULL_RTX)
18818 {
18819 can_issue_more =
18820 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18821
18822 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18823
18824 if (next_insn == NULL_RTX)
18825 break;
18826
18827 if (group_end)
18828 {
18829 /* If the scheduler had marked group termination at this location
18830 (between insn and next_insn), and neither insn nor next_insn will
18831 force group termination, pad the group with nops to force group
18832 termination. */
18833 if (can_issue_more
18834 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18835 && !insn_terminates_group_p (insn, current_group)
18836 && !insn_terminates_group_p (next_insn, previous_group))
18837 {
18838 if (!is_branch_slot_insn (next_insn))
18839 can_issue_more--;
18840
18841 while (can_issue_more)
18842 {
18843 nop = gen_nop ();
18844 emit_insn_before (nop, next_insn);
18845 can_issue_more--;
18846 }
18847 }
18848
18849 can_issue_more = issue_rate;
18850 group_count++;
18851 }
18852
18853 insn = next_insn;
18854 next_insn = get_next_active_insn (insn, tail);
18855 }
18856
18857 return group_count;
18858 }
18859
18860 /* We're beginning a new block. Initialize data structures as necessary. */
18861
18862 static void
18863 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18864 int sched_verbose ATTRIBUTE_UNUSED,
18865 int max_ready ATTRIBUTE_UNUSED)
18866 {
18867 last_scheduled_insn = NULL;
18868 load_store_pendulum = 0;
18869 divide_cnt = 0;
18870 vec_pairing = 0;
18871 }
18872
18873 /* The following function is called at the end of scheduling BB.
18874 After reload, it inserts nops to enforce insn group bundling. */
18875
18876 static void
18877 rs6000_sched_finish (FILE *dump, int sched_verbose)
18878 {
18879 int n_groups;
18880
18881 if (sched_verbose)
18882 fprintf (dump, "=== Finishing schedule.\n");
18883
18884 if (reload_completed && rs6000_sched_groups)
18885 {
18886 /* Do not run sched_finish hook when selective scheduling is enabled. */
18887 if (sel_sched_p ())
18888 return;
18889
18890 if (rs6000_sched_insert_nops == sched_finish_none)
18891 return;
18892
18893 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18894 n_groups = pad_groups (dump, sched_verbose,
18895 current_sched_info->prev_head,
18896 current_sched_info->next_tail);
18897 else
18898 n_groups = redefine_groups (dump, sched_verbose,
18899 current_sched_info->prev_head,
18900 current_sched_info->next_tail);
18901
18902 if (sched_verbose >= 6)
18903 {
18904 fprintf (dump, "ngroups = %d\n", n_groups);
18905 print_rtl (dump, current_sched_info->prev_head);
18906 fprintf (dump, "Done finish_sched\n");
18907 }
18908 }
18909 }
18910
18911 struct rs6000_sched_context
18912 {
18913 short cached_can_issue_more;
18914 rtx_insn *last_scheduled_insn;
18915 int load_store_pendulum;
18916 int divide_cnt;
18917 int vec_pairing;
18918 };
18919
18920 typedef struct rs6000_sched_context rs6000_sched_context_def;
18921 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18922
18923 /* Allocate storage for a new scheduling context. */
18924 static void *
18925 rs6000_alloc_sched_context (void)
18926 {
18927 return xmalloc (sizeof (rs6000_sched_context_def));
18928 }
18929
18930 /* If CLEAN_P is true, initialize _SC with clean data;
18931 otherwise initialize it from the global context. */
18932 static void
18933 rs6000_init_sched_context (void *_sc, bool clean_p)
18934 {
18935 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18936
18937 if (clean_p)
18938 {
18939 sc->cached_can_issue_more = 0;
18940 sc->last_scheduled_insn = NULL;
18941 sc->load_store_pendulum = 0;
18942 sc->divide_cnt = 0;
18943 sc->vec_pairing = 0;
18944 }
18945 else
18946 {
18947 sc->cached_can_issue_more = cached_can_issue_more;
18948 sc->last_scheduled_insn = last_scheduled_insn;
18949 sc->load_store_pendulum = load_store_pendulum;
18950 sc->divide_cnt = divide_cnt;
18951 sc->vec_pairing = vec_pairing;
18952 }
18953 }
18954
18955 /* Sets the global scheduling context to the one pointed to by _SC. */
18956 static void
18957 rs6000_set_sched_context (void *_sc)
18958 {
18959 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18960
18961 gcc_assert (sc != NULL);
18962
18963 cached_can_issue_more = sc->cached_can_issue_more;
18964 last_scheduled_insn = sc->last_scheduled_insn;
18965 load_store_pendulum = sc->load_store_pendulum;
18966 divide_cnt = sc->divide_cnt;
18967 vec_pairing = sc->vec_pairing;
18968 }
18969
18970 /* Free _SC. */
18971 static void
18972 rs6000_free_sched_context (void *_sc)
18973 {
18974 gcc_assert (_sc != NULL);
18975
18976 free (_sc);
18977 }
18978
18979 static bool
18980 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18981 {
18982 switch (get_attr_type (insn))
18983 {
18984 case TYPE_DIV:
18985 case TYPE_SDIV:
18986 case TYPE_DDIV:
18987 case TYPE_VECDIV:
18988 case TYPE_SSQRT:
18989 case TYPE_DSQRT:
18990 return false;
18991
18992 default:
18993 return true;
18994 }
18995 }
18996 \f
18997 /* Length in units of the trampoline for entering a nested function. */
18998
18999 int
19000 rs6000_trampoline_size (void)
19001 {
19002 int ret = 0;
19003
19004 switch (DEFAULT_ABI)
19005 {
19006 default:
19007 gcc_unreachable ();
19008
19009 case ABI_AIX:
19010 ret = (TARGET_32BIT) ? 12 : 24;
19011 break;
19012
19013 case ABI_ELFv2:
19014 gcc_assert (!TARGET_32BIT);
19015 ret = 32;
19016 break;
19017
19018 case ABI_DARWIN:
19019 case ABI_V4:
19020 ret = (TARGET_32BIT) ? 40 : 48;
19021 break;
19022 }
19023
19024 return ret;
19025 }
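/* For example, 64-bit AIX uses a 24-byte trampoline: three doublewords
   holding the code address, TOC pointer and static chain of the
   function descriptor built by rs6000_trampoline_init below.  */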
19026
19027 /* Emit RTL insns to initialize the variable parts of a trampoline.
19028 FNADDR is an RTX for the address of the function's pure code.
19029 CXT is an RTX for the static chain value for the function. */
19030
19031 static void
19032 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19033 {
19034 int regsize = (TARGET_32BIT) ? 4 : 8;
19035 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19036 rtx ctx_reg = force_reg (Pmode, cxt);
19037 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19038
19039 switch (DEFAULT_ABI)
19040 {
19041 default:
19042 gcc_unreachable ();
19043
19044 /* Under AIX, just build the 3-word function descriptor. */
19045 case ABI_AIX:
19046 {
19047 rtx fnmem, fn_reg, toc_reg;
19048
19049 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19050 error ("you cannot take the address of a nested function if you use "
19051 "the %qs option", "-mno-pointers-to-nested-functions");
19052
19053 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19054 fn_reg = gen_reg_rtx (Pmode);
19055 toc_reg = gen_reg_rtx (Pmode);
19056
19057 /* Macro to shorten the code expansions below. */
19058 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19059
19060 m_tramp = replace_equiv_address (m_tramp, addr);
19061
19062 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19063 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19064 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19065 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19066 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19067
19068 # undef MEM_PLUS
19069 }
19070 break;
19071
19072 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19073 case ABI_ELFv2:
19074 case ABI_DARWIN:
19075 case ABI_V4:
19076 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19077 LCT_NORMAL, VOIDmode,
19078 addr, Pmode,
19079 GEN_INT (rs6000_trampoline_size ()), SImode,
19080 fnaddr, Pmode,
19081 ctx_reg, Pmode);
19082 break;
19083 }
19084 }
19085
19086 \f
19087 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19088 identifier as an argument, so the front end shouldn't look it up. */
19089
19090 static bool
19091 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19092 {
19093 return is_attribute_p ("altivec", attr_id);
19094 }
19095
19096 /* Handle the "altivec" attribute. The attribute may have
19097 arguments as follows:
19098
19099 __attribute__((altivec(vector__)))
19100 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19101 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19102
19103 and may appear more than once (e.g., 'vector bool char') in a
19104 given declaration. */
19105
19106 static tree
19107 rs6000_handle_altivec_attribute (tree *node,
19108 tree name ATTRIBUTE_UNUSED,
19109 tree args,
19110 int flags ATTRIBUTE_UNUSED,
19111 bool *no_add_attrs)
19112 {
19113 tree type = *node, result = NULL_TREE;
19114 machine_mode mode;
19115 int unsigned_p;
19116 char altivec_type
19117 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19118 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19119 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19120 : '?');
19121
19122 while (POINTER_TYPE_P (type)
19123 || TREE_CODE (type) == FUNCTION_TYPE
19124 || TREE_CODE (type) == METHOD_TYPE
19125 || TREE_CODE (type) == ARRAY_TYPE)
19126 type = TREE_TYPE (type);
19127
19128 mode = TYPE_MODE (type);
19129
19130 /* Check for invalid AltiVec type qualifiers. */
19131 if (type == long_double_type_node)
19132 error ("use of %<long double%> in AltiVec types is invalid");
19133 else if (type == boolean_type_node)
19134 error ("use of boolean types in AltiVec types is invalid");
19135 else if (TREE_CODE (type) == COMPLEX_TYPE)
19136 error ("use of %<complex%> in AltiVec types is invalid");
19137 else if (DECIMAL_FLOAT_MODE_P (mode))
19138 error ("use of decimal floating point types in AltiVec types is invalid");
19139 else if (!TARGET_VSX)
19140 {
19141 if (type == long_unsigned_type_node || type == long_integer_type_node)
19142 {
19143 if (TARGET_64BIT)
19144 error ("use of %<long%> in AltiVec types is invalid for "
19145 "64-bit code without %qs", "-mvsx");
19146 else if (rs6000_warn_altivec_long)
19147 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19148 "use %<int%>");
19149 }
19150 else if (type == long_long_unsigned_type_node
19151 || type == long_long_integer_type_node)
19152 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19153 "-mvsx");
19154 else if (type == double_type_node)
19155 error ("use of %<double%> in AltiVec types is invalid without %qs",
19156 "-mvsx");
19157 }
19158
19159 switch (altivec_type)
19160 {
19161 case 'v':
19162 unsigned_p = TYPE_UNSIGNED (type);
19163 switch (mode)
19164 {
19165 case E_TImode:
19166 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19167 break;
19168 case E_DImode:
19169 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19170 break;
19171 case E_SImode:
19172 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19173 break;
19174 case E_HImode:
19175 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19176 break;
19177 case E_QImode:
19178 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19179 break;
19180 case E_SFmode: result = V4SF_type_node; break;
19181 case E_DFmode: result = V2DF_type_node; break;
19182 /* If the user says 'vector int bool', we may be handed the 'bool'
19183 attribute _before_ the 'vector' attribute, and so select the
19184 proper type in the 'b' case below. */
19185 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19186 case E_V2DImode: case E_V2DFmode:
19187 result = type;
19188 default: break;
19189 }
19190 break;
19191 case 'b':
19192 switch (mode)
19193 {
19194 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19195 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19196 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19197 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19198 default: break;
19199 }
19200 break;
19201 case 'p':
19202 switch (mode)
19203 {
19204 case E_V8HImode: result = pixel_V8HI_type_node;
19205 default: break;
19206 }
19207 default: break;
19208 }
19209
19210 /* Propagate qualifiers attached to the element type
19211 onto the vector type. */
19212 if (result && result != type && TYPE_QUALS (type))
19213 result = build_qualified_type (result, TYPE_QUALS (type));
19214
19215 *no_add_attrs = true; /* No need to hang on to the attribute. */
19216
19217 if (result)
19218 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19219
19220 return NULL_TREE;
19221 }
19222
19223 /* AltiVec defines five built-in scalar types that serve as vector
19224 elements; we must teach the compiler how to mangle them. The 128-bit
19225 floating point mangling is target-specific as well. */
19226
19227 static const char *
19228 rs6000_mangle_type (const_tree type)
19229 {
19230 type = TYPE_MAIN_VARIANT (type);
19231
19232 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19233 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19234 return NULL;
19235
19236 if (type == bool_char_type_node) return "U6__boolc";
19237 if (type == bool_short_type_node) return "U6__bools";
19238 if (type == pixel_type_node) return "u7__pixel";
19239 if (type == bool_int_type_node) return "U6__booli";
19240 if (type == bool_long_long_type_node) return "U6__boolx";
19241
19242 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19243 return "g";
19244 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19245 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19246
19247 /* For all other types, use the default mangling. */
19248 return NULL;
19249 }
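/* For example, the AltiVec '__pixel' element type mangles as
   "u7__pixel", while both __ibm128 and an IBM-format long double
   mangle as "g".  */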
19250
19251 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19252 struct attribute_spec.handler. */
19253
19254 static tree
19255 rs6000_handle_longcall_attribute (tree *node, tree name,
19256 tree args ATTRIBUTE_UNUSED,
19257 int flags ATTRIBUTE_UNUSED,
19258 bool *no_add_attrs)
19259 {
19260 if (TREE_CODE (*node) != FUNCTION_TYPE
19261 && TREE_CODE (*node) != FIELD_DECL
19262 && TREE_CODE (*node) != TYPE_DECL)
19263 {
19264 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19265 name);
19266 *no_add_attrs = true;
19267 }
19268
19269 return NULL_TREE;
19270 }
19271
19272 /* Set longcall attributes on all functions declared when
19273 rs6000_default_long_calls is true. */
19274 static void
19275 rs6000_set_default_type_attributes (tree type)
19276 {
19277 if (rs6000_default_long_calls
19278 && (TREE_CODE (type) == FUNCTION_TYPE
19279 || TREE_CODE (type) == METHOD_TYPE))
19280 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19281 NULL_TREE,
19282 TYPE_ATTRIBUTES (type));
19283
19284 #if TARGET_MACHO
19285 darwin_set_default_type_attributes (type);
19286 #endif
19287 }
19288
19289 /* Return a reference suitable for calling a function with the
19290 longcall attribute. */
19291
19292 static rtx
19293 rs6000_longcall_ref (rtx call_ref, rtx arg)
19294 {
19295 /* System V adds '.' to the internal name, so skip any leading dots. */
19296 const char *call_name = XSTR (call_ref, 0);
19297 if (*call_name == '.')
19298 {
19299 while (*call_name == '.')
19300 call_name++;
19301
19302 tree node = get_identifier (call_name);
19303 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19304 }
19305
19306 if (TARGET_PLTSEQ)
19307 {
19308 rtx base = const0_rtx;
19309 int regno = 12;
19310 if (rs6000_pcrel_p (cfun))
19311 {
19312 rtx reg = gen_rtx_REG (Pmode, regno);
19313 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19314 gen_rtvec (3, base, call_ref, arg),
19315 UNSPECV_PLT_PCREL);
19316 emit_insn (gen_rtx_SET (reg, u));
19317 return reg;
19318 }
19319
19320 if (DEFAULT_ABI == ABI_ELFv2)
19321 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19322 else
19323 {
19324 if (flag_pic)
19325 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19326 regno = 11;
19327 }
19328 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19329 may be used by a function global entry point. For SysV4, r11
19330 is used by __glink_PLTresolve lazy resolver entry. */
19331 rtx reg = gen_rtx_REG (Pmode, regno);
19332 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19333 UNSPEC_PLT16_HA);
19334 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
19335 gen_rtvec (3, reg, call_ref, arg),
19336 UNSPECV_PLT16_LO);
19337 emit_insn (gen_rtx_SET (reg, hi));
19338 emit_insn (gen_rtx_SET (reg, lo));
19339 return reg;
19340 }
19341
19342 return force_reg (Pmode, call_ref);
19343 }
19344 \f
19345 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19346 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19347 #endif
19348
19349 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19350 struct attribute_spec.handler. */
19351 static tree
19352 rs6000_handle_struct_attribute (tree *node, tree name,
19353 tree args ATTRIBUTE_UNUSED,
19354 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19355 {
19356 tree *type = NULL;
19357 if (DECL_P (*node))
19358 {
19359 if (TREE_CODE (*node) == TYPE_DECL)
19360 type = &TREE_TYPE (*node);
19361 }
19362 else
19363 type = node;
19364
19365 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19366 || TREE_CODE (*type) == UNION_TYPE)))
19367 {
19368 warning (OPT_Wattributes, "%qE attribute ignored", name);
19369 *no_add_attrs = true;
19370 }
19371
19372 else if ((is_attribute_p ("ms_struct", name)
19373 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19374 || ((is_attribute_p ("gcc_struct", name)
19375 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19376 {
19377 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19378 name);
19379 *no_add_attrs = true;
19380 }
19381
19382 return NULL_TREE;
19383 }
19384
19385 static bool
19386 rs6000_ms_bitfield_layout_p (const_tree record_type)
19387 {
19388 return ((TARGET_USE_MS_BITFIELD_LAYOUT
19389 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19390 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19391 }
19392 \f
19393 #ifdef USING_ELFOS_H
19394
19395 /* A get_unnamed_section callback, used for switching to toc_section. */
19396
19397 static void
19398 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19399 {
19400 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19401 && TARGET_MINIMAL_TOC)
19402 {
19403 if (!toc_initialized)
19404 {
19405 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19406 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19407 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19408 fprintf (asm_out_file, "\t.tc ");
19409 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19410 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19411 fprintf (asm_out_file, "\n");
19412
19413 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19414 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19415 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19416 fprintf (asm_out_file, " = .+32768\n");
19417 toc_initialized = 1;
19418 }
19419 else
19420 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19421 }
19422 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19423 {
19424 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19425 if (!toc_initialized)
19426 {
19427 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19428 toc_initialized = 1;
19429 }
19430 }
19431 else
19432 {
19433 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19434 if (!toc_initialized)
19435 {
19436 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19437 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19438 fprintf (asm_out_file, " = .+32768\n");
19439 toc_initialized = 1;
19440 }
19441 }
19442 }
19443
19444 /* Implement TARGET_ASM_INIT_SECTIONS. */
19445
19446 static void
19447 rs6000_elf_asm_init_sections (void)
19448 {
19449 toc_section
19450 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19451
19452 sdata2_section
19453 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19454 SDATA2_SECTION_ASM_OP);
19455 }
19456
19457 /* Implement TARGET_SELECT_RTX_SECTION. */
19458
19459 static section *
19460 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19461 unsigned HOST_WIDE_INT align)
19462 {
19463 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19464 return toc_section;
19465 else
19466 return default_elf_select_rtx_section (mode, x, align);
19467 }
19468 \f
19469 /* For a SYMBOL_REF, set generic flags and then perform some
19470 target-specific processing.
19471
19472 When the AIX ABI is requested on a non-AIX system, replace the
19473 function name with the real name (with a leading .) rather than the
19474 function descriptor name. This saves a lot of overriding code to
19475 read the prefixes. */
19476
19477 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19478 static void
19479 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19480 {
19481 default_encode_section_info (decl, rtl, first);
19482
19483 if (first
19484 && TREE_CODE (decl) == FUNCTION_DECL
19485 && !TARGET_AIX
19486 && DEFAULT_ABI == ABI_AIX)
19487 {
19488 rtx sym_ref = XEXP (rtl, 0);
19489 size_t len = strlen (XSTR (sym_ref, 0));
19490 char *str = XALLOCAVEC (char, len + 2);
19491 str[0] = '.';
19492 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19493 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19494 }
19495 }
19496
19497 static inline bool
19498 compare_section_name (const char *section, const char *templ)
19499 {
19500 int len;
19501
19502 len = strlen (templ);
19503 return (strncmp (section, templ, len) == 0
19504 && (section[len] == 0 || section[len] == '.'));
19505 }
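/* For example, compare_section_name matches ".sdata" itself and
   subsections such as ".sdata.foo", but not ".sdatafoo".  */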
19506
19507 bool
19508 rs6000_elf_in_small_data_p (const_tree decl)
19509 {
19510 if (rs6000_sdata == SDATA_NONE)
19511 return false;
19512
19513 /* We want to merge strings, so we never consider them small data. */
19514 if (TREE_CODE (decl) == STRING_CST)
19515 return false;
19516
19517 /* Functions are never in the small data area. */
19518 if (TREE_CODE (decl) == FUNCTION_DECL)
19519 return false;
19520
19521 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19522 {
19523 const char *section = DECL_SECTION_NAME (decl);
19524 if (compare_section_name (section, ".sdata")
19525 || compare_section_name (section, ".sdata2")
19526 || compare_section_name (section, ".gnu.linkonce.s")
19527 || compare_section_name (section, ".sbss")
19528 || compare_section_name (section, ".sbss2")
19529 || compare_section_name (section, ".gnu.linkonce.sb")
19530 || strcmp (section, ".PPC.EMB.sdata0") == 0
19531 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19532 return true;
19533 }
19534 else
19535 {
19536 /* If we are told not to put readonly data in sdata, then don't. */
19537 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19538 && !rs6000_readonly_in_sdata)
19539 return false;
19540
19541 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19542
19543 if (size > 0
19544 && size <= g_switch_value
19545 /* If it's not public, and we're not going to reference it there,
19546 there's no need to put it in the small data section. */
19547 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19548 return true;
19549 }
19550
19551 return false;
19552 }
19553
19554 #endif /* USING_ELFOS_H */
19555 \f
19556 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19557
19558 static bool
19559 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19560 {
19561 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19562 }
19563
19564 /* Do not place thread-local symbols refs in the object blocks. */
19565
19566 static bool
19567 rs6000_use_blocks_for_decl_p (const_tree decl)
19568 {
19569 return !DECL_THREAD_LOCAL_P (decl);
19570 }
19571 \f
19572 /* Return a REG that occurs in ADDR with coefficient 1.
19573 ADDR can be effectively incremented by incrementing REG.
19574
19575 r0 is special and we must not select it as an address
19576 register by this routine since our caller will try to
19577 increment the returned register via an "la" instruction. */
19578
19579 rtx
19580 find_addr_reg (rtx addr)
19581 {
19582 while (GET_CODE (addr) == PLUS)
19583 {
19584 if (REG_P (XEXP (addr, 0))
19585 && REGNO (XEXP (addr, 0)) != 0)
19586 addr = XEXP (addr, 0);
19587 else if (REG_P (XEXP (addr, 1))
19588 && REGNO (XEXP (addr, 1)) != 0)
19589 addr = XEXP (addr, 1);
19590 else if (CONSTANT_P (XEXP (addr, 0)))
19591 addr = XEXP (addr, 1);
19592 else if (CONSTANT_P (XEXP (addr, 1)))
19593 addr = XEXP (addr, 0);
19594 else
19595 gcc_unreachable ();
19596 }
19597 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19598 return addr;
19599 }
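/* For example, given an address of the form (plus (reg 9) (const_int
   16)), find_addr_reg returns (reg 9); r0 is never chosen because "la"
   would read it as the constant zero.  */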
19600
19601 void
19602 rs6000_fatal_bad_address (rtx op)
19603 {
19604 fatal_insn ("bad address", op);
19605 }
19606
19607 #if TARGET_MACHO
19608
19609 vec<branch_island, va_gc> *branch_islands;
19610
19611 /* Remember to generate a branch island for far calls to the given
19612 function. */
19613
19614 static void
19615 add_compiler_branch_island (tree label_name, tree function_name,
19616 int line_number)
19617 {
19618 branch_island bi = {function_name, label_name, line_number};
19619 vec_safe_push (branch_islands, bi);
19620 }
19621
19622 /* NO_PREVIOUS_DEF checks whether the function name is already in the
19623 branch island list. */
19624
19625 static int
19626 no_previous_def (tree function_name)
19627 {
19628 branch_island *bi;
19629 unsigned ix;
19630
19631 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19632 if (function_name == bi->function_name)
19633 return 0;
19634 return 1;
19635 }
19636
19637 /* GET_PREV_LABEL gets the label name from the previous definition of
19638 the function. */
19639
19640 static tree
19641 get_prev_label (tree function_name)
19642 {
19643 branch_island *bi;
19644 unsigned ix;
19645
19646 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19647 if (function_name == bi->function_name)
19648 return bi->label_name;
19649 return NULL_TREE;
19650 }
19651
19652 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19653
19654 void
19655 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19656 {
19657 unsigned int length;
19658 char *symbol_name, *lazy_ptr_name;
19659 char *local_label_0;
19660 static unsigned label = 0;
19661
19662 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19663 symb = (*targetm.strip_name_encoding) (symb);
19664
19665 length = strlen (symb);
19666 symbol_name = XALLOCAVEC (char, length + 32);
19667 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19668
19669 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19670 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19671
19672 if (MACHOPIC_PURE)
19673 {
19674 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19675 fprintf (file, "\t.align 5\n");
19676
19677 fprintf (file, "%s:\n", stub);
19678 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19679
19680 label++;
19681 local_label_0 = XALLOCAVEC (char, 16);
19682 sprintf (local_label_0, "L%u$spb", label);
19683
19684 fprintf (file, "\tmflr r0\n");
19685 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19686 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19687 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19688 lazy_ptr_name, local_label_0);
19689 fprintf (file, "\tmtlr r0\n");
19690 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19691 (TARGET_64BIT ? "ldu" : "lwzu"),
19692 lazy_ptr_name, local_label_0);
19693 fprintf (file, "\tmtctr r12\n");
19694 fprintf (file, "\tbctr\n");
19695 }
19696 else /* mdynamic-no-pic or mkernel. */
19697 {
19698 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19699 fprintf (file, "\t.align 4\n");
19700
19701 fprintf (file, "%s:\n", stub);
19702 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19703
19704 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19705 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19706 (TARGET_64BIT ? "ldu" : "lwzu"),
19707 lazy_ptr_name);
19708 fprintf (file, "\tmtctr r12\n");
19709 fprintf (file, "\tbctr\n");
19710 }
19711
19712 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19713 fprintf (file, "%s:\n", lazy_ptr_name);
19714 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19715 fprintf (file, "%sdyld_stub_binding_helper\n",
19716 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19717 }
19718
19719 /* Legitimize PIC addresses. If the address is already
19720 position-independent, we return ORIG. Newly generated
19721 position-independent addresses go into a reg. This is REG if
19722 nonzero, otherwise we allocate register(s) as necessary. */
19723
19724 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
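/* SMALL_INT accepts exactly the signed 16-bit range: -0x8000 through
   0x7fff pass, while 0x8000 does not, since the biased value must stay
   below 0x10000.  */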
19725
19726 rtx
19727 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19728 rtx reg)
19729 {
19730 rtx base, offset;
19731
19732 if (reg == NULL && !reload_completed)
19733 reg = gen_reg_rtx (Pmode);
19734
19735 if (GET_CODE (orig) == CONST)
19736 {
19737 rtx reg_temp;
19738
19739 if (GET_CODE (XEXP (orig, 0)) == PLUS
19740 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19741 return orig;
19742
19743 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19744
19745 /* Use a different reg for the intermediate value, as
19746 it will be marked UNCHANGING. */
19747 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19748 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19749 Pmode, reg_temp);
19750 offset =
19751 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19752 Pmode, reg);
19753
19754 if (CONST_INT_P (offset))
19755 {
19756 if (SMALL_INT (offset))
19757 return plus_constant (Pmode, base, INTVAL (offset));
19758 else if (!reload_completed)
19759 offset = force_reg (Pmode, offset);
19760 else
19761 {
19762 rtx mem = force_const_mem (Pmode, orig);
19763 return machopic_legitimize_pic_address (mem, Pmode, reg);
19764 }
19765 }
19766 return gen_rtx_PLUS (Pmode, base, offset);
19767 }
19768
19769 /* Fall back on generic machopic code. */
19770 return machopic_legitimize_pic_address (orig, mode, reg);
19771 }
19772
19773 /* Output a .machine directive for the Darwin assembler, and call
19774 the generic start_file routine. */
19775
19776 static void
19777 rs6000_darwin_file_start (void)
19778 {
19779 static const struct
19780 {
19781 const char *arg;
19782 const char *name;
19783 HOST_WIDE_INT if_set;
19784 } mapping[] = {
19785 { "ppc64", "ppc64", MASK_64BIT },
19786 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19787 { "power4", "ppc970", 0 },
19788 { "G5", "ppc970", 0 },
19789 { "7450", "ppc7450", 0 },
19790 { "7400", "ppc7400", MASK_ALTIVEC },
19791 { "G4", "ppc7400", 0 },
19792 { "750", "ppc750", 0 },
19793 { "740", "ppc750", 0 },
19794 { "G3", "ppc750", 0 },
19795 { "604e", "ppc604e", 0 },
19796 { "604", "ppc604", 0 },
19797 { "603e", "ppc603", 0 },
19798 { "603", "ppc603", 0 },
19799 { "601", "ppc601", 0 },
19800 { NULL, "ppc", 0 } };
19801 const char *cpu_id = "";
19802 size_t i;
19803
19804 rs6000_file_start ();
19805 darwin_file_start ();
19806
19807 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19808
19809 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19810 cpu_id = rs6000_default_cpu;
19811
19812 if (global_options_set.x_rs6000_cpu_index)
19813 cpu_id = processor_target_table[rs6000_cpu_index].name;
19814
19815 /* Look through the mapping array. Pick the first name that either
19816 matches the argument, has a bit set in IF_SET that is also set
19817 in the target flags, or has a NULL name. */
19818
19819 i = 0;
19820 while (mapping[i].arg != NULL
19821 && strcmp (mapping[i].arg, cpu_id) != 0
19822 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19823 i++;
19824
19825 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19826 }
19827
19828 #endif /* TARGET_MACHO */
19829
19830 #if TARGET_ELF
19831 static int
19832 rs6000_elf_reloc_rw_mask (void)
19833 {
19834 if (flag_pic)
19835 return 3;
19836 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19837 return 2;
19838 else
19839 return 0;
19840 }
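/* (The returned value is a mask tested against the relocation kind
   computed for an initializer -- 1 for relative references, 2 for
   absolute ones -- so returning 2 above keeps only absolutely-relocated
   data out of read-only sections, while 3 keeps out both.)  */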
19841
19842 /* Record an element in the table of global constructors. SYMBOL is
19843 a SYMBOL_REF of the function to be called; PRIORITY is a number
19844 between 0 and MAX_INIT_PRIORITY.
19845
19846 This differs from default_named_section_asm_out_constructor in
19847 that we have special handling for -mrelocatable. */
19848
19849 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19850 static void
19851 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19852 {
19853 const char *section = ".ctors";
19854 char buf[18];
19855
19856 if (priority != DEFAULT_INIT_PRIORITY)
19857 {
19858 sprintf (buf, ".ctors.%.5u",
19859 /* Invert the numbering so the linker puts us in the proper
19860 order; constructors are run from right to left, and the
19861 linker sorts in increasing order. */
19862 MAX_INIT_PRIORITY - priority);
19863 section = buf;
19864 }
19865
19866 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19867 assemble_align (POINTER_SIZE);
19868
19869 if (DEFAULT_ABI == ABI_V4
19870 && (TARGET_RELOCATABLE || flag_pic > 1))
19871 {
19872 fputs ("\t.long (", asm_out_file);
19873 output_addr_const (asm_out_file, symbol);
19874 fputs (")@fixup\n", asm_out_file);
19875 }
19876 else
19877 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19878 }
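/* For instance, a constructor with priority 100 lands in section
   ".ctors.65435" (MAX_INIT_PRIORITY being 65535).  The linker sorts the
   sections in increasing name order and .ctors entries are executed
   last-to-first, so lower-numbered (earlier) priorities do run first.  */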
19879
19880 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19881 static void
19882 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19883 {
19884 const char *section = ".dtors";
19885 char buf[18];
19886
19887 if (priority != DEFAULT_INIT_PRIORITY)
19888 {
19889 sprintf (buf, ".dtors.%.5u",
19890 /* Invert the numbering so the linker puts us in the proper
19891 order; constructors are run from right to left, and the
19892 linker sorts in increasing order. */
19893 MAX_INIT_PRIORITY - priority);
19894 section = buf;
19895 }
19896
19897 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19898 assemble_align (POINTER_SIZE);
19899
19900 if (DEFAULT_ABI == ABI_V4
19901 && (TARGET_RELOCATABLE || flag_pic > 1))
19902 {
19903 fputs ("\t.long (", asm_out_file);
19904 output_addr_const (asm_out_file, symbol);
19905 fputs (")@fixup\n", asm_out_file);
19906 }
19907 else
19908 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19909 }
19910
19911 void
19912 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19913 {
19914 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19915 {
19916 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19917 ASM_OUTPUT_LABEL (file, name);
19918 fputs (DOUBLE_INT_ASM_OP, file);
19919 rs6000_output_function_entry (file, name);
19920 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19921 if (DOT_SYMBOLS)
19922 {
19923 fputs ("\t.size\t", file);
19924 assemble_name (file, name);
19925 fputs (",24\n\t.type\t.", file);
19926 assemble_name (file, name);
19927 fputs (",@function\n", file);
19928 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19929 {
19930 fputs ("\t.globl\t.", file);
19931 assemble_name (file, name);
19932 putc ('\n', file);
19933 }
19934 }
19935 else
19936 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19937 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19938 rs6000_output_function_entry (file, name);
19939 fputs (":\n", file);
19940 return;
19941 }
19942
19943 int uses_toc;
19944 if (DEFAULT_ABI == ABI_V4
19945 && (TARGET_RELOCATABLE || flag_pic > 1)
19946 && !TARGET_SECURE_PLT
19947 && (!constant_pool_empty_p () || crtl->profile)
19948 && (uses_toc = uses_TOC ()))
19949 {
19950 char buf[256];
19951
19952 if (uses_toc == 2)
19953 switch_to_other_text_partition ();
19954 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19955
19956 fprintf (file, "\t.long ");
19957 assemble_name (file, toc_label_name);
19958 need_toc_init = 1;
19959 putc ('-', file);
19960 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19961 assemble_name (file, buf);
19962 putc ('\n', file);
19963 if (uses_toc == 2)
19964 switch_to_other_text_partition ();
19965 }
19966
19967 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19968 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19969
19970 if (TARGET_CMODEL == CMODEL_LARGE
19971 && rs6000_global_entry_point_prologue_needed_p ())
19972 {
19973 char buf[256];
19974
19975 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19976
19977 fprintf (file, "\t.quad .TOC.-");
19978 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19979 assemble_name (file, buf);
19980 putc ('\n', file);
19981 }
19982
19983 if (DEFAULT_ABI == ABI_AIX)
19984 {
19985 const char *desc_name, *orig_name;
19986
19987 orig_name = (*targetm.strip_name_encoding) (name);
19988 desc_name = orig_name;
19989 while (*desc_name == '.')
19990 desc_name++;
19991
19992 if (TREE_PUBLIC (decl))
19993 fprintf (file, "\t.globl %s\n", desc_name);
19994
19995 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19996 fprintf (file, "%s:\n", desc_name);
19997 fprintf (file, "\t.long %s\n", orig_name);
19998 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19999 fputs ("\t.long 0\n", file);
20000 fprintf (file, "\t.previous\n");
20001 }
20002 ASM_OUTPUT_LABEL (file, name);
20003 }
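/* For the 64-bit ELFv1 case above, the emitted descriptor looks
   roughly like this (with dot-symbols, so the code entry point is
   ".foo"):

	.section ".opd","aw"
	.align 3
   foo:
	.quad .foo,.TOC.@tocbase,0
	.previous
	.size foo,24
	.type .foo,@function
	.globl .foo
   .foo:

   i.e. "foo" names the three-doubleword function descriptor while
   ".foo" labels the actual code.  */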
20004
20005 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20006 static void
20007 rs6000_elf_file_end (void)
20008 {
20009 #ifdef HAVE_AS_GNU_ATTRIBUTE
20010 /* ??? The value emitted depends on options active at file end.
20011 Assume anyone using #pragma or attributes that might change
20012 options knows what they are doing. */
20013 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20014 && rs6000_passes_float)
20015 {
20016 int fp;
20017
20018 if (TARGET_HARD_FLOAT)
20019 fp = 1;
20020 else
20021 fp = 2;
20022 if (rs6000_passes_long_double)
20023 {
20024 if (!TARGET_LONG_DOUBLE_128)
20025 fp |= 2 * 4;
20026 else if (TARGET_IEEEQUAD)
20027 fp |= 3 * 4;
20028 else
20029 fp |= 1 * 4;
20030 }
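/* E.g. hard float with 128-bit IBM long double yields 1 | 4 = 5,
   i.e. ".gnu_attribute 4, 5".  */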
20031 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20032 }
20033 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20034 {
20035 if (rs6000_passes_vector)
20036 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20037 (TARGET_ALTIVEC_ABI ? 2 : 1));
20038 if (rs6000_returns_struct)
20039 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20040 aix_struct_return ? 2 : 1);
20041 }
20042 #endif
20043 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20044 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20045 file_end_indicate_exec_stack ();
20046 #endif
20047
20048 if (flag_split_stack)
20049 file_end_indicate_split_stack ();
20050
20051 if (cpu_builtin_p)
20052 {
20053 /* We have expanded a CPU builtin, so we need to emit a reference to
20054 the special symbol that LIBC uses to declare that it supports
20055 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB. */
20056 switch_to_section (data_section);
20057 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20058 fprintf (asm_out_file, "\t%s %s\n",
20059 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20060 }
20061 }
20062 #endif
20063
20064 #if TARGET_XCOFF
20065
20066 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20067 #define HAVE_XCOFF_DWARF_EXTRAS 0
20068 #endif
20069
20070 static enum unwind_info_type
20071 rs6000_xcoff_debug_unwind_info (void)
20072 {
20073 return UI_NONE;
20074 }
20075
20076 static void
20077 rs6000_xcoff_asm_output_anchor (rtx symbol)
20078 {
20079 char buffer[100];
20080
20081 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20082 SYMBOL_REF_BLOCK_OFFSET (symbol));
20083 fprintf (asm_out_file, "%s", SET_ASM_OP);
20084 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20085 fprintf (asm_out_file, ",");
20086 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20087 fprintf (asm_out_file, "\n");
20088 }
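/* E.g. a symbol placed 16 bytes into its anchor block comes out as
   ".set sym,$ + 16".  */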
20089
20090 static void
20091 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20092 {
20093 fputs (GLOBAL_ASM_OP, stream);
20094 RS6000_OUTPUT_BASENAME (stream, name);
20095 putc ('\n', stream);
20096 }
20097
20098 /* A get_unnamed_section callback, used for read-only sections.
20099 DIRECTIVE points to the section string variable. */
20100
20101 static void
20102 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20103 {
20104 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20105 *(const char *const *) directive,
20106 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20107 }
20108
20109 /* Likewise for read-write sections. */
20110
20111 static void
20112 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20113 {
20114 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20115 *(const char *const *) directive,
20116 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20117 }
20118
20119 static void
20120 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20121 {
20122 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20123 *(const char *const *) directive,
20124 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20125 }
20126
20127 /* A get_unnamed_section callback, used for switching to toc_section. */
20128
20129 static void
20130 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20131 {
20132 if (TARGET_MINIMAL_TOC)
20133 {
20134 /* toc_section is always selected at least once from
20135 rs6000_xcoff_file_start, so this is guaranteed to
20136 always be defined once and only once in each file. */
20137 if (!toc_initialized)
20138 {
20139 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20140 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20141 toc_initialized = 1;
20142 }
20143 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20144 (TARGET_32BIT ? "" : ",3"));
20145 }
20146 else
20147 fputs ("\t.toc\n", asm_out_file);
20148 }
20149
20150 /* Implement TARGET_ASM_INIT_SECTIONS. */
20151
20152 static void
20153 rs6000_xcoff_asm_init_sections (void)
20154 {
20155 read_only_data_section
20156 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20157 &xcoff_read_only_section_name);
20158
20159 private_data_section
20160 = get_unnamed_section (SECTION_WRITE,
20161 rs6000_xcoff_output_readwrite_section_asm_op,
20162 &xcoff_private_data_section_name);
20163
20164 read_only_private_data_section
20165 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20166 &xcoff_private_rodata_section_name);
20167
20168 tls_data_section
20169 = get_unnamed_section (SECTION_TLS,
20170 rs6000_xcoff_output_tls_section_asm_op,
20171 &xcoff_tls_data_section_name);
20172
20173 tls_private_data_section
20174 = get_unnamed_section (SECTION_TLS,
20175 rs6000_xcoff_output_tls_section_asm_op,
20176 &xcoff_private_data_section_name);
20177
20178 toc_section
20179 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20180
20181 readonly_data_section = read_only_data_section;
20182 }
20183
20184 static int
20185 rs6000_xcoff_reloc_rw_mask (void)
20186 {
20187 return 3;
20188 }
20189
20190 static void
20191 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20192 tree decl ATTRIBUTE_UNUSED)
20193 {
20194 int smclass;
20195 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20196
20197 if (flags & SECTION_EXCLUDE)
20198 smclass = 4;
20199 else if (flags & SECTION_DEBUG)
20200 {
20201 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20202 return;
20203 }
20204 else if (flags & SECTION_CODE)
20205 smclass = 0;
20206 else if (flags & SECTION_TLS)
20207 smclass = 3;
20208 else if (flags & SECTION_WRITE)
20209 smclass = 2;
20210 else
20211 smclass = 1;
20212
20213 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20214 (flags & SECTION_CODE) ? "." : "",
20215 name, suffix[smclass], flags & SECTION_ENTSIZE);
20216 }
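/* So a hypothetical writable named section "mydata" whose entsize bits
   encode an 8-byte alignment is emitted as "\t.csect mydata[RW],3".  */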
20217
20218 #define IN_NAMED_SECTION(DECL) \
20219 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20220 && DECL_SECTION_NAME (DECL) != NULL)
20221
20222 static section *
20223 rs6000_xcoff_select_section (tree decl, int reloc,
20224 unsigned HOST_WIDE_INT align)
20225 {
20226 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20227 a named section. */
20228 if (align > BIGGEST_ALIGNMENT)
20229 {
20230 resolve_unique_section (decl, reloc, true);
20231 if (IN_NAMED_SECTION (decl))
20232 return get_named_section (decl, NULL, reloc);
20233 }
20234
20235 if (decl_readonly_section (decl, reloc))
20236 {
20237 if (TREE_PUBLIC (decl))
20238 return read_only_data_section;
20239 else
20240 return read_only_private_data_section;
20241 }
20242 else
20243 {
20244 #if HAVE_AS_TLS
20245 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20246 {
20247 if (TREE_PUBLIC (decl))
20248 return tls_data_section;
20249 else if (bss_initializer_p (decl))
20250 {
20251 /* Convert to COMMON to emit in BSS. */
20252 DECL_COMMON (decl) = 1;
20253 return tls_comm_section;
20254 }
20255 else
20256 return tls_private_data_section;
20257 }
20258 else
20259 #endif
20260 if (TREE_PUBLIC (decl))
20261 return data_section;
20262 else
20263 return private_data_section;
20264 }
20265 }
20266
20267 static void
20268 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20269 {
20270 const char *name;
20271
20272 /* Use select_section for private data and uninitialized data with
20273 alignment <= BIGGEST_ALIGNMENT. */
20274 if (!TREE_PUBLIC (decl)
20275 || DECL_COMMON (decl)
20276 || (DECL_INITIAL (decl) == NULL_TREE
20277 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20278 || DECL_INITIAL (decl) == error_mark_node
20279 || (flag_zero_initialized_in_bss
20280 && initializer_zerop (DECL_INITIAL (decl))))
20281 return;
20282
20283 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20284 name = (*targetm.strip_name_encoding) (name);
20285 set_decl_section_name (decl, name);
20286 }
20287
20288 /* Select section for constant in constant pool.
20289
20290 On RS/6000, all constants are in the private read-only data area.
20291 However, if this is being placed in the TOC it must be output as a
20292 toc entry. */
20293
20294 static section *
20295 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20296 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20297 {
20298 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20299 return toc_section;
20300 else
20301 return read_only_private_data_section;
20302 }
20303
20304 /* Remove any trailing [DS] or the like from the symbol name. */
20305
20306 static const char *
20307 rs6000_xcoff_strip_name_encoding (const char *name)
20308 {
20309 size_t len;
20310 if (*name == '*')
20311 name++;
20312 len = strlen (name);
20313 if (name[len - 1] == ']')
20314 return ggc_alloc_string (name, len - 4);
20315 else
20316 return name;
20317 }
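/* So "foo[DS]" (and "*foo[DS]") both come back as "foo"; a two-letter
   mapping class plus brackets is exactly the four trailing characters
   removed.  */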
20318
20319 /* Section attributes. AIX is always PIC. */
20320
20321 static unsigned int
20322 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20323 {
20324 unsigned int align;
20325 unsigned int flags = default_section_type_flags (decl, name, reloc);
20326
20327 /* Align to at least UNIT size. */
20328 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20329 align = MIN_UNITS_PER_WORD;
20330 else
20331 /* Increase alignment of large objects if not already stricter. */
20332 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20333 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20334 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20335
20336 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20337 }
20338
20339 /* Output at beginning of assembler file.
20340
20341 Initialize the section names for the RS/6000 at this point.
20342
20343 Specify filename, including full path, to assembler.
20344
20345 We want to go into the TOC section so at least one .toc will be emitted.
20346 Also, in order to output proper .bs/.es pairs, we need at least one static
20347 [RW] section emitted.
20348
20349 Finally, declare mcount when profiling to make the assembler happy. */
20350
20351 static void
20352 rs6000_xcoff_file_start (void)
20353 {
20354 rs6000_gen_section_name (&xcoff_bss_section_name,
20355 main_input_filename, ".bss_");
20356 rs6000_gen_section_name (&xcoff_private_data_section_name,
20357 main_input_filename, ".rw_");
20358 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20359 main_input_filename, ".rop_");
20360 rs6000_gen_section_name (&xcoff_read_only_section_name,
20361 main_input_filename, ".ro_");
20362 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20363 main_input_filename, ".tls_");
20364 rs6000_gen_section_name (&xcoff_tbss_section_name,
20365 main_input_filename, ".tbss_[UL]");
20366
20367 fputs ("\t.file\t", asm_out_file);
20368 output_quoted_string (asm_out_file, main_input_filename);
20369 fputc ('\n', asm_out_file);
20370 if (write_symbols != NO_DEBUG)
20371 switch_to_section (private_data_section);
20372 switch_to_section (toc_section);
20373 switch_to_section (text_section);
20374 if (profile_flag)
20375 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20376 rs6000_file_start ();
20377 }
20378
20379 /* Output at end of assembler file.
20380 On the RS/6000, referencing data should automatically pull in text. */
20381
20382 static void
20383 rs6000_xcoff_file_end (void)
20384 {
20385 switch_to_section (text_section);
20386 fputs ("_section_.text:\n", asm_out_file);
20387 switch_to_section (data_section);
20388 fputs (TARGET_32BIT
20389 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20390 asm_out_file);
20391 }
20392
20393 struct declare_alias_data
20394 {
20395 FILE *file;
20396 bool function_descriptor;
20397 };
20398
20399 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
20400
20401 static bool
20402 rs6000_declare_alias (struct symtab_node *n, void *d)
20403 {
20404 struct declare_alias_data *data = (struct declare_alias_data *)d;
20405 /* Main symbol is output specially, because varasm machinery does part of
20406 the job for us - we do not need to declare .globl/lglobs and such. */
20407 if (!n->alias || n->weakref)
20408 return false;
20409
20410 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20411 return false;
20412
20413 /* Prevent assemble_alias from trying to use .set pseudo operation
20414 that does not behave as expected by the middle-end. */
20415 TREE_ASM_WRITTEN (n->decl) = true;
20416
20417 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20418 char *buffer = (char *) alloca (strlen (name) + 2);
20419 char *p;
20420 int dollar_inside = 0;
20421
20422 strcpy (buffer, name);
20423 p = strchr (buffer, '$');
20424 while (p) {
20425 *p = '_';
20426 dollar_inside++;
20427 p = strchr (p + 1, '$');
20428 }
20429 if (TREE_PUBLIC (n->decl))
20430 {
20431 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20432 {
20433 if (dollar_inside) {
20434 if (data->function_descriptor)
20435 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20436 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20437 }
20438 if (data->function_descriptor)
20439 {
20440 fputs ("\t.globl .", data->file);
20441 RS6000_OUTPUT_BASENAME (data->file, buffer);
20442 putc ('\n', data->file);
20443 }
20444 fputs ("\t.globl ", data->file);
20445 RS6000_OUTPUT_BASENAME (data->file, buffer);
20446 putc ('\n', data->file);
20447 }
20448 #ifdef ASM_WEAKEN_DECL
20449 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20450 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20451 #endif
20452 }
20453 else
20454 {
20455 if (dollar_inside)
20456 {
20457 if (data->function_descriptor)
20458 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20459 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20460 }
20461 if (data->function_descriptor)
20462 {
20463 fputs ("\t.lglobl .", data->file);
20464 RS6000_OUTPUT_BASENAME (data->file, buffer);
20465 putc ('\n', data->file);
20466 }
20467 fputs ("\t.lglobl ", data->file);
20468 RS6000_OUTPUT_BASENAME (data->file, buffer);
20469 putc ('\n', data->file);
20470 }
20471 if (data->function_descriptor)
20472 fputs (".", data->file);
20473 RS6000_OUTPUT_BASENAME (data->file, buffer);
20474 fputs (":\n", data->file);
20475 return false;
20476 }
20477
20478
20479 #ifdef HAVE_GAS_HIDDEN
20480 /* Helper function to calculate visibility of a DECL
20481 and return the value as a const string. */
20482
20483 static const char *
20484 rs6000_xcoff_visibility (tree decl)
20485 {
20486 static const char * const visibility_types[] = {
20487 "", ",protected", ",hidden", ",internal"
20488 };
20489
20490 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20491 return visibility_types[vis];
20492 }
20493 #endif
20494
20495
20496 /* This macro produces the initial definition of a function name.
20497 On the RS/6000, we need to place an extra '.' in the function name and
20498 output the function descriptor.
20499 Dollar signs are converted to underscores.
20500
20501 The csect for the function will have already been created when
20502 text_section was selected. We do have to go back to that csect, however.
20503
20504 The third and fourth parameters to the .function pseudo-op (2 and 0
20505 below) are placeholders which no longer have any use.
20506
20507 Because AIX assembler's .set command has unexpected semantics, we output
20508 all aliases as alternative labels in front of the definition. */
20509
20510 void
20511 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20512 {
20513 char *buffer = (char *) alloca (strlen (name) + 1);
20514 char *p;
20515 int dollar_inside = 0;
20516 struct declare_alias_data data = {file, false};
20517
20518 strcpy (buffer, name);
20519 p = strchr (buffer, '$');
20520 while (p) {
20521 *p = '_';
20522 dollar_inside++;
20523 p = strchr (p + 1, '$');
20524 }
20525 if (TREE_PUBLIC (decl))
20526 {
20527 if (!RS6000_WEAK || !DECL_WEAK (decl))
20528 {
20529 if (dollar_inside) {
20530 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20531 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20532 }
20533 fputs ("\t.globl .", file);
20534 RS6000_OUTPUT_BASENAME (file, buffer);
20535 #ifdef HAVE_GAS_HIDDEN
20536 fputs (rs6000_xcoff_visibility (decl), file);
20537 #endif
20538 putc ('\n', file);
20539 }
20540 }
20541 else
20542 {
20543 if (dollar_inside) {
20544 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20545 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20546 }
20547 fputs ("\t.lglobl .", file);
20548 RS6000_OUTPUT_BASENAME (file, buffer);
20549 putc ('\n', file);
20550 }
20551 fputs ("\t.csect ", file);
20552 RS6000_OUTPUT_BASENAME (file, buffer);
20553 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20554 RS6000_OUTPUT_BASENAME (file, buffer);
20555 fputs (":\n", file);
20556 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20557 &data, true);
20558 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20559 RS6000_OUTPUT_BASENAME (file, buffer);
20560 fputs (", TOC[tc0], 0\n", file);
20561 in_section = NULL;
20562 switch_to_section (function_section (decl));
20563 putc ('.', file);
20564 RS6000_OUTPUT_BASENAME (file, buffer);
20565 fputs (":\n", file);
20566 data.function_descriptor = true;
20567 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20568 &data, true);
20569 if (!DECL_IGNORED_P (decl))
20570 {
20571 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20572 xcoffout_declare_function (file, decl, buffer);
20573 else if (write_symbols == DWARF2_DEBUG)
20574 {
20575 name = (*targetm.strip_name_encoding) (name);
20576 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20577 }
20578 }
20579 return;
20580 }
20581
20582
20583 /* Output assembly language to globalize a symbol from a DECL,
20584 possibly with visibility. */
20585
20586 void
20587 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20588 {
20589 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20590 fputs (GLOBAL_ASM_OP, stream);
20591 RS6000_OUTPUT_BASENAME (stream, name);
20592 #ifdef HAVE_GAS_HIDDEN
20593 fputs (rs6000_xcoff_visibility (decl), stream);
20594 #endif
20595 putc ('\n', stream);
20596 }
20597
20598 /* Output assembly language to define a symbol as COMMON from a DECL,
20599 possibly with visibility. */
20600
20601 void
20602 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20603 tree decl ATTRIBUTE_UNUSED,
20604 const char *name,
20605 unsigned HOST_WIDE_INT size,
20606 unsigned HOST_WIDE_INT align)
20607 {
20608 unsigned HOST_WIDE_INT align2 = 2;
20609
20610 if (align > 32)
20611 align2 = floor_log2 (align / BITS_PER_UNIT);
20612 else if (size > 4)
20613 align2 = 3;
20614
20615 fputs (COMMON_ASM_OP, stream);
20616 RS6000_OUTPUT_BASENAME (stream, name);
20617
20618 fprintf (stream,
20619 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20620 size, align2);
20621
20622 #ifdef HAVE_GAS_HIDDEN
20623 if (decl != NULL)
20624 fputs (rs6000_xcoff_visibility (decl), stream);
20625 #endif
20626 putc ('\n', stream);
20627 }
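/* For a hypothetical 16-byte common block "buf" aligned to 16 bytes
   (128 bits > 32, so align2 = log2 (16) = 4) this prints
   ".comm buf,16,4" plus any visibility suffix.  */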
20628
20629 /* This macro produces the initial definition of an object (variable) name.
20630 Because AIX assembler's .set command has unexpected semantics, we output
20631 all aliases as alternative labels in front of the definition. */
20632
20633 void
20634 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20635 {
20636 struct declare_alias_data data = {file, false};
20637 RS6000_OUTPUT_BASENAME (file, name);
20638 fputs (":\n", file);
20639 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20640 &data, true);
20641 }
20642
20643 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
20644
20645 void
20646 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20647 {
20648 fputs (integer_asm_op (size, FALSE), file);
20649 assemble_name (file, label);
20650 fputs ("-$", file);
20651 }
20652
20653 /* Output a symbol offset relative to the dbase for the current object.
20654 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20655 signed offsets.
20656
20657 __gcc_unwind_dbase is embedded in all executables/libraries through
20658 libgcc/config/rs6000/crtdbase.S. */
20659
20660 void
20661 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20662 {
20663 fputs (integer_asm_op (size, FALSE), file);
20664 assemble_name (file, label);
20665 fputs("-__gcc_unwind_dbase", file);
20666 }
20667
20668 #ifdef HAVE_AS_TLS
20669 static void
20670 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20671 {
20672 rtx symbol;
20673 int flags;
20674 const char *symname;
20675
20676 default_encode_section_info (decl, rtl, first);
20677
20678 /* Careful not to prod global register variables. */
20679 if (!MEM_P (rtl))
20680 return;
20681 symbol = XEXP (rtl, 0);
20682 if (!SYMBOL_REF_P (symbol))
20683 return;
20684
20685 flags = SYMBOL_REF_FLAGS (symbol);
20686
20687 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20688 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20689
20690 SYMBOL_REF_FLAGS (symbol) = flags;
20691
20692 /* Append mapping class to extern decls. */
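/* For example, an external function "foo" becomes "foo[DS]" and an
   external variable "bar" becomes "bar[UA]".  */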
20693 symname = XSTR (symbol, 0);
20694 if (decl /* sync condition with assemble_external () */
20695 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20696 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20697 || TREE_CODE (decl) == FUNCTION_DECL)
20698 && symname[strlen (symname) - 1] != ']')
20699 {
20700 char *newname = (char *) alloca (strlen (symname) + 5);
20701 strcpy (newname, symname);
20702 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20703 ? "[DS]" : "[UA]"));
20704 XSTR (symbol, 0) = ggc_strdup (newname);
20705 }
20706 }
20707 #endif /* HAVE_AS_TLS */
20708 #endif /* TARGET_XCOFF */
20709
20710 void
20711 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20712 const char *name, const char *val)
20713 {
20714 fputs ("\t.weak\t", stream);
20715 RS6000_OUTPUT_BASENAME (stream, name);
20716 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20717 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20718 {
20719 if (TARGET_XCOFF)
20720 fputs ("[DS]", stream);
20721 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20722 if (TARGET_XCOFF)
20723 fputs (rs6000_xcoff_visibility (decl), stream);
20724 #endif
20725 fputs ("\n\t.weak\t.", stream);
20726 RS6000_OUTPUT_BASENAME (stream, name);
20727 }
20728 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20729 if (TARGET_XCOFF)
20730 fputs (rs6000_xcoff_visibility (decl), stream);
20731 #endif
20732 fputc ('\n', stream);
20733 if (val)
20734 {
20735 #ifdef ASM_OUTPUT_DEF
20736 ASM_OUTPUT_DEF (stream, name, val);
20737 #endif
20738 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20739 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20740 {
20741 fputs ("\t.set\t.", stream);
20742 RS6000_OUTPUT_BASENAME (stream, name);
20743 fputs (",.", stream);
20744 RS6000_OUTPUT_BASENAME (stream, val);
20745 fputc ('\n', stream);
20746 }
20747 }
20748 }
20749
20750
20751 /* Return true if INSN should not be copied. */
20752
20753 static bool
20754 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20755 {
20756 return recog_memoized (insn) >= 0
20757 && get_attr_cannot_copy (insn);
20758 }
20759
20760 /* Compute a (partial) cost for rtx X. Return true if the complete
20761 cost has been computed, and false if subexpressions should be
20762 scanned. In either case, *TOTAL contains the cost result. */
20763
20764 static bool
20765 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20766 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20767 {
20768 int code = GET_CODE (x);
20769
20770 switch (code)
20771 {
20772 /* On the RS/6000, if it is valid in the insn, it is free. */
20773 case CONST_INT:
20774 if (((outer_code == SET
20775 || outer_code == PLUS
20776 || outer_code == MINUS)
20777 && (satisfies_constraint_I (x)
20778 || satisfies_constraint_L (x)))
20779 || (outer_code == AND
20780 && (satisfies_constraint_K (x)
20781 || (mode == SImode
20782 ? satisfies_constraint_L (x)
20783 : satisfies_constraint_J (x))))
20784 || ((outer_code == IOR || outer_code == XOR)
20785 && (satisfies_constraint_K (x)
20786 || (mode == SImode
20787 ? satisfies_constraint_L (x)
20788 : satisfies_constraint_J (x))))
20789 || outer_code == ASHIFT
20790 || outer_code == ASHIFTRT
20791 || outer_code == LSHIFTRT
20792 || outer_code == ROTATE
20793 || outer_code == ROTATERT
20794 || outer_code == ZERO_EXTRACT
20795 || (outer_code == MULT
20796 && satisfies_constraint_I (x))
20797 || ((outer_code == DIV || outer_code == UDIV
20798 || outer_code == MOD || outer_code == UMOD)
20799 && exact_log2 (INTVAL (x)) >= 0)
20800 || (outer_code == COMPARE
20801 && (satisfies_constraint_I (x)
20802 || satisfies_constraint_K (x)))
20803 || ((outer_code == EQ || outer_code == NE)
20804 && (satisfies_constraint_I (x)
20805 || satisfies_constraint_K (x)
20806 || (mode == SImode
20807 ? satisfies_constraint_L (x)
20808 : satisfies_constraint_J (x))))
20809 || (outer_code == GTU
20810 && satisfies_constraint_I (x))
20811 || (outer_code == LTU
20812 && satisfies_constraint_P (x)))
20813 {
20814 *total = 0;
20815 return true;
20816 }
20817 else if ((outer_code == PLUS
20818 && reg_or_add_cint_operand (x, VOIDmode))
20819 || (outer_code == MINUS
20820 && reg_or_sub_cint_operand (x, VOIDmode))
20821 || ((outer_code == SET
20822 || outer_code == IOR
20823 || outer_code == XOR)
20824 && (INTVAL (x)
20825 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20826 {
20827 *total = COSTS_N_INSNS (1);
20828 return true;
20829 }
20830 /* FALLTHRU */
20831
20832 case CONST_DOUBLE:
20833 case CONST_WIDE_INT:
20834 case CONST:
20835 case HIGH:
20836 case SYMBOL_REF:
20837 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20838 return true;
20839
20840 case MEM:
20841 /* When optimizing for size, MEM should be slightly more expensive
20842 than generating the address, e.g., (plus (reg) (const)).
20843 L1 cache latency is about two instructions. */
20844 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20845 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20846 *total += COSTS_N_INSNS (100);
20847 return true;
20848
20849 case LABEL_REF:
20850 *total = 0;
20851 return true;
20852
20853 case PLUS:
20854 case MINUS:
20855 if (FLOAT_MODE_P (mode))
20856 *total = rs6000_cost->fp;
20857 else
20858 *total = COSTS_N_INSNS (1);
20859 return false;
20860
20861 case MULT:
20862 if (CONST_INT_P (XEXP (x, 1))
20863 && satisfies_constraint_I (XEXP (x, 1)))
20864 {
20865 if (INTVAL (XEXP (x, 1)) >= -256
20866 && INTVAL (XEXP (x, 1)) <= 255)
20867 *total = rs6000_cost->mulsi_const9;
20868 else
20869 *total = rs6000_cost->mulsi_const;
20870 }
20871 else if (mode == SFmode)
20872 *total = rs6000_cost->fp;
20873 else if (FLOAT_MODE_P (mode))
20874 *total = rs6000_cost->dmul;
20875 else if (mode == DImode)
20876 *total = rs6000_cost->muldi;
20877 else
20878 *total = rs6000_cost->mulsi;
20879 return false;
20880
20881 case FMA:
20882 if (mode == SFmode)
20883 *total = rs6000_cost->fp;
20884 else
20885 *total = rs6000_cost->dmul;
20886 break;
20887
20888 case DIV:
20889 case MOD:
20890 if (FLOAT_MODE_P (mode))
20891 {
20892 *total = mode == DFmode ? rs6000_cost->ddiv
20893 : rs6000_cost->sdiv;
20894 return false;
20895 }
20896 /* FALLTHRU */
20897
20898 case UDIV:
20899 case UMOD:
20900 if (CONST_INT_P (XEXP (x, 1))
20901 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20902 {
20903 if (code == DIV || code == MOD)
20904 /* Shift, addze */
20905 *total = COSTS_N_INSNS (2);
20906 else
20907 /* Shift */
20908 *total = COSTS_N_INSNS (1);
20909 }
20910 else
20911 {
20912 if (GET_MODE (XEXP (x, 1)) == DImode)
20913 *total = rs6000_cost->divdi;
20914 else
20915 *total = rs6000_cost->divsi;
20916 }
20917 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20918 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20919 *total += COSTS_N_INSNS (2);
20920 return false;
20921
20922 case CTZ:
20923 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20924 return false;
20925
20926 case FFS:
20927 *total = COSTS_N_INSNS (4);
20928 return false;
20929
20930 case POPCOUNT:
20931 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20932 return false;
20933
20934 case PARITY:
20935 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20936 return false;
20937
20938 case NOT:
20939 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20940 *total = 0;
20941 else
20942 *total = COSTS_N_INSNS (1);
20943 return false;
20944
20945 case AND:
20946 if (CONST_INT_P (XEXP (x, 1)))
20947 {
20948 rtx left = XEXP (x, 0);
20949 rtx_code left_code = GET_CODE (left);
20950
20951 /* rotate-and-mask: 1 insn. */
20952 if ((left_code == ROTATE
20953 || left_code == ASHIFT
20954 || left_code == LSHIFTRT)
20955 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20956 {
20957 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20958 if (!CONST_INT_P (XEXP (left, 1)))
20959 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20960 *total += COSTS_N_INSNS (1);
20961 return true;
20962 }
20963
20964 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20965 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20966 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20967 || (val & 0xffff) == val
20968 || (val & 0xffff0000) == val
20969 || ((val & 0xffff) == 0 && mode == SImode))
20970 {
20971 *total = rtx_cost (left, mode, AND, 0, speed);
20972 *total += COSTS_N_INSNS (1);
20973 return true;
20974 }
20975
20976 /* 2 insns. */
20977 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20978 {
20979 *total = rtx_cost (left, mode, AND, 0, speed);
20980 *total += COSTS_N_INSNS (2);
20981 return true;
20982 }
20983 }
20984
20985 *total = COSTS_N_INSNS (1);
20986 return false;
20987
20988 case IOR:
20989 /* FIXME */
20990 *total = COSTS_N_INSNS (1);
20991 return true;
20992
20993 case CLZ:
20994 case XOR:
20995 case ZERO_EXTRACT:
20996 *total = COSTS_N_INSNS (1);
20997 return false;
20998
20999 case ASHIFT:
21000 /* The EXTSWSLI instruction is a combined instruction. Don't count
21001 the sign extend and the shift separately within the insn. */
21002 if (TARGET_EXTSWSLI && mode == DImode
21003 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21004 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21005 {
21006 *total = 0;
21007 return false;
21008 }
21009 /* fall through */
21010
21011 case ASHIFTRT:
21012 case LSHIFTRT:
21013 case ROTATE:
21014 case ROTATERT:
21015 /* Handle mul_highpart. */
21016 if (outer_code == TRUNCATE
21017 && GET_CODE (XEXP (x, 0)) == MULT)
21018 {
21019 if (mode == DImode)
21020 *total = rs6000_cost->muldi;
21021 else
21022 *total = rs6000_cost->mulsi;
21023 return true;
21024 }
21025 else if (outer_code == AND)
21026 *total = 0;
21027 else
21028 *total = COSTS_N_INSNS (1);
21029 return false;
21030
21031 case SIGN_EXTEND:
21032 case ZERO_EXTEND:
21033 if (MEM_P (XEXP (x, 0)))
21034 *total = 0;
21035 else
21036 *total = COSTS_N_INSNS (1);
21037 return false;
21038
21039 case COMPARE:
21040 case NEG:
21041 case ABS:
21042 if (!FLOAT_MODE_P (mode))
21043 {
21044 *total = COSTS_N_INSNS (1);
21045 return false;
21046 }
21047 /* FALLTHRU */
21048
21049 case FLOAT:
21050 case UNSIGNED_FLOAT:
21051 case FIX:
21052 case UNSIGNED_FIX:
21053 case FLOAT_TRUNCATE:
21054 *total = rs6000_cost->fp;
21055 return false;
21056
21057 case FLOAT_EXTEND:
21058 if (mode == DFmode)
21059 *total = rs6000_cost->sfdf_convert;
21060 else
21061 *total = rs6000_cost->fp;
21062 return false;
21063
21064 case CALL:
21065 case IF_THEN_ELSE:
21066 if (!speed)
21067 {
21068 *total = COSTS_N_INSNS (1);
21069 return true;
21070 }
21071 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21072 {
21073 *total = rs6000_cost->fp;
21074 return false;
21075 }
21076 break;
21077
21078 case NE:
21079 case EQ:
21080 case GTU:
21081 case LTU:
21082 /* Carry bit requires mode == Pmode.
21083 NEG or PLUS already counted so only add one. */
21084 if (mode == Pmode
21085 && (outer_code == NEG || outer_code == PLUS))
21086 {
21087 *total = COSTS_N_INSNS (1);
21088 return true;
21089 }
21090 /* FALLTHRU */
21091
21092 case GT:
21093 case LT:
21094 case UNORDERED:
21095 if (outer_code == SET)
21096 {
21097 if (XEXP (x, 1) == const0_rtx)
21098 {
21099 *total = COSTS_N_INSNS (2);
21100 return true;
21101 }
21102 else
21103 {
21104 *total = COSTS_N_INSNS (3);
21105 return false;
21106 }
21107 }
21108 /* CC COMPARE. */
21109 if (outer_code == COMPARE)
21110 {
21111 *total = 0;
21112 return true;
21113 }
21114 break;
21115
21116 default:
21117 break;
21118 }
21119
21120 return false;
21121 }
21122
21123 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21124
21125 static bool
21126 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21127 int opno, int *total, bool speed)
21128 {
21129 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21130
21131 fprintf (stderr,
21132 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21133 "opno = %d, total = %d, speed = %s, x:\n",
21134 ret ? "complete" : "scan inner",
21135 GET_MODE_NAME (mode),
21136 GET_RTX_NAME (outer_code),
21137 opno,
21138 *total,
21139 speed ? "true" : "false");
21140
21141 debug_rtx (x);
21142
21143 return ret;
21144 }
21145
21146 static int
21147 rs6000_insn_cost (rtx_insn *insn, bool speed)
21148 {
21149 if (recog_memoized (insn) < 0)
21150 return 0;
21151
21152 /* If we are optimizing for size, just use the length. */
21153 if (!speed)
21154 return get_attr_length (insn);
21155
21156 /* Use the cost if provided. */
21157 int cost = get_attr_cost (insn);
21158 if (cost > 0)
21159 return cost;
21160
21161 /* If the insn tells us how many insns there are, use that. Otherwise use
21162 the length/4. Adjust the insn length to remove the extra size that
21163 prefixed instructions take. */
21164 int n = get_attr_num_insns (insn);
21165 if (n == 0)
21166 {
21167 int length = get_attr_length (insn);
21168 if (get_attr_prefixed (insn) == PREFIXED_YES)
21169 {
21170 int adjust = 0;
21171 ADJUST_INSN_LENGTH (insn, adjust);
21172 length -= adjust;
21173 }
21174
21175 n = length / 4;
21176 }
21177
21178 enum attr_type type = get_attr_type (insn);
21179
21180 switch (type)
21181 {
21182 case TYPE_LOAD:
21183 case TYPE_FPLOAD:
21184 case TYPE_VECLOAD:
21185 cost = COSTS_N_INSNS (n + 1);
21186 break;
21187
21188 case TYPE_MUL:
21189 switch (get_attr_size (insn))
21190 {
21191 case SIZE_8:
21192 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21193 break;
21194 case SIZE_16:
21195 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21196 break;
21197 case SIZE_32:
21198 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21199 break;
21200 case SIZE_64:
21201 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21202 break;
21203 default:
21204 gcc_unreachable ();
21205 }
21206 break;
21207 case TYPE_DIV:
21208 switch (get_attr_size (insn))
21209 {
21210 case SIZE_32:
21211 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21212 break;
21213 case SIZE_64:
21214 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21215 break;
21216 default:
21217 gcc_unreachable ();
21218 }
21219 break;
21220
21221 case TYPE_FP:
21222 cost = n * rs6000_cost->fp;
21223 break;
21224 case TYPE_DMUL:
21225 cost = n * rs6000_cost->dmul;
21226 break;
21227 case TYPE_SDIV:
21228 cost = n * rs6000_cost->sdiv;
21229 break;
21230 case TYPE_DDIV:
21231 cost = n * rs6000_cost->ddiv;
21232 break;
21233
21234 case TYPE_SYNC:
21235 case TYPE_LOAD_L:
21236 case TYPE_MFCR:
21237 case TYPE_MFCRF:
21238 cost = COSTS_N_INSNS (n + 2);
21239 break;
21240
21241 default:
21242 cost = COSTS_N_INSNS (n);
21243 }
21244
21245 return cost;
21246 }
21247
21248 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21249
21250 static int
21251 rs6000_debug_address_cost (rtx x, machine_mode mode,
21252 addr_space_t as, bool speed)
21253 {
21254 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21255
21256 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21257 ret, speed ? "true" : "false");
21258 debug_rtx (x);
21259
21260 return ret;
21261 }
21262
21263
21264 /* A C expression returning the cost of moving data from a register of class
21265 CLASS1 to one of CLASS2. */
21266
21267 static int
21268 rs6000_register_move_cost (machine_mode mode,
21269 reg_class_t from, reg_class_t to)
21270 {
21271 int ret;
21272 reg_class_t rclass;
21273
21274 if (TARGET_DEBUG_COST)
21275 dbg_cost_ctrl++;
21276
21277 /* If we have VSX, we can easily move between FPR or Altivec registers,
21278 otherwise we can only easily move within classes.
21279 Do this first so we give best-case answers for union classes
21280 containing both gprs and vsx regs. */
21281 HARD_REG_SET to_vsx, from_vsx;
21282 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21283 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21284 if (!hard_reg_set_empty_p (to_vsx)
21285 && !hard_reg_set_empty_p (from_vsx)
21286 && (TARGET_VSX
21287 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21288 {
21289 int reg = FIRST_FPR_REGNO;
21290 if (TARGET_VSX
21291 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21292 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21293 reg = FIRST_ALTIVEC_REGNO;
21294 ret = 2 * hard_regno_nregs (reg, mode);
21295 }
21296
21297 /* Moves from/to GENERAL_REGS. */
21298 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21299 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21300 {
21301 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21302 {
21303 if (TARGET_DIRECT_MOVE)
21304 {
21305 /* Keep the cost for direct moves above that for within
21306 a register class even if the actual processor cost is
21307 comparable. We do this because a direct move insn
21308 can't be a nop, whereas with ideal register
21309 allocation a move within the same class might turn
21310 out to be a nop. */
21311 if (rs6000_tune == PROCESSOR_POWER9
21312 || rs6000_tune == PROCESSOR_FUTURE)
21313 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21314 else
21315 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21316 /* SFmode requires a conversion when moving between gprs
21317 and vsx. */
21318 if (mode == SFmode)
21319 ret += 2;
21320 }
21321 else
21322 ret = (rs6000_memory_move_cost (mode, rclass, false)
21323 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21324 }
21325
21326 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21327 shift. */
21328 else if (rclass == CR_REGS)
21329 ret = 4;
21330
21331 /* For those processors that have slow LR/CTR moves, make them more
21332 expensive than memory in order to bias spills to memory. */
21333 else if ((rs6000_tune == PROCESSOR_POWER6
21334 || rs6000_tune == PROCESSOR_POWER7
21335 || rs6000_tune == PROCESSOR_POWER8
21336 || rs6000_tune == PROCESSOR_POWER9)
21337 && reg_class_subset_p (rclass, SPECIAL_REGS))
21338 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21339
21340 else
21341 /* A move will cost one instruction per GPR moved. */
21342 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21343 }
21344
21345 /* Everything else has to go through GENERAL_REGS. */
21346 else
21347 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21348 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21349
21350 if (TARGET_DEBUG_COST)
21351 {
21352 if (dbg_cost_ctrl == 1)
21353 fprintf (stderr,
21354 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21355 ret, GET_MODE_NAME (mode), reg_class_names[from],
21356 reg_class_names[to]);
21357 dbg_cost_ctrl--;
21358 }
21359
21360 return ret;
21361 }
21362
21363 /* A C expression returning the cost of moving data of MODE from a register to
21364 or from memory. */
21365
21366 static int
21367 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21368 bool in ATTRIBUTE_UNUSED)
21369 {
21370 int ret;
21371
21372 if (TARGET_DEBUG_COST)
21373 dbg_cost_ctrl++;
21374
21375 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21376 ret = 4 * hard_regno_nregs (0, mode);
21377 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21378 || reg_classes_intersect_p (rclass, VSX_REGS)))
21379 ret = 4 * hard_regno_nregs (32, mode);
21380 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21381 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21382 else
21383 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21384
21385 if (TARGET_DEBUG_COST)
21386 {
21387 if (dbg_cost_ctrl == 1)
21388 fprintf (stderr,
21389 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21390 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21391 dbg_cost_ctrl--;
21392 }
21393
21394 return ret;
21395 }
21396
21397 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21398
21399 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21400 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21401 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21402 move cost between GENERAL_REGS and VSX_REGS low.
21403
21404 It might seem reasonable to use a union class. After all, if usage
21405 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21406 rather than memory. However, in cases where register pressure of
21407 both is high, like the cactus_adm spec test, allowing
21408 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21409 the first scheduling pass. This is partly due to an allocno of
21410 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21411 class, which gives too high a pressure for GENERAL_REGS and too low
21412 for VSX_REGS. So, force a choice of the subclass here.
21413
21414 The best class is also the union if GENERAL_REGS and VSX_REGS have
21415 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21416 allocno class, since trying to narrow down the class by regno mode
21417 is prone to error. For example, SImode is allowed in VSX regs and
21418 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21419 it would be wrong to choose an allocno of GENERAL_REGS based on
21420 SImode. */
21421
21422 static reg_class_t
21423 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21424 reg_class_t allocno_class,
21425 reg_class_t best_class)
21426 {
21427 switch (allocno_class)
21428 {
21429 case GEN_OR_VSX_REGS:
21430 /* best_class must be a subset of allocno_class. */
21431 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21432 || best_class == GEN_OR_FLOAT_REGS
21433 || best_class == VSX_REGS
21434 || best_class == ALTIVEC_REGS
21435 || best_class == FLOAT_REGS
21436 || best_class == GENERAL_REGS
21437 || best_class == BASE_REGS);
21438 /* Use best_class but choose wider classes when copying from the
21439 wider class to best_class is cheap. This mimics IRA choice
21440 of allocno class. */
21441 if (best_class == BASE_REGS)
21442 return GENERAL_REGS;
21443 if (TARGET_VSX
21444 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21445 return VSX_REGS;
21446 return best_class;
21447
21448 default:
21449 break;
21450 }
21451
21452 return allocno_class;
21453 }
21454
21455 /* Returns a code for a target-specific builtin that implements
21456 reciprocal of the function, or NULL_TREE if not available. */
21457
21458 static tree
21459 rs6000_builtin_reciprocal (tree fndecl)
21460 {
21461 switch (DECL_MD_FUNCTION_CODE (fndecl))
21462 {
21463 case VSX_BUILTIN_XVSQRTDP:
21464 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21465 return NULL_TREE;
21466
21467 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21468
21469 case VSX_BUILTIN_XVSQRTSP:
21470 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21471 return NULL_TREE;
21472
21473 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21474
21475 default:
21476 return NULL_TREE;
21477 }
21478 }
21479
21480 /* Load up a constant. If the mode is a vector mode, splat the value across
21481 all of the vector elements. */
21482
21483 static rtx
21484 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21485 {
21486 rtx reg;
21487
21488 if (mode == SFmode || mode == DFmode)
21489 {
21490 rtx d = const_double_from_real_value (dconst, mode);
21491 reg = force_reg (mode, d);
21492 }
21493 else if (mode == V4SFmode)
21494 {
21495 rtx d = const_double_from_real_value (dconst, SFmode);
21496 rtvec v = gen_rtvec (4, d, d, d, d);
21497 reg = gen_reg_rtx (mode);
21498 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21499 }
21500 else if (mode == V2DFmode)
21501 {
21502 rtx d = const_double_from_real_value (dconst, DFmode);
21503 rtvec v = gen_rtvec (2, d, d);
21504 reg = gen_reg_rtx (mode);
21505 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21506 }
21507 else
21508 gcc_unreachable ();
21509
21510 return reg;
21511 }
21512
21513 /* Generate an FMA instruction. */
21514
21515 static void
21516 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21517 {
21518 machine_mode mode = GET_MODE (target);
21519 rtx dst;
21520
21521 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21522 gcc_assert (dst != NULL);
21523
21524 if (dst != target)
21525 emit_move_insn (target, dst);
21526 }
21527
21528 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21529
21530 static void
21531 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21532 {
21533 machine_mode mode = GET_MODE (dst);
21534 rtx r;
21535
21536 /* This is a tad more complicated, since the fnma_optab is for
21537 a different expression: fma(-m1, m2, a), which is the same
21538 thing except in the case of signed zeros.
21539
21540 Fortunately we know that if FMA is supported, FNMSUB is
21541 also supported in the ISA. Just expand it directly. */
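/* Spelled out: -fma (m1, m2, -a) computes -(m1*m2 - a), while
   fma (-m1, m2, a) computes a - m1*m2.  Under the default rounding
   mode these agree except when m1*m2 == a exactly, where the first
   rounds to -0.0 and the second to +0.0.  */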
21542
21543 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21544
21545 r = gen_rtx_NEG (mode, a);
21546 r = gen_rtx_FMA (mode, m1, m2, r);
21547 r = gen_rtx_NEG (mode, r);
21548 emit_insn (gen_rtx_SET (dst, r));
21549 }
21550
21551 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21552 add a reg_note saying that this was a division. Support both scalar and
21553 vector divide. Assumes no trapping math and finite arguments. */
21554
21555 void
21556 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21557 {
21558 machine_mode mode = GET_MODE (dst);
21559 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21560 int i;
21561
21562 /* Low precision estimates guarantee 5 bits of accuracy. High
21563 precision estimates guarantee 14 bits of accuracy. SFmode
21564 requires 23 bits of accuracy. DFmode requires 52 bits of
21565 accuracy. Each pass at least doubles the accuracy, leading
21566 to the following. */
21567 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21568 if (mode == DFmode || mode == V2DFmode)
21569 passes++;
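/* E.g. a 14-bit estimate doubles to 28 bits after one pass (covering
   SFmode's 23) and to 56 bits after two (covering DFmode's 52); a
   5-bit estimate needs three passes for 40 bits and a fourth for 80.  */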
21570
21571 enum insn_code code = optab_handler (smul_optab, mode);
21572 insn_gen_fn gen_mul = GEN_FCN (code);
21573
21574 gcc_assert (code != CODE_FOR_nothing);
21575
21576 one = rs6000_load_constant_and_splat (mode, dconst1);
21577
21578 /* x0 = 1./d estimate */
21579 x0 = gen_reg_rtx (mode);
21580 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21581 UNSPEC_FRES)));
21582
21583 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21584 if (passes > 1) {
21585
21586 /* e0 = 1. - d * x0 */
21587 e0 = gen_reg_rtx (mode);
21588 rs6000_emit_nmsub (e0, d, x0, one);
21589
21590 /* x1 = x0 + e0 * x0 */
21591 x1 = gen_reg_rtx (mode);
21592 rs6000_emit_madd (x1, e0, x0, x0);
21593
21594 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21595 ++i, xprev = xnext, eprev = enext) {
21596
21597 /* enext = eprev * eprev */
21598 enext = gen_reg_rtx (mode);
21599 emit_insn (gen_mul (enext, eprev, eprev));
21600
21601 /* xnext = xprev + enext * xprev */
21602 xnext = gen_reg_rtx (mode);
21603 rs6000_emit_madd (xnext, enext, xprev, xprev);
21604 }
21605
21606 } else
21607 xprev = x0;
21608
21609 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21610
21611 /* u = n * xprev */
21612 u = gen_reg_rtx (mode);
21613 emit_insn (gen_mul (u, n, xprev));
21614
21615 /* v = n - (d * u) */
21616 v = gen_reg_rtx (mode);
21617 rs6000_emit_nmsub (v, d, u, n);
21618
21619 /* dst = (v * xprev) + u */
21620 rs6000_emit_madd (dst, v, xprev, u);
21621
21622 if (note_p)
21623 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21624 }
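
The emitted RTL may be easier to follow as scalar arithmetic.  The sketch
below is a minimal C model of the sequence above; estimate_recip is a
hypothetical stand-in for the UNSPEC_FRES hardware estimate, seeded here
with roughly 5 bits of error.

#include <stdio.h>

/* Hypothetical stand-in for the UNSPEC_FRES estimate; a real fres gives
   about 5 accurate bits, which the 3% error here imitates.  */
static double estimate_recip (double d) { return (1.0 / d) * (1.0 + 0.03); }

/* Scalar model of the software-divide sequence above.  */
static double model_swdiv (double n, double d, int passes)
{
  double x = estimate_recip (d);        /* x0 = 1/d estimate */
  if (passes > 1)
    {
      double e = 1.0 - d * x;           /* e0 = 1 - d*x0, via FNMSUB */
      x = x + e * x;                    /* x1 = x0 + e0*x0, via FMADD */
      for (int i = 0; i < passes - 2; i++)
        {
          e = e * e;                    /* each squaring doubles the accuracy */
          x = x + e * x;
        }
    }
  double u = n * x;                     /* u = n * xprev */
  double v = n - d * u;                 /* v = n - d*u, via FNMSUB */
  return v * x + u;                     /* dst = v*xprev + u, via FMADD */
}

int main (void)
{
  printf ("%.17g vs %.17g\n", model_swdiv (1.0, 3.0, 3), 1.0 / 3.0);
  return 0;
}
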
21625
21626 /* Goldschmidt's Algorithm for single/double-precision floating point
21627 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21628
21629 void
21630 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21631 {
21632 machine_mode mode = GET_MODE (src);
21633 rtx e = gen_reg_rtx (mode);
21634 rtx g = gen_reg_rtx (mode);
21635 rtx h = gen_reg_rtx (mode);
21636
21637 /* Low precision estimates guarantee 5 bits of accuracy. High
21638 precision estimates guarantee 14 bits of accuracy. SFmode
21639 requires 23 bits of accuracy. DFmode requires 52 bits of
21640 accuracy. Each pass at least doubles the accuracy, leading
21641 to the following. */
21642 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21643 if (mode == DFmode || mode == V2DFmode)
21644 passes++;
21645
21646 int i;
21647 rtx mhalf;
21648 enum insn_code code = optab_handler (smul_optab, mode);
21649 insn_gen_fn gen_mul = GEN_FCN (code);
21650
21651 gcc_assert (code != CODE_FOR_nothing);
21652
21653 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21654
21655 /* e = rsqrt estimate */
21656 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21657 UNSPEC_RSQRT)));
21658
21659 /* If (src == 0.0), filter out the infinite rsqrt estimate so that sqrt(0.0) does not produce a NaN. */
21660 if (!recip)
21661 {
21662 rtx zero = force_reg (mode, CONST0_RTX (mode));
21663
21664 if (mode == SFmode)
21665 {
21666 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21667 e, zero, mode, 0);
21668 if (target != e)
21669 emit_move_insn (e, target);
21670 }
21671 else
21672 {
21673 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21674 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21675 }
21676 }
21677
21678 /* g = sqrt estimate. */
21679 emit_insn (gen_mul (g, e, src));
21680 /* h = 1/(2*sqrt) estimate. */
21681 emit_insn (gen_mul (h, e, mhalf));
21682
21683 if (recip)
21684 {
21685 if (passes == 1)
21686 {
21687 rtx t = gen_reg_rtx (mode);
21688 rs6000_emit_nmsub (t, g, h, mhalf);
21689 /* Apply the correction directly to the rsqrt estimate. */
21690 rs6000_emit_madd (dst, e, t, e);
21691 }
21692 else
21693 {
21694 for (i = 0; i < passes; i++)
21695 {
21696 rtx t1 = gen_reg_rtx (mode);
21697 rtx g1 = gen_reg_rtx (mode);
21698 rtx h1 = gen_reg_rtx (mode);
21699
21700 rs6000_emit_nmsub (t1, g, h, mhalf);
21701 rs6000_emit_madd (g1, g, t1, g);
21702 rs6000_emit_madd (h1, h, t1, h);
21703
21704 g = g1;
21705 h = h1;
21706 }
21707 /* Multiply by 2; h converges to 1/(2*sqrt), so 2*h is the rsqrt. */
21708 emit_insn (gen_add3_insn (dst, h, h));
21709 }
21710 }
21711 else
21712 {
21713 rtx t = gen_reg_rtx (mode);
21714 rs6000_emit_nmsub (t, g, h, mhalf);
21715 rs6000_emit_madd (dst, g, t, g);
21716 }
21717
21718 return;
21719 }
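
A scalar model of the Goldschmidt sequence above may help; estimate_rsqrt
is a hypothetical stand-in for the UNSPEC_RSQRT estimate, and the two
branches mirror the looped recip case and the single sqrt correction
emitted above (the zero filtering and the passes == 1 recip shortcut are
omitted for brevity).

#include <math.h>
#include <stdio.h>

/* Hypothetical stand-in for the UNSPEC_RSQRT estimate.  */
static double estimate_rsqrt (double x) { return (1.0 + 0.01) / sqrt (x); }

/* Scalar model of the Goldschmidt iteration above.  */
static double model_swsqrt (double src, int recip, int passes)
{
  double e = estimate_rsqrt (src);
  double g = e * src;           /* g = sqrt estimate */
  double h = e * 0.5;           /* h = 1/(2*sqrt) estimate */

  if (recip)
    {
      for (int i = 0; i < passes; i++)
        {
          double t = 0.5 - g * h;       /* FNMSUB against mhalf */
          g = g + g * t;                /* refine the sqrt estimate */
          h = h + h * t;                /* refine the 1/(2*sqrt) estimate */
        }
      return h + h;             /* 2*h converges to 1/sqrt(src) */
    }

  double t = 0.5 - g * h;       /* the single sqrt correction above */
  return g + g * t;
}

int main (void)
{
  printf ("%.17g vs %.17g\n", model_swsqrt (2.0, 1, 3), 1.0 / sqrt (2.0));
  printf ("%.17g vs %.17g\n", model_swsqrt (2.0, 0, 1), sqrt (2.0));
  return 0;
}
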
21720
21721 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21722 (Power7) targets. DST is the target, and SRC is the argument operand. */
21723
21724 void
21725 rs6000_emit_popcount (rtx dst, rtx src)
21726 {
21727 machine_mode mode = GET_MODE (dst);
21728 rtx tmp1, tmp2;
21729
21730 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21731 if (TARGET_POPCNTD)
21732 {
21733 if (mode == SImode)
21734 emit_insn (gen_popcntdsi2 (dst, src));
21735 else
21736 emit_insn (gen_popcntddi2 (dst, src));
21737 return;
21738 }
21739
21740 tmp1 = gen_reg_rtx (mode);
21741
21742 if (mode == SImode)
21743 {
21744 emit_insn (gen_popcntbsi2 (tmp1, src));
21745 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21746 NULL_RTX, 0);
21747 tmp2 = force_reg (SImode, tmp2);
21748 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21749 }
21750 else
21751 {
21752 emit_insn (gen_popcntbdi2 (tmp1, src));
21753 tmp2 = expand_mult (DImode, tmp1,
21754 GEN_INT ((HOST_WIDE_INT)
21755 0x01010101 << 32 | 0x01010101),
21756 NULL_RTX, 0);
21757 tmp2 = force_reg (DImode, tmp2);
21758 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
21759 }
21760 }
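
The fallback path relies on a classic multiply trick.  The sketch below is
a standalone C model of the SImode case; model_popcntb is a hypothetical
emulation of the per-byte counts that the popcntb instruction produces.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for popcntb: count the set bits of each byte independently,
   leaving each count in its own byte lane.  */
static uint32_t model_popcntb (uint32_t x)
{
  uint32_t r = 0;
  for (int b = 0; b < 32; b += 8)
    for (uint32_t byte = (x >> b) & 0xff; byte; byte &= byte - 1)
      r += 1u << b;
  return r;
}

/* Model of the SImode fallback above: multiplying by 0x01010101 sums the
   four byte counts into the most significant byte, and the shift by 24
   extracts that total.  */
static uint32_t model_popcount_si (uint32_t src)
{
  return (model_popcntb (src) * 0x01010101u) >> 24;
}

int main (void)
{
  printf ("%u\n", model_popcount_si (0xdeadbeefu));   /* prints 24 */
  return 0;
}
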
21761
21762
21763 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21764 target, and SRC is the argument operand. */
21765
21766 void
21767 rs6000_emit_parity (rtx dst, rtx src)
21768 {
21769 machine_mode mode = GET_MODE (dst);
21770 rtx tmp;
21771
21772 tmp = gen_reg_rtx (mode);
21773
21774 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21775 if (TARGET_CMPB)
21776 {
21777 if (mode == SImode)
21778 {
21779 emit_insn (gen_popcntbsi2 (tmp, src));
21780 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21781 }
21782 else
21783 {
21784 emit_insn (gen_popcntbdi2 (tmp, src));
21785 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21786 }
21787 return;
21788 }
21789
21790 if (mode == SImode)
21791 {
21792 /* Is mult+shift >= shift+xor+shift+xor? */
21793 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21794 {
21795 rtx tmp1, tmp2, tmp3, tmp4;
21796
21797 tmp1 = gen_reg_rtx (SImode);
21798 emit_insn (gen_popcntbsi2 (tmp1, src));
21799
21800 tmp2 = gen_reg_rtx (SImode);
21801 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21802 tmp3 = gen_reg_rtx (SImode);
21803 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21804
21805 tmp4 = gen_reg_rtx (SImode);
21806 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21807 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21808 }
21809 else
21810 rs6000_emit_popcount (tmp, src);
21811 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21812 }
21813 else
21814 {
21815 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21816 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21817 {
21818 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21819
21820 tmp1 = gen_reg_rtx (DImode);
21821 emit_insn (gen_popcntbdi2 (tmp1, src));
21822
21823 tmp2 = gen_reg_rtx (DImode);
21824 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21825 tmp3 = gen_reg_rtx (DImode);
21826 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21827
21828 tmp4 = gen_reg_rtx (DImode);
21829 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21830 tmp5 = gen_reg_rtx (DImode);
21831 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21832
21833 tmp6 = gen_reg_rtx (DImode);
21834 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21835 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21836 }
21837 else
21838 rs6000_emit_popcount (tmp, src);
21839 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
21840 }
21841 }
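
Similarly, the shift+xor fallback above can be modeled in a few lines of C;
model_popcntb is the same hypothetical per-byte emulation used in the
popcount sketch earlier.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for popcntb, as in the popcount sketch above.  */
static uint32_t model_popcntb (uint32_t x)
{
  uint32_t r = 0;
  for (int b = 0; b < 32; b += 8)
    for (uint32_t byte = (x >> b) & 0xff; byte; byte &= byte - 1)
      r += 1u << b;
  return r;
}

/* Model of the SImode shift+xor fallback above.  The low bit of each
   byte count is that byte's parity, so two xor folds combine the four
   byte parities and the final and-with-1 extracts the result.  */
static uint32_t model_parity_si (uint32_t src)
{
  uint32_t t = model_popcntb (src);
  t ^= t >> 16;         /* fold the upper halfword into the lower */
  t ^= t >> 8;          /* fold the remaining two byte counts */
  return t & 1;
}

int main (void)
{
  printf ("%u %u\n", model_parity_si (0x1u), model_parity_si (0x3u)); /* 1 0 */
  return 0;
}
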
21842
21843 /* Expand an Altivec constant permutation for little endian mode.
21844 OP0 and OP1 are the input vectors and TARGET is the output vector.
21845 SEL specifies the constant permutation vector.
21846
21847 There are two issues: First, the two input operands must be
21848 swapped so that together they form a double-wide array in LE
21849 order. Second, the vperm instruction has surprising behavior
21850 in LE mode: it interprets the elements of the source vectors
21851 in BE mode ("left to right") and interprets the elements of
21852 the destination vector in LE mode ("right to left"). To
21853 correct for this, we must subtract each element of the permute
21854 control vector from 31.
21855
21856 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21857 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21858 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21859 serve as the permute control vector. Then, in BE mode,
21860
21861 vperm 9,10,11,12
21862
21863 places the desired result in vr9. However, in LE mode the
21864 vector contents will be
21865
21866 vr10 = 00000003 00000002 00000001 00000000
21867 vr11 = 00000007 00000006 00000005 00000004
21868
21869 The result of the vperm using the same permute control vector is
21870
21871 vr9 = 05000000 07000000 01000000 03000000
21872
21873 That is, the leftmost 4 bytes of vr10 are interpreted as the
21874 source for the rightmost 4 bytes of vr9, and so on.
21875
21876 If we change the permute control vector to
21877
21878 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21879
21880 and issue
21881
21882 vperm 9,11,10,12
21883
21884 we get the desired
21885
21886 vr9 = 00000006 00000004 00000002 00000000. */
21887
21888 static void
21889 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21890 const vec_perm_indices &sel)
21891 {
21892 unsigned int i;
21893 rtx perm[16];
21894 rtx constv, unspec;
21895
21896 /* Unpack and adjust the constant selector. */
21897 for (i = 0; i < 16; ++i)
21898 {
21899 unsigned int elt = 31 - (sel[i] & 31);
21900 perm[i] = GEN_INT (elt);
21901 }
21902
21903 /* Expand to a permute, swapping the inputs and using the
21904 adjusted selector. */
21905 if (!REG_P (op0))
21906 op0 = force_reg (V16QImode, op0);
21907 if (!REG_P (op1))
21908 op1 = force_reg (V16QImode, op1);
21909
21910 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21911 constv = force_reg (V16QImode, constv);
21912 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21913 UNSPEC_VPERM);
21914 if (!REG_P (target))
21915 {
21916 rtx tmp = gen_reg_rtx (V16QImode);
21917 emit_move_insn (tmp, unspec);
21918 unspec = tmp;
21919 }
21920
21921 emit_move_insn (target, unspec);
21922 }
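
The adjustment is easy to check against the worked example in the big
comment above: a short standalone C program applying 31 - (sel[i] & 31)
to the BE control vector reproduces the LE control vector quoted there.

#include <stdio.h>

int main (void)
{
  /* The BE control vector from the worked example above.  */
  unsigned char sel[16] = { 0, 1, 2, 3, 8, 9, 10, 11,
                            16, 17, 18, 19, 24, 25, 26, 27 };
  for (int i = 0; i < 16; i++)
    printf ("%d%s", 31 - (sel[i] & 31), i == 15 ? "\n" : ",");
  /* Prints 31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4 -- the adjusted
     LE control vector quoted in the comment.  */
  return 0;
}
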
21923
21924 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21925 permute control vector. But here it's not a constant, so we must
21926 generate a vector NAND or NOR to do the adjustment. */
21927
21928 void
21929 altivec_expand_vec_perm_le (rtx operands[4])
21930 {
21931 rtx notx, iorx, unspec;
21932 rtx target = operands[0];
21933 rtx op0 = operands[1];
21934 rtx op1 = operands[2];
21935 rtx sel = operands[3];
21936 rtx tmp = target;
21937 rtx norreg = gen_reg_rtx (V16QImode);
21938 machine_mode mode = GET_MODE (target);
21939
21940 /* Get everything in regs so the pattern matches. */
21941 if (!REG_P (op0))
21942 op0 = force_reg (mode, op0);
21943 if (!REG_P (op1))
21944 op1 = force_reg (mode, op1);
21945 if (!REG_P (sel))
21946 sel = force_reg (V16QImode, sel);
21947 if (!REG_P (target))
21948 tmp = gen_reg_rtx (mode);
21949
21950 if (TARGET_P9_VECTOR)
21951 {
21952 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21953 UNSPEC_VPERMR);
21954 }
21955 else
21956 {
21957 /* Invert the selector with a VNAND if available, else a VNOR.
21958 The VNAND is preferred for future fusion opportunities. */
21959 notx = gen_rtx_NOT (V16QImode, sel);
21960 iorx = (TARGET_P8_VECTOR
21961 ? gen_rtx_IOR (V16QImode, notx, notx)
21962 : gen_rtx_AND (V16QImode, notx, notx));
21963 emit_insn (gen_rtx_SET (norreg, iorx));
21964
21965 /* Permute with operands reversed and adjusted selector. */
21966 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21967 UNSPEC_VPERM);
21968 }
21969
21970 /* Copy into target, possibly by way of a register. */
21971 if (!REG_P (target))
21972 {
21973 emit_move_insn (tmp, unspec);
21974 unspec = tmp;
21975 }
21976
21977 emit_move_insn (target, unspec);
21978 }
21979
21980 /* Expand an Altivec constant permutation. Return true if we match
21981 an efficient implementation; false to fall back to VPERM.
21982
21983 OP0 and OP1 are the input vectors and TARGET is the output vector.
21984 SEL specifies the constant permutation vector. */
21985
21986 static bool
21987 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21988 const vec_perm_indices &sel)
21989 {
21990 struct altivec_perm_insn {
21991 HOST_WIDE_INT mask;
21992 enum insn_code impl;
21993 unsigned char perm[16];
21994 };
21995 static const struct altivec_perm_insn patterns[] = {
21996 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21997 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21998 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21999 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22000 { OPTION_MASK_ALTIVEC,
22001 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22002 : CODE_FOR_altivec_vmrglb_direct),
22003 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22004 { OPTION_MASK_ALTIVEC,
22005 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22006 : CODE_FOR_altivec_vmrglh_direct),
22007 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22008 { OPTION_MASK_ALTIVEC,
22009 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22010 : CODE_FOR_altivec_vmrglw_direct),
22011 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22012 { OPTION_MASK_ALTIVEC,
22013 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22014 : CODE_FOR_altivec_vmrghb_direct),
22015 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22016 { OPTION_MASK_ALTIVEC,
22017 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22018 : CODE_FOR_altivec_vmrghh_direct),
22019 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22020 { OPTION_MASK_ALTIVEC,
22021 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22022 : CODE_FOR_altivec_vmrghw_direct),
22023 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22024 { OPTION_MASK_P8_VECTOR,
22025 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22026 : CODE_FOR_p8_vmrgow_v4sf_direct),
22027 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22028 { OPTION_MASK_P8_VECTOR,
22029 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22030 : CODE_FOR_p8_vmrgew_v4sf_direct),
22031 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22032 };
22033
22034 unsigned int i, j, elt, which;
22035 unsigned char perm[16];
22036 rtx x;
22037 bool one_vec;
22038
22039 /* Unpack the constant selector. */
22040 for (i = which = 0; i < 16; ++i)
22041 {
22042 elt = sel[i] & 31;
22043 which |= (elt < 16 ? 1 : 2);
22044 perm[i] = elt;
22045 }
22046
22047 /* Simplify the constant selector based on operands. */
22048 switch (which)
22049 {
22050 default:
22051 gcc_unreachable ();
22052
22053 case 3:
22054 one_vec = false;
22055 if (!rtx_equal_p (op0, op1))
22056 break;
22057 /* FALLTHRU */
22058
22059 case 2:
22060 for (i = 0; i < 16; ++i)
22061 perm[i] &= 15;
22062 op0 = op1;
22063 one_vec = true;
22064 break;
22065
22066 case 1:
22067 op1 = op0;
22068 one_vec = true;
22069 break;
22070 }
22071
22072 /* Look for splat patterns. */
22073 if (one_vec)
22074 {
22075 elt = perm[0];
22076
22077 for (i = 0; i < 16; ++i)
22078 if (perm[i] != elt)
22079 break;
22080 if (i == 16)
22081 {
22082 if (!BYTES_BIG_ENDIAN)
22083 elt = 15 - elt;
22084 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22085 return true;
22086 }
22087
22088 if (elt % 2 == 0)
22089 {
22090 for (i = 0; i < 16; i += 2)
22091 if (perm[i] != elt || perm[i + 1] != elt + 1)
22092 break;
22093 if (i == 16)
22094 {
22095 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22096 x = gen_reg_rtx (V8HImode);
22097 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22098 GEN_INT (field)));
22099 emit_move_insn (target, gen_lowpart (V16QImode, x));
22100 return true;
22101 }
22102 }
22103
22104 if (elt % 4 == 0)
22105 {
22106 for (i = 0; i < 16; i += 4)
22107 if (perm[i] != elt
22108 || perm[i + 1] != elt + 1
22109 || perm[i + 2] != elt + 2
22110 || perm[i + 3] != elt + 3)
22111 break;
22112 if (i == 16)
22113 {
22114 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22115 x = gen_reg_rtx (V4SImode);
22116 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22117 GEN_INT (field)));
22118 emit_move_insn (target, gen_lowpart (V16QImode, x));
22119 return true;
22120 }
22121 }
22122 }
22123
22124 /* Look for merge and pack patterns. */
22125 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22126 {
22127 bool swapped;
22128
22129 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22130 continue;
22131
22132 elt = patterns[j].perm[0];
22133 if (perm[0] == elt)
22134 swapped = false;
22135 else if (perm[0] == elt + 16)
22136 swapped = true;
22137 else
22138 continue;
22139 for (i = 1; i < 16; ++i)
22140 {
22141 elt = patterns[j].perm[i];
22142 if (swapped)
22143 elt = (elt >= 16 ? elt - 16 : elt + 16);
22144 else if (one_vec && elt >= 16)
22145 elt -= 16;
22146 if (perm[i] != elt)
22147 break;
22148 }
22149 if (i == 16)
22150 {
22151 enum insn_code icode = patterns[j].impl;
22152 machine_mode omode = insn_data[icode].operand[0].mode;
22153 machine_mode imode = insn_data[icode].operand[1].mode;
22154
22155 /* For little-endian, don't use vpkuwum and vpkuhum if the
22156 underlying vector type is not V4SI and V8HI, respectively.
22157 For example, using vpkuwum with a V8HI picks up the even
22158 halfwords (BE numbering) when the even halfwords (LE
22159 numbering) are what we need. */
22160 if (!BYTES_BIG_ENDIAN
22161 && icode == CODE_FOR_altivec_vpkuwum_direct
22162 && ((REG_P (op0)
22163 && GET_MODE (op0) != V4SImode)
22164 || (SUBREG_P (op0)
22165 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22166 continue;
22167 if (!BYTES_BIG_ENDIAN
22168 && icode == CODE_FOR_altivec_vpkuhum_direct
22169 && ((REG_P (op0)
22170 && GET_MODE (op0) != V8HImode)
22171 || (SUBREG_P (op0)
22172 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22173 continue;
22174
22175 /* For little-endian, the two input operands must be swapped
22176 (or swapped back) to ensure proper right-to-left numbering
22177 from 0 to 2N-1. */
22178 if (swapped ^ !BYTES_BIG_ENDIAN)
22179 std::swap (op0, op1);
22180 if (imode != V16QImode)
22181 {
22182 op0 = gen_lowpart (imode, op0);
22183 op1 = gen_lowpart (imode, op1);
22184 }
22185 if (omode == V16QImode)
22186 x = target;
22187 else
22188 x = gen_reg_rtx (omode);
22189 emit_insn (GEN_FCN (icode) (x, op0, op1));
22190 if (omode != V16QImode)
22191 emit_move_insn (target, gen_lowpart (V16QImode, x));
22192 return true;
22193 }
22194 }
22195
22196 if (!BYTES_BIG_ENDIAN)
22197 {
22198 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22199 return true;
22200 }
22201
22202 return false;
22203 }
22204
22205 /* Expand a VSX Permute Doubleword constant permutation.
22206 Return true if we match an efficient implementation. */
22207
22208 static bool
22209 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22210 unsigned char perm0, unsigned char perm1)
22211 {
22212 rtx x;
22213
22214 /* If both selectors come from the same operand, fold to single op. */
22215 if ((perm0 & 2) == (perm1 & 2))
22216 {
22217 if (perm0 & 2)
22218 op0 = op1;
22219 else
22220 op1 = op0;
22221 }
22222 /* If both operands are equal, fold to simpler permutation. */
22223 if (rtx_equal_p (op0, op1))
22224 {
22225 perm0 = perm0 & 1;
22226 perm1 = (perm1 & 1) + 2;
22227 }
22228 /* If the first selector comes from the second operand, swap. */
22229 else if (perm0 & 2)
22230 {
22231 if (perm1 & 2)
22232 return false;
22233 perm0 -= 2;
22234 perm1 += 2;
22235 std::swap (op0, op1);
22236 }
22237 /* If the second selector does not come from the second operand, fail. */
22238 else if ((perm1 & 2) == 0)
22239 return false;
22240
22241 /* Success! */
22242 if (target != NULL)
22243 {
22244 machine_mode vmode, dmode;
22245 rtvec v;
22246
22247 vmode = GET_MODE (target);
22248 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22249 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22250 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22251 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22252 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22253 emit_insn (gen_rtx_SET (target, x));
22254 }
22255 return true;
22256 }
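
The selector arithmetic above can be modeled with plain integers; in the
sketch below, 0 and 1 are hypothetical names for the two input registers,
and each 2-bit selector picks one doubleword of their concatenation.

#include <stdbool.h>
#include <stdio.h>

/* Model of the selector normalization above.  Returns false when the
   pair cannot be expressed as a single doubleword permute.  */
static bool
model_perm2 (unsigned perm0, unsigned perm1, unsigned op0, unsigned op1)
{
  /* Both selectors from the same operand: fold to a single operand.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2) op0 = op1; else op1 = op0;
    }
  if (op0 == op1)                       /* models rtx_equal_p */
    {
      perm0 &= 1;
      perm1 = (perm1 & 1) + 2;
    }
  else if (perm0 & 2)                   /* first selector reads op1: swap */
    {
      if (perm1 & 2)
        return false;
      perm0 -= 2;
      perm1 += 2;
      unsigned t = op0; op0 = op1; op1 = t;
    }
  else if ((perm1 & 2) == 0)            /* second selector must read op1 */
    return false;

  printf ("concat(op%u, op%u), select dwords {%u, %u}\n",
          op0, op1, perm0, perm1);
  return true;
}

int main (void)
{
  model_perm2 (3, 0, 0, 1);     /* swapped case: concat(op1, op0), {1, 2} */
  return 0;
}
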
22257
22258 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22259
22260 static bool
22261 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22262 rtx op1, const vec_perm_indices &sel)
22263 {
22264 bool testing_p = !target;
22265
22266 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22267 if (TARGET_ALTIVEC && testing_p)
22268 return true;
22269
22270 /* Check for ps_merge* or xxpermdi insns. */
22271 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22272 {
22273 if (testing_p)
22274 {
22275 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22276 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22277 }
22278 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22279 return true;
22280 }
22281
22282 if (TARGET_ALTIVEC)
22283 {
22284 /* Force the target-independent code to lower to V16QImode. */
22285 if (vmode != V16QImode)
22286 return false;
22287 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22288 return true;
22289 }
22290
22291 return false;
22292 }
22293
22294 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22295 OP0 and OP1 are the input vectors and TARGET is the output vector.
22296 PERM specifies the constant permutation vector. */
22297
22298 static void
22299 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22300 machine_mode vmode, const vec_perm_builder &perm)
22301 {
22302 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22303 if (x != target)
22304 emit_move_insn (target, x);
22305 }
22306
22307 /* Expand an extract even operation. */
22308
22309 void
22310 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22311 {
22312 machine_mode vmode = GET_MODE (target);
22313 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22314 vec_perm_builder perm (nelt, nelt, 1);
22315
22316 for (i = 0; i < nelt; i++)
22317 perm.quick_push (i * 2);
22318
22319 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22320 }
22321
22322 /* Expand a vector interleave operation. */
22323
22324 void
22325 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22326 {
22327 machine_mode vmode = GET_MODE (target);
22328 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22329 vec_perm_builder perm (nelt, nelt, 1);
22330
22331 high = (highp ? 0 : nelt / 2);
22332 for (i = 0; i < nelt / 2; i++)
22333 {
22334 perm.quick_push (i + high);
22335 perm.quick_push (i + nelt + high);
22336 }
22337
22338 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22339 }
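
For concreteness, the sketch below prints the index vectors the two
expanders above would build for a hypothetical four-element mode.

#include <stdio.h>

int main (void)
{
  unsigned nelt = 4;

  printf ("extract_even: ");
  for (unsigned i = 0; i < nelt; i++)
    printf ("%u ", i * 2);              /* { 0, 2, 4, 6 } */
  printf ("\n");

  for (int highp = 1; highp >= 0; highp--)
    {
      unsigned high = highp ? 0 : nelt / 2;
      printf ("interleave %s: ", highp ? "high" : "low");
      for (unsigned i = 0; i < nelt / 2; i++)
        printf ("%u %u ", i + high, i + nelt + high);
      printf ("\n");    /* high: 0 4 1 5   low: 2 6 3 7 */
    }
  return 0;
}
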
22340
22341 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22342 void
22343 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22344 {
22345 HOST_WIDE_INT hwi_scale (scale);
22346 REAL_VALUE_TYPE r_pow;
22347 rtvec v = rtvec_alloc (2);
22348 rtx elt;
22349 rtx scale_vec = gen_reg_rtx (V2DFmode);
22350 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22351 elt = const_double_from_real_value (r_pow, DFmode);
22352 RTVEC_ELT (v, 0) = elt;
22353 RTVEC_ELT (v, 1) = elt;
22354 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22355 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22356 }
22357
22358 /* Return an RTX representing where to find the function value of a
22359 function returning MODE. */
22360 static rtx
22361 rs6000_complex_function_value (machine_mode mode)
22362 {
22363 unsigned int regno;
22364 rtx r1, r2;
22365 machine_mode inner = GET_MODE_INNER (mode);
22366 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22367
22368 if (TARGET_FLOAT128_TYPE
22369 && (mode == KCmode
22370 || (mode == TCmode && TARGET_IEEEQUAD)))
22371 regno = ALTIVEC_ARG_RETURN;
22372
22373 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22374 regno = FP_ARG_RETURN;
22375
22376 else
22377 {
22378 regno = GP_ARG_RETURN;
22379
22380 /* 32-bit is OK since it'll go in r3/r4. */
22381 if (TARGET_32BIT && inner_bytes >= 4)
22382 return gen_rtx_REG (mode, regno);
22383 }
22384
22385 if (inner_bytes >= 8)
22386 return gen_rtx_REG (mode, regno);
22387
22388 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22389 const0_rtx);
22390 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22391 GEN_INT (inner_bytes));
22392 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22393 }
22394
22395 /* Return an rtx describing a return value of MODE as a PARALLEL
22396 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22397 stride REG_STRIDE. */
22398
22399 static rtx
22400 rs6000_parallel_return (machine_mode mode,
22401 int n_elts, machine_mode elt_mode,
22402 unsigned int regno, unsigned int reg_stride)
22403 {
22404 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22405
22406 int i;
22407 for (i = 0; i < n_elts; i++)
22408 {
22409 rtx r = gen_rtx_REG (elt_mode, regno);
22410 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22411 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22412 regno += reg_stride;
22413 }
22414
22415 return par;
22416 }
22417
22418 /* Target hook for TARGET_FUNCTION_VALUE.
22419
22420 An integer value is in r3 and a floating-point value is in fp1,
22421 unless -msoft-float. */
22422
22423 static rtx
22424 rs6000_function_value (const_tree valtype,
22425 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22426 bool outgoing ATTRIBUTE_UNUSED)
22427 {
22428 machine_mode mode;
22429 unsigned int regno;
22430 machine_mode elt_mode;
22431 int n_elts;
22432
22433 /* Special handling for structs in darwin64. */
22434 if (TARGET_MACHO
22435 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22436 {
22437 CUMULATIVE_ARGS valcum;
22438 rtx valret;
22439
22440 valcum.words = 0;
22441 valcum.fregno = FP_ARG_MIN_REG;
22442 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22443 /* Do a trial code generation as if this were going to be passed as
22444 an argument; if any part goes in memory, we return NULL. */
22445 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22446 if (valret)
22447 return valret;
22448 /* Otherwise fall through to standard ABI rules. */
22449 }
22450
22451 mode = TYPE_MODE (valtype);
22452
22453 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22454 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22455 {
22456 int first_reg, n_regs;
22457
22458 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22459 {
22460 /* _Decimal128 must use even/odd register pairs. */
22461 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22462 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22463 }
22464 else
22465 {
22466 first_reg = ALTIVEC_ARG_RETURN;
22467 n_regs = 1;
22468 }
22469
22470 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22471 }
22472
22473 /* Some return value types need to be split under the 32-bit ABI with -mpowerpc64. */
22474 if (TARGET_32BIT && TARGET_POWERPC64)
22475 switch (mode)
22476 {
22477 default:
22478 break;
22479 case E_DImode:
22480 case E_SCmode:
22481 case E_DCmode:
22482 case E_TCmode:
22483 int count = GET_MODE_SIZE (mode) / 4;
22484 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22485 }
22486
22487 if ((INTEGRAL_TYPE_P (valtype)
22488 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22489 || POINTER_TYPE_P (valtype))
22490 mode = TARGET_32BIT ? SImode : DImode;
22491
22492 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22493 /* _Decimal128 must use an even/odd register pair. */
22494 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22495 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22496 && !FLOAT128_VECTOR_P (mode))
22497 regno = FP_ARG_RETURN;
22498 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22499 && targetm.calls.split_complex_arg)
22500 return rs6000_complex_function_value (mode);
22501 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22502 return register is used in both cases, and we won't see V2DImode/V2DFmode
22503 for pure altivec, combine the two cases. */
22504 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22505 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22506 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22507 regno = ALTIVEC_ARG_RETURN;
22508 else
22509 regno = GP_ARG_RETURN;
22510
22511 return gen_rtx_REG (mode, regno);
22512 }
22513
22514 /* Define how to find the value returned by a library function
22515 assuming the value has mode MODE. */
22516 rtx
22517 rs6000_libcall_value (machine_mode mode)
22518 {
22519 unsigned int regno;
22520
22521 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
22522 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22523 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22524
22525 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22526 /* _Decimal128 must use an even/odd register pair. */
22527 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22528 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22529 regno = FP_ARG_RETURN;
22530 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22531 return register is used in both cases, and we won't see V2DImode/V2DFmode
22532 for pure altivec, combine the two cases. */
22533 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22534 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22535 regno = ALTIVEC_ARG_RETURN;
22536 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22537 return rs6000_complex_function_value (mode);
22538 else
22539 regno = GP_ARG_RETURN;
22540
22541 return gen_rtx_REG (mode, regno);
22542 }
22543
22544 /* Compute register pressure classes. We implement the target hook to avoid
22545 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22546 lead to incorrect estimates of the number of available registers and therefore
22547 increased register pressure/spill. */
22548 static int
22549 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22550 {
22551 int n;
22552
22553 n = 0;
22554 pressure_classes[n++] = GENERAL_REGS;
22555 if (TARGET_VSX)
22556 pressure_classes[n++] = VSX_REGS;
22557 else
22558 {
22559 if (TARGET_ALTIVEC)
22560 pressure_classes[n++] = ALTIVEC_REGS;
22561 if (TARGET_HARD_FLOAT)
22562 pressure_classes[n++] = FLOAT_REGS;
22563 }
22564 pressure_classes[n++] = CR_REGS;
22565 pressure_classes[n++] = SPECIAL_REGS;
22566
22567 return n;
22568 }
22569
22570 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22571 Frame pointer elimination is automatically handled.
22572
22573 For the RS/6000, if frame pointer elimination is being done, we would like
22574 to convert ap into fp, not sp.
22575
22576 We need r30 if -mminimal-toc was specified, and there are constant pool
22577 references. */
22578
22579 static bool
22580 rs6000_can_eliminate (const int from, const int to)
22581 {
22582 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22583 ? ! frame_pointer_needed
22584 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22585 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22586 || constant_pool_empty_p ()
22587 : true);
22588 }
22589
22590 /* Define the offset between two registers, FROM to be eliminated and its
22591 replacement TO, at the start of a routine. */
22592 HOST_WIDE_INT
22593 rs6000_initial_elimination_offset (int from, int to)
22594 {
22595 rs6000_stack_t *info = rs6000_stack_info ();
22596 HOST_WIDE_INT offset;
22597
22598 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22599 offset = info->push_p ? 0 : -info->total_size;
22600 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22601 {
22602 offset = info->push_p ? 0 : -info->total_size;
22603 if (FRAME_GROWS_DOWNWARD)
22604 offset += info->fixed_size + info->vars_size + info->parm_size;
22605 }
22606 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22607 offset = FRAME_GROWS_DOWNWARD
22608 ? info->fixed_size + info->vars_size + info->parm_size
22609 : 0;
22610 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22611 offset = info->total_size;
22612 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22613 offset = info->push_p ? info->total_size : 0;
22614 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22615 offset = 0;
22616 else
22617 gcc_unreachable ();
22618
22619 return offset;
22620 }
22621
22622 /* Fill in the sizes of the registers used by the unwinder. */
22623
22624 static void
22625 rs6000_init_dwarf_reg_sizes_extra (tree address)
22626 {
22627 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22628 {
22629 int i;
22630 machine_mode mode = TYPE_MODE (char_type_node);
22631 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22632 rtx mem = gen_rtx_MEM (BLKmode, addr);
22633 rtx value = gen_int_mode (16, mode);
22634
22635 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22636 The unwinder still needs to know the size of Altivec registers. */
22637
22638 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22639 {
22640 int column = DWARF_REG_TO_UNWIND_COLUMN
22641 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22642 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22643
22644 emit_move_insn (adjust_address (mem, mode, offset), value);
22645 }
22646 }
22647 }
22648
22649 /* Map internal gcc register numbers to debug format register numbers.
22650 FORMAT specifies the type of debug register number to use:
22651 0 -- debug information, except for frame-related sections
22652 1 -- DWARF .debug_frame section
22653 2 -- DWARF .eh_frame section */
22654
22655 unsigned int
22656 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22657 {
22658 /* On some platforms, we use the standard DWARF register
22659 numbering for .debug_info and .debug_frame. */
22660 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22661 {
22662 #ifdef RS6000_USE_DWARF_NUMBERING
22663 if (regno <= 31)
22664 return regno;
22665 if (FP_REGNO_P (regno))
22666 return regno - FIRST_FPR_REGNO + 32;
22667 if (ALTIVEC_REGNO_P (regno))
22668 return regno - FIRST_ALTIVEC_REGNO + 1124;
22669 if (regno == LR_REGNO)
22670 return 108;
22671 if (regno == CTR_REGNO)
22672 return 109;
22673 if (regno == CA_REGNO)
22674 return 101; /* XER */
22675 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22676 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22677 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22678 to the DWARF reg for CR. */
22679 if (format == 1 && regno == CR2_REGNO)
22680 return 64;
22681 if (CR_REGNO_P (regno))
22682 return regno - CR0_REGNO + 86;
22683 if (regno == VRSAVE_REGNO)
22684 return 356;
22685 if (regno == VSCR_REGNO)
22686 return 67;
22687
22688 /* These do not make much sense. */
22689 if (regno == FRAME_POINTER_REGNUM)
22690 return 111;
22691 if (regno == ARG_POINTER_REGNUM)
22692 return 67;
22693 if (regno == 64)
22694 return 100;
22695
22696 gcc_unreachable ();
22697 #endif
22698 }
22699
22700 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22701 information, and also for .eh_frame. */
22702 /* Translate the regnos to their numbers in GCC 7 (and before). */
22703 if (regno <= 31)
22704 return regno;
22705 if (FP_REGNO_P (regno))
22706 return regno - FIRST_FPR_REGNO + 32;
22707 if (ALTIVEC_REGNO_P (regno))
22708 return regno - FIRST_ALTIVEC_REGNO + 77;
22709 if (regno == LR_REGNO)
22710 return 65;
22711 if (regno == CTR_REGNO)
22712 return 66;
22713 if (regno == CA_REGNO)
22714 return 76; /* XER */
22715 if (CR_REGNO_P (regno))
22716 return regno - CR0_REGNO + 68;
22717 if (regno == VRSAVE_REGNO)
22718 return 109;
22719 if (regno == VSCR_REGNO)
22720 return 110;
22721
22722 if (regno == FRAME_POINTER_REGNUM)
22723 return 111;
22724 if (regno == ARG_POINTER_REGNUM)
22725 return 67;
22726 if (regno == 64)
22727 return 64;
22728
22729 gcc_unreachable ();
22730 }
22731
22732 /* target hook eh_return_filter_mode */
22733 static scalar_int_mode
22734 rs6000_eh_return_filter_mode (void)
22735 {
22736 return TARGET_32BIT ? SImode : word_mode;
22737 }
22738
22739 /* Target hook for translate_mode_attribute. */
22740 static machine_mode
22741 rs6000_translate_mode_attribute (machine_mode mode)
22742 {
22743 if ((FLOAT128_IEEE_P (mode)
22744 && ieee128_float_type_node == long_double_type_node)
22745 || (FLOAT128_IBM_P (mode)
22746 && ibm128_float_type_node == long_double_type_node))
22747 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22748 return mode;
22749 }
22750
22751 /* Target hook for scalar_mode_supported_p. */
22752 static bool
22753 rs6000_scalar_mode_supported_p (scalar_mode mode)
22754 {
22755 /* -m32 does not support TImode. This is the default, from
22756 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22757 same ABI as for -m32. But default_scalar_mode_supported_p allows
22758 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22759 for -mpowerpc64. */
22760 if (TARGET_32BIT && mode == TImode)
22761 return false;
22762
22763 if (DECIMAL_FLOAT_MODE_P (mode))
22764 return default_decimal_float_supported_p ();
22765 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22766 return true;
22767 else
22768 return default_scalar_mode_supported_p (mode);
22769 }
22770
22771 /* Target hook for vector_mode_supported_p. */
22772 static bool
22773 rs6000_vector_mode_supported_p (machine_mode mode)
22774 {
22775 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22776 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22777 double-double. */
22778 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22779 return true;
22780
22781 else
22782 return false;
22783 }
22784
22785 /* Target hook for floatn_mode. */
22786 static opt_scalar_float_mode
22787 rs6000_floatn_mode (int n, bool extended)
22788 {
22789 if (extended)
22790 {
22791 switch (n)
22792 {
22793 case 32:
22794 return DFmode;
22795
22796 case 64:
22797 if (TARGET_FLOAT128_TYPE)
22798 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22799 else
22800 return opt_scalar_float_mode ();
22801
22802 case 128:
22803 return opt_scalar_float_mode ();
22804
22805 default:
22806 /* Those are the only valid _FloatNx types. */
22807 gcc_unreachable ();
22808 }
22809 }
22810 else
22811 {
22812 switch (n)
22813 {
22814 case 32:
22815 return SFmode;
22816
22817 case 64:
22818 return DFmode;
22819
22820 case 128:
22821 if (TARGET_FLOAT128_TYPE)
22822 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22823 else
22824 return opt_scalar_float_mode ();
22825
22826 default:
22827 return opt_scalar_float_mode ();
22828 }
22829 }
22830
22831 }
22832
22833 /* Target hook for c_mode_for_suffix. */
22834 static machine_mode
22835 rs6000_c_mode_for_suffix (char suffix)
22836 {
22837 if (TARGET_FLOAT128_TYPE)
22838 {
22839 if (suffix == 'q' || suffix == 'Q')
22840 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22841
22842 /* At the moment, we are not defining a suffix for IBM extended double.
22843 If/when the default for -mabi=ieeelongdouble is changed, and we want
22844 to support __ibm128 constants in legacy library code, we may need to
22845 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22846 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22847 __float80 constants. */
22848 }
22849
22850 return VOIDmode;
22851 }
22852
22853 /* Target hook for invalid_arg_for_unprototyped_fn. */
22854 static const char *
22855 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22856 {
22857 return (!rs6000_darwin64_abi
22858 && typelist == 0
22859 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22860 && (funcdecl == NULL_TREE
22861 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22862 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22863 ? N_("AltiVec argument passed to unprototyped function")
22864 : NULL;
22865 }
22866
22867 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22868 setup by using __stack_chk_fail_local hidden function instead of
22869 calling __stack_chk_fail directly. Otherwise it is better to call
22870 __stack_chk_fail directly. */
22871
22872 static tree ATTRIBUTE_UNUSED
22873 rs6000_stack_protect_fail (void)
22874 {
22875 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22876 ? default_hidden_stack_protect_fail ()
22877 : default_external_stack_protect_fail ();
22878 }
22879
22880 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22881
22882 #if TARGET_ELF
22883 static unsigned HOST_WIDE_INT
22884 rs6000_asan_shadow_offset (void)
22885 {
22886 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22887 }
22888 #endif
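
For context, this offset feeds AddressSanitizer's usual direct shadow
mapping: with ASan's default shadow scale of 3, every 8-byte granule of
application memory has one shadow byte at (addr >> 3) + offset.  The
standalone sketch below (hypothetical address value) just evaluates that
mapping with the 64-bit offset returned above.

#include <stdint.h>
#include <stdio.h>

int main (void)
{
  const int target_64bit = 1;                   /* stand-in for TARGET_64BIT */
  uint64_t offset = (uint64_t) 1 << (target_64bit ? 41 : 29);
  uint64_t addr = 0x10000000;                   /* hypothetical address */

  /* With shadow scale 3, eight application bytes share one shadow byte.  */
  printf ("shadow(0x%llx) = 0x%llx\n",
          (unsigned long long) addr,
          (unsigned long long) ((addr >> 3) + offset));
  return 0;
}
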
22889 \f
22890 /* Mask options that we want to support inside attribute((target)) and
22891 #pragma GCC target operations. Note, we do not include things like
22892 64/32-bit, endianness, hard/soft floating point, etc. that would have
22893 different calling sequences. */
22894
22895 struct rs6000_opt_mask {
22896 const char *name; /* option name */
22897 HOST_WIDE_INT mask; /* mask to set */
22898 bool invert; /* invert sense of mask */
22899 bool valid_target; /* option is a target option */
22900 };
22901
22902 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22903 {
22904 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22905 { "cmpb", OPTION_MASK_CMPB, false, true },
22906 { "crypto", OPTION_MASK_CRYPTO, false, true },
22907 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22908 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22909 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22910 false, true },
22911 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22912 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22913 { "fprnd", OPTION_MASK_FPRND, false, true },
22914 { "future", OPTION_MASK_FUTURE, false, true },
22915 { "hard-dfp", OPTION_MASK_DFP, false, true },
22916 { "htm", OPTION_MASK_HTM, false, true },
22917 { "isel", OPTION_MASK_ISEL, false, true },
22918 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22919 { "mfpgpr", 0, false, true },
22920 { "modulo", OPTION_MASK_MODULO, false, true },
22921 { "mulhw", OPTION_MASK_MULHW, false, true },
22922 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22923 { "pcrel", OPTION_MASK_PCREL, false, true },
22924 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22925 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22926 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22927 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22928 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22929 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22930 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22931 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22932 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22933 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22934 { "prefixed", OPTION_MASK_PREFIXED, false, true },
22935 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22936 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22937 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22938 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22939 { "string", 0, false, true },
22940 { "update", OPTION_MASK_NO_UPDATE, true , true },
22941 { "vsx", OPTION_MASK_VSX, false, true },
22942 #ifdef OPTION_MASK_64BIT
22943 #if TARGET_AIX_OS
22944 { "aix64", OPTION_MASK_64BIT, false, false },
22945 { "aix32", OPTION_MASK_64BIT, true, false },
22946 #else
22947 { "64", OPTION_MASK_64BIT, false, false },
22948 { "32", OPTION_MASK_64BIT, true, false },
22949 #endif
22950 #endif
22951 #ifdef OPTION_MASK_EABI
22952 { "eabi", OPTION_MASK_EABI, false, false },
22953 #endif
22954 #ifdef OPTION_MASK_LITTLE_ENDIAN
22955 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22956 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22957 #endif
22958 #ifdef OPTION_MASK_RELOCATABLE
22959 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22960 #endif
22961 #ifdef OPTION_MASK_STRICT_ALIGN
22962 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22963 #endif
22964 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22965 { "string", 0, false, false },
22966 };
22967
22968 /* Builtin mask mapping for printing the flags. */
22969 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22970 {
22971 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22972 { "vsx", RS6000_BTM_VSX, false, false },
22973 { "fre", RS6000_BTM_FRE, false, false },
22974 { "fres", RS6000_BTM_FRES, false, false },
22975 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22976 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22977 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22978 { "cell", RS6000_BTM_CELL, false, false },
22979 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22980 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22981 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22982 { "crypto", RS6000_BTM_CRYPTO, false, false },
22983 { "htm", RS6000_BTM_HTM, false, false },
22984 { "hard-dfp", RS6000_BTM_DFP, false, false },
22985 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22986 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22987 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22988 { "float128", RS6000_BTM_FLOAT128, false, false },
22989 { "float128-hw", RS6000_BTM_FLOAT128_HW, false, false },
22990 };
22991
22992 /* Option variables that we want to support inside attribute((target)) and
22993 #pragma GCC target operations. */
22994
22995 struct rs6000_opt_var {
22996 const char *name; /* option name */
22997 size_t global_offset; /* offset of the option in global_options. */
22998 size_t target_offset; /* offset of the option in target options. */
22999 };
23000
23001 static struct rs6000_opt_var const rs6000_opt_vars[] =
23002 {
23003 { "friz",
23004 offsetof (struct gcc_options, x_TARGET_FRIZ),
23005 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23006 { "avoid-indexed-addresses",
23007 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23008 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23009 { "longcall",
23010 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23011 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23012 { "optimize-swaps",
23013 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23014 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23015 { "allow-movmisalign",
23016 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23017 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23018 { "sched-groups",
23019 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23020 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23021 { "always-hint",
23022 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23023 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23024 { "align-branch-targets",
23025 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23026 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23027 { "sched-prolog",
23028 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23029 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23030 { "sched-epilog",
23031 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23032 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23033 { "speculate-indirect-jumps",
23034 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23035 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
23036 };
23037
23038 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23039 parsing. Return true if there were no errors. */
23040
23041 static bool
23042 rs6000_inner_target_options (tree args, bool attr_p)
23043 {
23044 bool ret = true;
23045
23046 if (args == NULL_TREE)
23047 ;
23048
23049 else if (TREE_CODE (args) == STRING_CST)
23050 {
23051 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23052 char *q;
23053
23054 while ((q = strtok (p, ",")) != NULL)
23055 {
23056 bool error_p = false;
23057 bool not_valid_p = false;
23058 const char *cpu_opt = NULL;
23059
23060 p = NULL;
23061 if (strncmp (q, "cpu=", 4) == 0)
23062 {
23063 int cpu_index = rs6000_cpu_name_lookup (q+4);
23064 if (cpu_index >= 0)
23065 rs6000_cpu_index = cpu_index;
23066 else
23067 {
23068 error_p = true;
23069 cpu_opt = q+4;
23070 }
23071 }
23072 else if (strncmp (q, "tune=", 5) == 0)
23073 {
23074 int tune_index = rs6000_cpu_name_lookup (q+5);
23075 if (tune_index >= 0)
23076 rs6000_tune_index = tune_index;
23077 else
23078 {
23079 error_p = true;
23080 cpu_opt = q+5;
23081 }
23082 }
23083 else
23084 {
23085 size_t i;
23086 bool invert = false;
23087 char *r = q;
23088
23089 error_p = true;
23090 if (strncmp (r, "no-", 3) == 0)
23091 {
23092 invert = true;
23093 r += 3;
23094 }
23095
23096 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23097 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23098 {
23099 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23100
23101 if (!rs6000_opt_masks[i].valid_target)
23102 not_valid_p = true;
23103 else
23104 {
23105 error_p = false;
23106 rs6000_isa_flags_explicit |= mask;
23107
23108 /* VSX needs altivec, so -mvsx automagically sets
23109 altivec and disables -mavoid-indexed-addresses. */
23110 if (!invert)
23111 {
23112 if (mask == OPTION_MASK_VSX)
23113 {
23114 mask |= OPTION_MASK_ALTIVEC;
23115 TARGET_AVOID_XFORM = 0;
23116 }
23117 }
23118
23119 if (rs6000_opt_masks[i].invert)
23120 invert = !invert;
23121
23122 if (invert)
23123 rs6000_isa_flags &= ~mask;
23124 else
23125 rs6000_isa_flags |= mask;
23126 }
23127 break;
23128 }
23129
23130 if (error_p && !not_valid_p)
23131 {
23132 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23133 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23134 {
23135 size_t j = rs6000_opt_vars[i].global_offset;
23136 *((int *) ((char *)&global_options + j)) = !invert;
23137 error_p = false;
23138 not_valid_p = false;
23139 break;
23140 }
23141 }
23142 }
23143
23144 if (error_p)
23145 {
23146 const char *eprefix, *esuffix;
23147
23148 ret = false;
23149 if (attr_p)
23150 {
23151 eprefix = "__attribute__((__target__(";
23152 esuffix = ")))";
23153 }
23154 else
23155 {
23156 eprefix = "#pragma GCC target ";
23157 esuffix = "";
23158 }
23159
23160 if (cpu_opt)
23161 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23162 q, esuffix);
23163 else if (not_valid_p)
23164 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23165 else
23166 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23167 }
23168 }
23169 }
23170
23171 else if (TREE_CODE (args) == TREE_LIST)
23172 {
23173 do
23174 {
23175 tree value = TREE_VALUE (args);
23176 if (value)
23177 {
23178 bool ret2 = rs6000_inner_target_options (value, attr_p);
23179 if (!ret2)
23180 ret = false;
23181 }
23182 args = TREE_CHAIN (args);
23183 }
23184 while (args != NULL_TREE);
23185 }
23186
23187 else
23188 {
23189 error ("attribute %<target%> argument not a string");
23190 return false;
23191 }
23192
23193 return ret;
23194 }
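
The tokenizing idiom above is worth noting: strtok is given the buffer only
on the first call, and p is then set to NULL so that later calls continue
through the same string.  A standalone C sketch of the same pattern, with
hypothetical option names:

#include <stdio.h>
#include <string.h>

int main (void)
{
  char buf[] = "vsx,no-htm,cpu=power9";   /* writable copy, as ASTRDUP makes */
  char *p = buf, *q;

  while ((q = strtok (p, ",")) != NULL)
    {
      p = NULL;                           /* continue tokenizing BUF */
      if (strncmp (q, "cpu=", 4) == 0)
        printf ("cpu: %s\n", q + 4);
      else if (strncmp (q, "no-", 3) == 0)
        printf ("disable: %s\n", q + 3);  /* "no-" inverts the option */
      else
        printf ("enable: %s\n", q);
    }
  return 0;
}
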
23195
23196 /* Print out the target options as a list for -mdebug=target. */
23197
23198 static void
23199 rs6000_debug_target_options (tree args, const char *prefix)
23200 {
23201 if (args == NULL_TREE)
23202 fprintf (stderr, "%s<NULL>", prefix);
23203
23204 else if (TREE_CODE (args) == STRING_CST)
23205 {
23206 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23207 char *q;
23208
23209 while ((q = strtok (p, ",")) != NULL)
23210 {
23211 p = NULL;
23212 fprintf (stderr, "%s\"%s\"", prefix, q);
23213 prefix = ", ";
23214 }
23215 }
23216
23217 else if (TREE_CODE (args) == TREE_LIST)
23218 {
23219 do
23220 {
23221 tree value = TREE_VALUE (args);
23222 if (value)
23223 {
23224 rs6000_debug_target_options (value, prefix);
23225 prefix = ", ";
23226 }
23227 args = TREE_CHAIN (args);
23228 }
23229 while (args != NULL_TREE);
23230 }
23231
23232 else
23233 gcc_unreachable ();
23234
23235 return;
23236 }
23237
23238 \f
23239 /* Hook to validate attribute((target("..."))). */
23240
23241 static bool
23242 rs6000_valid_attribute_p (tree fndecl,
23243 tree ARG_UNUSED (name),
23244 tree args,
23245 int flags)
23246 {
23247 struct cl_target_option cur_target;
23248 bool ret;
23249 tree old_optimize;
23250 tree new_target, new_optimize;
23251 tree func_optimize;
23252
23253 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23254
23255 if (TARGET_DEBUG_TARGET)
23256 {
23257 tree tname = DECL_NAME (fndecl);
23258 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23259 if (tname)
23260 fprintf (stderr, "function: %.*s\n",
23261 (int) IDENTIFIER_LENGTH (tname),
23262 IDENTIFIER_POINTER (tname));
23263 else
23264 fprintf (stderr, "function: unknown\n");
23265
23266 fprintf (stderr, "args:");
23267 rs6000_debug_target_options (args, " ");
23268 fprintf (stderr, "\n");
23269
23270 if (flags)
23271 fprintf (stderr, "flags: 0x%x\n", flags);
23272
23273 fprintf (stderr, "--------------------\n");
23274 }
23275
23276 /* attribute((target("default"))) does nothing, beyond
23277 affecting multi-versioning. */
23278 if (TREE_VALUE (args)
23279 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23280 && TREE_CHAIN (args) == NULL_TREE
23281 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23282 return true;
23283
23284 old_optimize = build_optimization_node (&global_options);
23285 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23286
23287 /* If the function changed the optimization levels as well as setting target
23288 options, start with the optimizations specified. */
23289 if (func_optimize && func_optimize != old_optimize)
23290 cl_optimization_restore (&global_options,
23291 TREE_OPTIMIZATION (func_optimize));
23292
23293 /* The target attributes may also change some optimization flags, so update
23294 the optimization options if necessary. */
23295 cl_target_option_save (&cur_target, &global_options);
23296 rs6000_cpu_index = rs6000_tune_index = -1;
23297 ret = rs6000_inner_target_options (args, true);
23298
23299 /* Set up any additional state. */
23300 if (ret)
23301 {
23302 ret = rs6000_option_override_internal (false);
23303 new_target = build_target_option_node (&global_options);
23304 }
23305 else
23306 new_target = NULL;
23307
23308 new_optimize = build_optimization_node (&global_options);
23309
23310 if (!new_target)
23311 ret = false;
23312
23313 else if (fndecl)
23314 {
23315 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23316
23317 if (old_optimize != new_optimize)
23318 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23319 }
23320
23321 cl_target_option_restore (&global_options, &cur_target);
23322
23323 if (old_optimize != new_optimize)
23324 cl_optimization_restore (&global_options,
23325 TREE_OPTIMIZATION (old_optimize));
23326
23327 return ret;
23328 }
23329
23330 \f
23331 /* Hook to validate the current #pragma GCC target and set the state, and
23332 update the macros based on what was changed. If ARGS is NULL, then
23333 POP_TARGET is used to reset the options. */
23334
23335 bool
23336 rs6000_pragma_target_parse (tree args, tree pop_target)
23337 {
23338 tree prev_tree = build_target_option_node (&global_options);
23339 tree cur_tree;
23340 struct cl_target_option *prev_opt, *cur_opt;
23341 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23342 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23343
23344 if (TARGET_DEBUG_TARGET)
23345 {
23346 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23347 fprintf (stderr, "args:");
23348 rs6000_debug_target_options (args, " ");
23349 fprintf (stderr, "\n");
23350
23351 if (pop_target)
23352 {
23353 fprintf (stderr, "pop_target:\n");
23354 debug_tree (pop_target);
23355 }
23356 else
23357 fprintf (stderr, "pop_target: <NULL>\n");
23358
23359 fprintf (stderr, "--------------------\n");
23360 }
23361
23362 if (! args)
23363 {
23364 cur_tree = ((pop_target)
23365 ? pop_target
23366 : target_option_default_node);
23367 cl_target_option_restore (&global_options,
23368 TREE_TARGET_OPTION (cur_tree));
23369 }
23370 else
23371 {
23372 rs6000_cpu_index = rs6000_tune_index = -1;
23373 if (!rs6000_inner_target_options (args, false)
23374 || !rs6000_option_override_internal (false)
23375 || (cur_tree = build_target_option_node (&global_options))
23376 == NULL_TREE)
23377 {
23378 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23379 fprintf (stderr, "invalid pragma\n");
23380
23381 return false;
23382 }
23383 }
23384
23385 target_option_current_node = cur_tree;
23386 rs6000_activate_target_options (target_option_current_node);
23387
23388 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23389 change the macros that are defined. */
23390 if (rs6000_target_modify_macros_ptr)
23391 {
23392 prev_opt = TREE_TARGET_OPTION (prev_tree);
23393 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23394 prev_flags = prev_opt->x_rs6000_isa_flags;
23395
23396 cur_opt = TREE_TARGET_OPTION (cur_tree);
23397 cur_flags = cur_opt->x_rs6000_isa_flags;
23398 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23399
23400 diff_bumask = (prev_bumask ^ cur_bumask);
23401 diff_flags = (prev_flags ^ cur_flags);
23402
23403 if ((diff_flags != 0) || (diff_bumask != 0))
23404 {
23405 /* Delete old macros. */
23406 rs6000_target_modify_macros_ptr (false,
23407 prev_flags & diff_flags,
23408 prev_bumask & diff_bumask);
23409
23410 /* Define new macros. */
23411 rs6000_target_modify_macros_ptr (true,
23412 cur_flags & diff_flags,
23413 cur_bumask & diff_bumask);
23414 }
23415 }
23416
23417 return true;
23418 }
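
/* Illustrative sketch, not part of the build: a typical use of the pragma
   handled above.  push_options/pop_options drive the POP_TARGET path when
   the saved state is restored; the function name is hypothetical.  */
#if 0
#pragma GCC push_options
#pragma GCC target ("cpu=power9")
int
hypothetical_p9_only (int x)
{
  return x * 2;
}
#pragma GCC pop_options
#endif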
23419
23420 \f
23421 /* Remember the last target of rs6000_set_current_function. */
23422 static GTY(()) tree rs6000_previous_fndecl;
23423
23424 /* Restore target's globals from NEW_TREE and invalidate the
23425 rs6000_previous_fndecl cache. */
23426
23427 void
23428 rs6000_activate_target_options (tree new_tree)
23429 {
23430 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23431 if (TREE_TARGET_GLOBALS (new_tree))
23432 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23433 else if (new_tree == target_option_default_node)
23434 restore_target_globals (&default_target_globals);
23435 else
23436 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23437 rs6000_previous_fndecl = NULL_TREE;
23438 }
23439
23440 /* Establish appropriate back-end context for processing the function
23441 FNDECL. The argument might be NULL to indicate processing at top
23442 level, outside of any function scope. */
23443 static void
23444 rs6000_set_current_function (tree fndecl)
23445 {
23446 if (TARGET_DEBUG_TARGET)
23447 {
23448 fprintf (stderr, "\n==================== rs6000_set_current_function");
23449
23450 if (fndecl)
23451 fprintf (stderr, ", fndecl %s (%p)",
23452 (DECL_NAME (fndecl)
23453 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23454 : "<unknown>"), (void *)fndecl);
23455
23456 if (rs6000_previous_fndecl)
23457 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23458
23459 fprintf (stderr, "\n");
23460 }
23461
23462 /* Only change the context if the function changes. This hook is called
23463 several times in the course of compiling a function, and we don't want to
23464 slow things down too much or call target_reinit when it isn't safe. */
23465 if (fndecl == rs6000_previous_fndecl)
23466 return;
23467
23468 tree old_tree;
23469 if (rs6000_previous_fndecl == NULL_TREE)
23470 old_tree = target_option_current_node;
23471 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23472 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23473 else
23474 old_tree = target_option_default_node;
23475
23476 tree new_tree;
23477 if (fndecl == NULL_TREE)
23478 {
23479 if (old_tree != target_option_current_node)
23480 new_tree = target_option_current_node;
23481 else
23482 new_tree = NULL_TREE;
23483 }
23484 else
23485 {
23486 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23487 if (new_tree == NULL_TREE)
23488 new_tree = target_option_default_node;
23489 }
23490
23491 if (TARGET_DEBUG_TARGET)
23492 {
23493 if (new_tree)
23494 {
23495 fprintf (stderr, "\nnew fndecl target specific options:\n");
23496 debug_tree (new_tree);
23497 }
23498
23499 if (old_tree)
23500 {
23501 fprintf (stderr, "\nold fndecl target specific options:\n");
23502 debug_tree (old_tree);
23503 }
23504
23505 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23506 fprintf (stderr, "--------------------\n");
23507 }
23508
23509 if (new_tree && old_tree != new_tree)
23510 rs6000_activate_target_options (new_tree);
23511
23512 if (fndecl)
23513 rs6000_previous_fndecl = fndecl;
23514 }
23515
23516 \f
23517 /* Save the current options. */
23518
23519 static void
23520 rs6000_function_specific_save (struct cl_target_option *ptr,
23521 struct gcc_options *opts)
23522 {
23523 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23524 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23525 }
23526
23527 /* Restore the current options. */
23528
23529 static void
23530 rs6000_function_specific_restore (struct gcc_options *opts,
23531 struct cl_target_option *ptr)
23532
23533 {
23534 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23535 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23536 (void) rs6000_option_override_internal (false);
23537 }
23538
23539 /* Print the current options. */
23540
23541 static void
23542 rs6000_function_specific_print (FILE *file, int indent,
23543 struct cl_target_option *ptr)
23544 {
23545 rs6000_print_isa_options (file, indent, "Isa options set",
23546 ptr->x_rs6000_isa_flags);
23547
23548 rs6000_print_isa_options (file, indent, "Isa options explicit",
23549 ptr->x_rs6000_isa_flags_explicit);
23550 }
23551
23552 /* Helper function to print the current isa or misc options on a line. */
23553
23554 static void
23555 rs6000_print_options_internal (FILE *file,
23556 int indent,
23557 const char *string,
23558 HOST_WIDE_INT flags,
23559 const char *prefix,
23560 const struct rs6000_opt_mask *opts,
23561 size_t num_elements)
23562 {
23563 size_t i;
23564 size_t start_column = 0;
23565 size_t cur_column;
23566 size_t max_column = 120;
23567 size_t prefix_len = strlen (prefix);
23568 size_t comma_len = 0;
23569 const char *comma = "";
23570
23571 if (indent)
23572 start_column += fprintf (file, "%*s", indent, "");
23573
23574 if (!flags)
23575 {
23576 fprintf (file, DEBUG_FMT_S, string, "<none>");
23577 return;
23578 }
23579
23580 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23581
23582 /* Print the various mask options. */
23583 cur_column = start_column;
23584 for (i = 0; i < num_elements; i++)
23585 {
23586 bool invert = opts[i].invert;
23587 const char *name = opts[i].name;
23588 const char *no_str = "";
23589 HOST_WIDE_INT mask = opts[i].mask;
23590 size_t len = comma_len + prefix_len + strlen (name);
23591
23592 if (!invert)
23593 {
23594 if ((flags & mask) == 0)
23595 {
23596 no_str = "no-";
23597 len += strlen ("no-");
23598 }
23599
23600 flags &= ~mask;
23601 }
23602
23603 else
23604 {
23605 if ((flags & mask) != 0)
23606 {
23607 no_str = "no-";
23608 len += strlen ("no-");
23609 }
23610
23611 flags |= mask;
23612 }
23613
23614 cur_column += len;
23615 if (cur_column > max_column)
23616 {
23617 fprintf (file, ", \\\n%*s", (int)start_column, "");
23618 cur_column = start_column + len;
23619 comma = "";
23620 }
23621
23622 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23623 comma = ", ";
23624 comma_len = strlen (", ");
23625 }
23626
23627 fputs ("\n", file);
23628 }
23629
23630 /* Helper function to print the current isa options on a line. */
23631
23632 static void
23633 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23634 HOST_WIDE_INT flags)
23635 {
23636 rs6000_print_options_internal (file, indent, string, flags, "-m",
23637 &rs6000_opt_masks[0],
23638 ARRAY_SIZE (rs6000_opt_masks));
23639 }
23640
23641 static void
23642 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23643 HOST_WIDE_INT flags)
23644 {
23645 rs6000_print_options_internal (file, indent, string, flags, "",
23646 &rs6000_builtin_mask_names[0],
23647 ARRAY_SIZE (rs6000_builtin_mask_names));
23648 }
23649
23650 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23651 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23652 -mupper-regs-df, etc.).
23653
23654 If the user used -mno-power8-vector, we need to turn off all of the implicit
23655 ISA 2.07 and 3.0 options that relate to the vector unit.
23656
23657 If the user used -mno-power9-vector, we need to turn off all of the implicit
23658 ISA 3.0 options that relate to the vector unit.
23659
23660 This function does not handle explicit options such as the user specifying
23661 -mdirect-move. These are handled in rs6000_option_override_internal, and
23662 the appropriate error is given if needed.
23663
23664 We return a mask of all of the implicit options that should not be enabled
23665 by default. */
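
/* For example (hypothetical invocation): combining an explicit
   "-mdirect-move" with "-mno-vsx" is rejected, with the error reported for
   such a conflict reading roughly:

       error: '-mno-vsx' turns off '-mdirect-move'

   since direct-move is one of the vector-unit options implied by VSX.  */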
23666
23667 static HOST_WIDE_INT
23668 rs6000_disable_incompatible_switches (void)
23669 {
23670 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23671 size_t i, j;
23672
23673 static const struct {
23674 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23675 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23676 const char *const name; /* name of the switch. */
23677 } flags[] = {
23678 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23679 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23680 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23681 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23682 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
23683 };
23684
23685 for (i = 0; i < ARRAY_SIZE (flags); i++)
23686 {
23687 HOST_WIDE_INT no_flag = flags[i].no_flag;
23688
23689 if ((rs6000_isa_flags & no_flag) == 0
23690 && (rs6000_isa_flags_explicit & no_flag) != 0)
23691 {
23692 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23693 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23694 & rs6000_isa_flags
23695 & dep_flags);
23696
23697 if (set_flags)
23698 {
23699 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23700 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23701 {
23702 set_flags &= ~rs6000_opt_masks[j].mask;
23703 error ("%<-mno-%s%> turns off %<-m%s%>",
23704 flags[i].name,
23705 rs6000_opt_masks[j].name);
23706 }
23707
23708 gcc_assert (!set_flags);
23709 }
23710
23711 rs6000_isa_flags &= ~dep_flags;
23712 ignore_masks |= no_flag | dep_flags;
23713 }
23714 }
23715
23716 return ignore_masks;
23717 }
23718
23719 \f
23720 /* Helper function for printing the function name when debugging. */
23721
23722 static const char *
23723 get_decl_name (tree fn)
23724 {
23725 tree name;
23726
23727 if (!fn)
23728 return "<null>";
23729
23730 name = DECL_NAME (fn);
23731 if (!name)
23732 return "<no-name>";
23733
23734 return IDENTIFIER_POINTER (name);
23735 }
23736
23737 /* Return the clone id of the target we are compiling code for in a target
23738 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23739 the priority list for the target clones (ordered from lowest to
23740 highest). */
23741
23742 static int
23743 rs6000_clone_priority (tree fndecl)
23744 {
23745 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23746 HOST_WIDE_INT isa_masks;
23747 int ret = CLONE_DEFAULT;
23748 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23749 const char *attrs_str = NULL;
23750
23751 attrs = TREE_VALUE (TREE_VALUE (attrs));
23752 attrs_str = TREE_STRING_POINTER (attrs);
23753
23754 /* Return priority zero for the default function. Return the ISA needed for
23755 the function if it is not the default. */
23756 if (strcmp (attrs_str, "default") != 0)
23757 {
23758 if (fn_opts == NULL_TREE)
23759 fn_opts = target_option_default_node;
23760
23761 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23762 isa_masks = rs6000_isa_flags;
23763 else
23764 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23765
23766 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23767 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23768 break;
23769 }
23770
23771 if (TARGET_DEBUG_TARGET)
23772 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
23773 get_decl_name (fndecl), ret);
23774
23775 return ret;
23776 }
23777
23778 /* This compares the priority of target features in function DECL1 and DECL2.
23779 It returns a positive value if DECL1 is higher priority, a negative value if
23780 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23781 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23782
23783 static int
23784 rs6000_compare_version_priority (tree decl1, tree decl2)
23785 {
23786 int priority1 = rs6000_clone_priority (decl1);
23787 int priority2 = rs6000_clone_priority (decl2);
23788 int ret = priority1 - priority2;
23789
23790 if (TARGET_DEBUG_TARGET)
23791 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23792 get_decl_name (decl1), get_decl_name (decl2), ret);
23793
23794 return ret;
23795 }
23796
23797 /* Make a dispatcher declaration for the multi-versioned function DECL.
23798 Calls to the DECL function will be replaced with calls to the dispatcher
23799 by the front-end. Returns the decl of the dispatcher function. */
23800
23801 static tree
23802 rs6000_get_function_versions_dispatcher (void *decl)
23803 {
23804 tree fn = (tree) decl;
23805 struct cgraph_node *node = NULL;
23806 struct cgraph_node *default_node = NULL;
23807 struct cgraph_function_version_info *node_v = NULL;
23808 struct cgraph_function_version_info *first_v = NULL;
23809
23810 tree dispatch_decl = NULL;
23811
23812 struct cgraph_function_version_info *default_version_info = NULL;
23813 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23814
23815 if (TARGET_DEBUG_TARGET)
23816 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23817 get_decl_name (fn));
23818
23819 node = cgraph_node::get (fn);
23820 gcc_assert (node != NULL);
23821
23822 node_v = node->function_version ();
23823 gcc_assert (node_v != NULL);
23824
23825 if (node_v->dispatcher_resolver != NULL)
23826 return node_v->dispatcher_resolver;
23827
23828 /* Find the default version and make it the first node. */
23829 first_v = node_v;
23830 /* Go to the beginning of the chain. */
23831 while (first_v->prev != NULL)
23832 first_v = first_v->prev;
23833
23834 default_version_info = first_v;
23835 while (default_version_info != NULL)
23836 {
23837 const tree decl2 = default_version_info->this_node->decl;
23838 if (is_function_default_version (decl2))
23839 break;
23840 default_version_info = default_version_info->next;
23841 }
23842
23843 /* If there is no default node, just return NULL. */
23844 if (default_version_info == NULL)
23845 return NULL;
23846
23847 /* Make default info the first node. */
23848 if (first_v != default_version_info)
23849 {
23850 default_version_info->prev->next = default_version_info->next;
23851 if (default_version_info->next)
23852 default_version_info->next->prev = default_version_info->prev;
23853 first_v->prev = default_version_info;
23854 default_version_info->next = first_v;
23855 default_version_info->prev = NULL;
23856 }
23857
23858 default_node = default_version_info->this_node;
23859
23860 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23861 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23862 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23863 "exports hardware capability bits");
23864 #else
23865
23866 if (targetm.has_ifunc_p ())
23867 {
23868 struct cgraph_function_version_info *it_v = NULL;
23869 struct cgraph_node *dispatcher_node = NULL;
23870 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23871
23872 /* Right now, the dispatching is done via ifunc. */
23873 dispatch_decl = make_dispatcher_decl (default_node->decl);
23874
23875 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23876 gcc_assert (dispatcher_node != NULL);
23877 dispatcher_node->dispatcher_function = 1;
23878 dispatcher_version_info
23879 = dispatcher_node->insert_new_function_version ();
23880 dispatcher_version_info->next = default_version_info;
23881 dispatcher_node->definition = 1;
23882
23883 /* Set the dispatcher for all the versions. */
23884 it_v = default_version_info;
23885 while (it_v != NULL)
23886 {
23887 it_v->dispatcher_resolver = dispatch_decl;
23888 it_v = it_v->next;
23889 }
23890 }
23891 else
23892 {
23893 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23894 "multiversioning needs ifunc which is not supported "
23895 "on this target");
23896 }
23897 #endif
23898
23899 return dispatch_decl;
23900 }
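
/* Illustrative sketch, not part of the build: a multi-versioned function for
   which the dispatcher above is built.  The function name is hypothetical;
   the version strings follow the "target_clones" syntax used on rs6000.  */
#if 0
__attribute__ ((target_clones ("cpu=power9", "default")))
long
hypothetical_mod (long a, long b)
{
  return (a % b) + b;
}
#endif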
23901
23902 /* Make the resolver function decl to dispatch the versions of a multi-
23903 versioned function, DEFAULT_DECL. Create an empty basic block in the
23904 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23905 function. */
23906
23907 static tree
23908 make_resolver_func (const tree default_decl,
23909 const tree dispatch_decl,
23910 basic_block *empty_bb)
23911 {
23912 /* Make the resolver function static. The resolver function returns
23913 void *. */
23914 tree decl_name = clone_function_name (default_decl, "resolver");
23915 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23916 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23917 tree decl = build_fn_decl (resolver_name, type);
23918 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23919
23920 DECL_NAME (decl) = decl_name;
23921 TREE_USED (decl) = 1;
23922 DECL_ARTIFICIAL (decl) = 1;
23923 DECL_IGNORED_P (decl) = 0;
23924 TREE_PUBLIC (decl) = 0;
23925 DECL_UNINLINABLE (decl) = 1;
23926
23927 /* Resolver is not external, body is generated. */
23928 DECL_EXTERNAL (decl) = 0;
23929 DECL_EXTERNAL (dispatch_decl) = 0;
23930
23931 DECL_CONTEXT (decl) = NULL_TREE;
23932 DECL_INITIAL (decl) = make_node (BLOCK);
23933 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23934
23935 if (DECL_COMDAT_GROUP (default_decl)
23936 || TREE_PUBLIC (default_decl))
23937 {
23938 /* In this case, each translation unit with a call to this
23939 versioned function will put out a resolver. Ensure it
23940 is comdat to keep just one copy. */
23941 DECL_COMDAT (decl) = 1;
23942 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
23943 }
23944 else
23945 TREE_PUBLIC (dispatch_decl) = 0;
23946
23947 /* Build result decl and add to function_decl. */
23948 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23949 DECL_CONTEXT (t) = decl;
23950 DECL_ARTIFICIAL (t) = 1;
23951 DECL_IGNORED_P (t) = 1;
23952 DECL_RESULT (decl) = t;
23953
23954 gimplify_function_tree (decl);
23955 push_cfun (DECL_STRUCT_FUNCTION (decl));
23956 *empty_bb = init_lowered_empty_function (decl, false,
23957 profile_count::uninitialized ());
23958
23959 cgraph_node::add_new_function (decl, true);
23960 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23961
23962 pop_cfun ();
23963
23964 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23965 DECL_ATTRIBUTES (dispatch_decl)
23966 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23967
23968 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23969
23970 return decl;
23971 }
23972
23973 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23974 return a pointer to VERSION_DECL if we are running on a machine that
23975 supports the index CLONE_ISA hardware architecture bits. This function will
23976 be called during version dispatch to decide which function version to
23977 execute. It returns the basic block at the end, to which more conditions
23978 can be added. */
23979
23980 static basic_block
23981 add_condition_to_bb (tree function_decl, tree version_decl,
23982 int clone_isa, basic_block new_bb)
23983 {
23984 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23985
23986 gcc_assert (new_bb != NULL);
23987 gimple_seq gseq = bb_seq (new_bb);
23988
23989
23990 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23991 build_fold_addr_expr (version_decl));
23992 tree result_var = create_tmp_var (ptr_type_node);
23993 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23994 gimple *return_stmt = gimple_build_return (result_var);
23995
23996 if (clone_isa == CLONE_DEFAULT)
23997 {
23998 gimple_seq_add_stmt (&gseq, convert_stmt);
23999 gimple_seq_add_stmt (&gseq, return_stmt);
24000 set_bb_seq (new_bb, gseq);
24001 gimple_set_bb (convert_stmt, new_bb);
24002 gimple_set_bb (return_stmt, new_bb);
24003 pop_cfun ();
24004 return new_bb;
24005 }
24006
24007 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24008 tree cond_var = create_tmp_var (bool_int_type_node);
24009 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24010 const char *arg_str = rs6000_clone_map[clone_isa].name;
24011 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24012 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24013 gimple_call_set_lhs (call_cond_stmt, cond_var);
24014
24015 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24016 gimple_set_bb (call_cond_stmt, new_bb);
24017 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24018
24019 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24020 NULL_TREE, NULL_TREE);
24021 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24022 gimple_set_bb (if_else_stmt, new_bb);
24023 gimple_seq_add_stmt (&gseq, if_else_stmt);
24024
24025 gimple_seq_add_stmt (&gseq, convert_stmt);
24026 gimple_seq_add_stmt (&gseq, return_stmt);
24027 set_bb_seq (new_bb, gseq);
24028
24029 basic_block bb1 = new_bb;
24030 edge e12 = split_block (bb1, if_else_stmt);
24031 basic_block bb2 = e12->dest;
24032 e12->flags &= ~EDGE_FALLTHRU;
24033 e12->flags |= EDGE_TRUE_VALUE;
24034
24035 edge e23 = split_block (bb2, return_stmt);
24036 gimple_set_bb (convert_stmt, bb2);
24037 gimple_set_bb (return_stmt, bb2);
24038
24039 basic_block bb3 = e23->dest;
24040 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24041
24042 remove_edge (e23);
24043 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24044
24045 pop_cfun ();
24046 return bb3;
24047 }
24048
24049 /* This function generates the dispatch function for multi-versioned functions.
24050 DISPATCH_DECL is the function which will contain the dispatch logic.
24051 FNDECLS are the function choices for dispatch, and is a tree chain.
24052 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24053 code is generated. */
24054
24055 static int
24056 dispatch_function_versions (tree dispatch_decl,
24057 void *fndecls_p,
24058 basic_block *empty_bb)
24059 {
24060 int ix;
24061 tree ele;
24062 vec<tree> *fndecls;
24063 tree clones[CLONE_MAX];
24064
24065 if (TARGET_DEBUG_TARGET)
24066 fputs ("dispatch_function_versions, top\n", stderr);
24067
24068 gcc_assert (dispatch_decl != NULL
24069 && fndecls_p != NULL
24070 && empty_bb != NULL);
24071
24072 /* fndecls_p is actually a vector. */
24073 fndecls = static_cast<vec<tree> *> (fndecls_p);
24074
24075 /* At least one more version other than the default. */
24076 gcc_assert (fndecls->length () >= 2);
24077
24078 /* The first version in the vector is the default decl. */
24079 memset ((void *) clones, '\0', sizeof (clones));
24080 clones[CLONE_DEFAULT] = (*fndecls)[0];
24081
24082 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24083 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24084 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24085 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24086 to insert the code here to do the call. */
24087
24088 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24089 {
24090 int priority = rs6000_clone_priority (ele);
24091 if (!clones[priority])
24092 clones[priority] = ele;
24093 }
24094
24095 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24096 if (clones[ix])
24097 {
24098 if (TARGET_DEBUG_TARGET)
24099 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24100 ix, get_decl_name (clones[ix]));
24101
24102 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24103 *empty_bb);
24104 }
24105
24106 return 0;
24107 }
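
/* A rough C-level sketch, not part of the build, of the dispatch logic the
   loop above emits for one "cpu=power9" clone plus the default.  The names
   are hypothetical; "arch_3_00" is assumed to be the ISA 3.0 feature string
   in rs6000_clone_map, tested from the highest priority downward.  */
#if 0
extern long hypothetical_mod_power9 (long, long);
extern long hypothetical_mod_default (long, long);

static void *
hypothetical_resolver (void)
{
  if (__builtin_cpu_supports ("arch_3_00"))	/* Highest-priority clone.  */
    return (void *) hypothetical_mod_power9;
  return (void *) hypothetical_mod_default;	/* CLONE_DEFAULT fallback.  */
}
#endif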
24108
24109 /* Generate the dispatching code body to dispatch multi-versioned function
24110 DECL. The target hook is called to process the "target" attributes and
24111 provide the code to dispatch the right function at run-time. NODE points
24112 to the dispatcher decl whose body will be created. */
24113
24114 static tree
24115 rs6000_generate_version_dispatcher_body (void *node_p)
24116 {
24117 tree resolver;
24118 basic_block empty_bb;
24119 struct cgraph_node *node = (cgraph_node *) node_p;
24120 struct cgraph_function_version_info *ninfo = node->function_version ();
24121
24122 if (ninfo->dispatcher_resolver)
24123 return ninfo->dispatcher_resolver;
24124
24125 /* node is going to be an alias, so remove the finalized bit. */
24126 node->definition = false;
24127
24128 /* The first version in the chain corresponds to the default version. */
24129 ninfo->dispatcher_resolver = resolver
24130 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24131
24132 if (TARGET_DEBUG_TARGET)
24133 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
24134 get_decl_name (resolver));
24135
24136 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24137 auto_vec<tree, 2> fn_ver_vec;
24138
24139 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24140 vinfo;
24141 vinfo = vinfo->next)
24142 {
24143 struct cgraph_node *version = vinfo->this_node;
24144 /* Check for virtual functions here again, as by this time it should
24145 have been determined if this function needs a vtable index or
24146 not. This happens for methods in derived classes that override
24147 virtual methods in base classes but are not explicitly marked as
24148 virtual. */
24149 if (DECL_VINDEX (version->decl))
24150 sorry ("Virtual function multiversioning not supported");
24151
24152 fn_ver_vec.safe_push (version->decl);
24153 }
24154
24155 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24156 cgraph_edge::rebuild_edges ();
24157 pop_cfun ();
24158 return resolver;
24159 }
24160
24161 \f
24162 /* Hook to determine if one function can safely inline another. */
24163
24164 static bool
24165 rs6000_can_inline_p (tree caller, tree callee)
24166 {
24167 bool ret = false;
24168 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24169 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24170
24171 /* If the callee has no option attributes, then it is ok to inline. */
24172 if (!callee_tree)
24173 ret = true;
24174
24175 else
24176 {
24177 HOST_WIDE_INT caller_isa;
24178 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24179 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24180 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24181
24182 /* If the caller has option attributes, then use them.
24183 Otherwise, use the command line options. */
24184 if (caller_tree)
24185 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24186 else
24187 caller_isa = rs6000_isa_flags;
24188
24189 /* The callee's options must be a subset of the caller's options, i.e.
24190 a vsx function may inline an altivec function, but a no-vsx function
24191 must not inline a vsx function. However, for those options that the
24192 callee has explicitly enabled or disabled, then we must enforce that
24193 the callee's and caller's options match exactly; see PR70010. */
24194 if (((caller_isa & callee_isa) == callee_isa)
24195 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24196 ret = true;
24197 }
24198
24199 if (TARGET_DEBUG_TARGET)
24200 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24201 get_decl_name (caller), get_decl_name (callee),
24202 (ret ? "can" : "cannot"));
24203
24204 return ret;
24205 }
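
/* Illustrative sketch, not part of the build, of the subset rule above with
   hypothetical functions: a vsx caller may inline an altivec callee, since
   the altivec options are a subset of the vsx ones, but a no-vsx caller
   must not inline a vsx callee.  */
#if 0
__attribute__ ((target ("altivec")))
static inline int callee (void) { return 1; }

__attribute__ ((target ("vsx")))
int caller (void) { return callee (); }	/* Inlining is permitted.  */
#endif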
24206 \f
24207 /* Allocate a stack temp and fix up the address so it meets the particular
24208 memory requirements (either offsettable or REG+REG addressing). */
24209
24210 rtx
24211 rs6000_allocate_stack_temp (machine_mode mode,
24212 bool offsettable_p,
24213 bool reg_reg_p)
24214 {
24215 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24216 rtx addr = XEXP (stack, 0);
24217 int strict_p = reload_completed;
24218
24219 if (!legitimate_indirect_address_p (addr, strict_p))
24220 {
24221 if (offsettable_p
24222 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24223 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24224
24225 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24226 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24227 }
24228
24229 return stack;
24230 }
24231
24232 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24233 convert to such a form to deal with memory reference instructions
24234 like STFIWX and LDBRX that only take reg+reg addressing. */
24235
24236 rtx
24237 rs6000_force_indexed_or_indirect_mem (rtx x)
24238 {
24239 machine_mode mode = GET_MODE (x);
24240
24241 gcc_assert (MEM_P (x));
24242 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24243 {
24244 rtx addr = XEXP (x, 0);
24245 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24246 {
24247 rtx reg = XEXP (addr, 0);
24248 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24249 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24250 gcc_assert (REG_P (reg));
24251 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24252 addr = reg;
24253 }
24254 else if (GET_CODE (addr) == PRE_MODIFY)
24255 {
24256 rtx reg = XEXP (addr, 0);
24257 rtx expr = XEXP (addr, 1);
24258 gcc_assert (REG_P (reg));
24259 gcc_assert (GET_CODE (expr) == PLUS);
24260 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24261 addr = reg;
24262 }
24263
24264 if (GET_CODE (addr) == PLUS)
24265 {
24266 rtx op0 = XEXP (addr, 0);
24267 rtx op1 = XEXP (addr, 1);
24268 op0 = force_reg (Pmode, op0);
24269 op1 = force_reg (Pmode, op1);
24270 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24271 }
24272 else
24273 x = replace_equiv_address (x, force_reg (Pmode, addr));
24274 }
24275
24276 return x;
24277 }
24278
24279 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24280
24281 On the RS/6000, all integer constants are acceptable, most won't be valid
24282 for particular insns, though. Only easy FP constants are acceptable. */
24283
24284 static bool
24285 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24286 {
24287 if (TARGET_ELF && tls_referenced_p (x))
24288 return false;
24289
24290 if (CONST_DOUBLE_P (x))
24291 return easy_fp_constant (x, mode);
24292
24293 if (GET_CODE (x) == CONST_VECTOR)
24294 return easy_vector_constant (x, mode);
24295
24296 return true;
24297 }
24298
24299 \f
24300 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24301
24302 static bool
24303 chain_already_loaded (rtx_insn *last)
24304 {
24305 for (; last != NULL; last = PREV_INSN (last))
24306 {
24307 if (NONJUMP_INSN_P (last))
24308 {
24309 rtx patt = PATTERN (last);
24310
24311 if (GET_CODE (patt) == SET)
24312 {
24313 rtx lhs = XEXP (patt, 0);
24314
24315 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24316 return true;
24317 }
24318 }
24319 }
24320 return false;
24321 }
24322
24323 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24324
24325 void
24326 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24327 {
24328 rtx func = func_desc;
24329 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24330 rtx toc_load = NULL_RTX;
24331 rtx toc_restore = NULL_RTX;
24332 rtx func_addr;
24333 rtx abi_reg = NULL_RTX;
24334 rtx call[5];
24335 int n_call;
24336 rtx insn;
24337 bool is_pltseq_longcall;
24338
24339 if (global_tlsarg)
24340 tlsarg = global_tlsarg;
24341
24342 /* Handle longcall attributes. */
24343 is_pltseq_longcall = false;
24344 if ((INTVAL (cookie) & CALL_LONG) != 0
24345 && GET_CODE (func_desc) == SYMBOL_REF)
24346 {
24347 func = rs6000_longcall_ref (func_desc, tlsarg);
24348 if (TARGET_PLTSEQ)
24349 is_pltseq_longcall = true;
24350 }
24351
24352 /* Handle indirect calls. */
24353 if (!SYMBOL_REF_P (func)
24354 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24355 {
24356 if (!rs6000_pcrel_p (cfun))
24357 {
24358 /* Save the TOC into its reserved slot before the call,
24359 and prepare to restore it after the call. */
24360 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24361 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24362 gen_rtvec (1, stack_toc_offset),
24363 UNSPEC_TOCSLOT);
24364 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24365
24366 /* Can we optimize saving the TOC in the prologue or
24367 do we need to do it at every call? */
24368 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24369 cfun->machine->save_toc_in_prologue = true;
24370 else
24371 {
24372 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24373 rtx stack_toc_mem = gen_frame_mem (Pmode,
24374 gen_rtx_PLUS (Pmode, stack_ptr,
24375 stack_toc_offset));
24376 MEM_VOLATILE_P (stack_toc_mem) = 1;
24377 if (is_pltseq_longcall)
24378 {
24379 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24380 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24381 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24382 }
24383 else
24384 emit_move_insn (stack_toc_mem, toc_reg);
24385 }
24386 }
24387
24388 if (DEFAULT_ABI == ABI_ELFv2)
24389 {
24390 /* A function pointer in the ELFv2 ABI is just a plain address, but
24391 the ABI requires it to be loaded into r12 before the call. */
24392 func_addr = gen_rtx_REG (Pmode, 12);
24393 if (!rtx_equal_p (func_addr, func))
24394 emit_move_insn (func_addr, func);
24395 abi_reg = func_addr;
24396 /* Indirect calls via CTR are strongly preferred over indirect
24397 calls via LR, so move the address there. Needed to mark
24398 this insn for linker plt sequence editing too. */
24399 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24400 if (is_pltseq_longcall)
24401 {
24402 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24403 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24404 emit_insn (gen_rtx_SET (func_addr, mark_func));
24405 v = gen_rtvec (2, func_addr, func_desc);
24406 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24407 }
24408 else
24409 emit_move_insn (func_addr, abi_reg);
24410 }
24411 else
24412 {
24413 /* A function pointer under AIX is a pointer to a data area whose
24414 first word contains the actual address of the function, whose
24415 second word contains a pointer to its TOC, and whose third word
24416 contains a value to place in the static chain register (r11).
24417 Note that if we load the static chain, our "trampoline" need
24418 not have any executable code. */
24419
24420 /* Load up address of the actual function. */
24421 func = force_reg (Pmode, func);
24422 func_addr = gen_reg_rtx (Pmode);
24423 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24424
24425 /* Indirect calls via CTR are strongly preferred over indirect
24426 calls via LR, so move the address there. */
24427 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24428 emit_move_insn (ctr_reg, func_addr);
24429 func_addr = ctr_reg;
24430
24431 /* Prepare to load the TOC of the called function. Note that the
24432 TOC load must happen immediately before the actual call so
24433 that unwinding the TOC registers works correctly. See the
24434 comment in frob_update_context. */
24435 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24436 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24437 gen_rtx_PLUS (Pmode, func,
24438 func_toc_offset));
24439 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24440
24441 /* If we have a static chain, load it up. But, if the call was
24442 originally direct, the 3rd word has not been written since no
24443 trampoline has been built, so we ought not to load it, lest we
24444 overwrite a static chain value. */
24445 if (!(GET_CODE (func_desc) == SYMBOL_REF
24446 && SYMBOL_REF_FUNCTION_P (func_desc))
24447 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24448 && !chain_already_loaded (get_current_sequence ()->next->last))
24449 {
24450 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24451 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24452 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24453 gen_rtx_PLUS (Pmode, func,
24454 func_sc_offset));
24455 emit_move_insn (sc_reg, func_sc_mem);
24456 abi_reg = sc_reg;
24457 }
24458 }
24459 }
24460 else
24461 {
24462 /* No TOC register needed for calls from PC-relative callers. */
24463 if (!rs6000_pcrel_p (cfun))
24464 /* Direct calls use the TOC: for local calls, the callee will
24465 assume the TOC register is set; for non-local calls, the
24466 PLT stub needs the TOC register. */
24467 abi_reg = toc_reg;
24468 func_addr = func;
24469 }
24470
24471 /* Create the call. */
24472 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24473 if (value != NULL_RTX)
24474 call[0] = gen_rtx_SET (value, call[0]);
24475 call[1] = gen_rtx_USE (VOIDmode, cookie);
24476 n_call = 2;
24477
24478 if (toc_load)
24479 call[n_call++] = toc_load;
24480 if (toc_restore)
24481 call[n_call++] = toc_restore;
24482
24483 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24484
24485 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24486 insn = emit_call_insn (insn);
24487
24488 /* Mention all registers defined by the ABI to hold information
24489 as uses in CALL_INSN_FUNCTION_USAGE. */
24490 if (abi_reg)
24491 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24492 }
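
/* Conceptual sketch, not part of the build, of the AIX function descriptor
   the indirect-call path above dereferences.  The struct name is ours; the
   layout follows the three-word description in the comments above.  */
#if 0
struct aix_func_desc
{
  void *entry;		/* word 0: actual address of the function.  */
  void *toc;		/* word 1: the function's TOC pointer.  */
  void *static_chain;	/* word 2: value for r11, if a trampoline wrote it.  */
};
#endif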
24493
24494 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24495
24496 void
24497 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24498 {
24499 rtx call[2];
24500 rtx insn;
24501
24502 gcc_assert (INTVAL (cookie) == 0);
24503
24504 if (global_tlsarg)
24505 tlsarg = global_tlsarg;
24506
24507 /* Create the call. */
24508 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24509 if (value != NULL_RTX)
24510 call[0] = gen_rtx_SET (value, call[0]);
24511
24512 call[1] = simple_return_rtx;
24513
24514 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24515 insn = emit_call_insn (insn);
24516
24517 /* Note use of the TOC register. */
24518 if (!rs6000_pcrel_p (cfun))
24519 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24520 gen_rtx_REG (Pmode, TOC_REGNUM));
24521 }
24522
24523 /* Expand code to perform a call under the SYSV4 ABI. */
24524
24525 void
24526 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24527 {
24528 rtx func = func_desc;
24529 rtx func_addr;
24530 rtx call[4];
24531 rtx insn;
24532 rtx abi_reg = NULL_RTX;
24533 int n;
24534
24535 if (global_tlsarg)
24536 tlsarg = global_tlsarg;
24537
24538 /* Handle longcall attributes. */
24539 if ((INTVAL (cookie) & CALL_LONG) != 0
24540 && GET_CODE (func_desc) == SYMBOL_REF)
24541 {
24542 func = rs6000_longcall_ref (func_desc, tlsarg);
24543 /* If the longcall was implemented as an inline PLT call using
24544 PLT unspecs then func will be REG:r11. If not, func will be
24545 a pseudo reg. The inline PLT call sequence supports lazy
24546 linking (and longcalls to functions in dlopen'd libraries).
24547 The other style of longcall doesn't. The lazy linking entry
24548 to the dynamic symbol resolver requires r11 be the function
24549 address (as it is for linker generated PLT stubs). Ensure
24550 r11 stays valid to the bctrl by marking r11 used by the call. */
24551 if (TARGET_PLTSEQ)
24552 abi_reg = func;
24553 }
24554
24555 /* Handle indirect calls. */
24556 if (GET_CODE (func) != SYMBOL_REF)
24557 {
24558 func = force_reg (Pmode, func);
24559
24560 /* Indirect calls via CTR are strongly preferred over indirect
24561 calls via LR, so move the address there. That can't be left
24562 to reload because we want to mark every instruction in an
24563 inline PLT call sequence with a reloc, enabling the linker to
24564 edit the sequence back to a direct call when that makes sense. */
24565 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24566 if (abi_reg)
24567 {
24568 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24569 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24570 emit_insn (gen_rtx_SET (func_addr, mark_func));
24571 v = gen_rtvec (2, func_addr, func_desc);
24572 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24573 }
24574 else
24575 emit_move_insn (func_addr, func);
24576 }
24577 else
24578 func_addr = func;
24579
24580 /* Create the call. */
24581 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24582 if (value != NULL_RTX)
24583 call[0] = gen_rtx_SET (value, call[0]);
24584
24585 call[1] = gen_rtx_USE (VOIDmode, cookie);
24586 n = 2;
24587 if (TARGET_SECURE_PLT
24588 && flag_pic
24589 && GET_CODE (func_addr) == SYMBOL_REF
24590 && !SYMBOL_REF_LOCAL_P (func_addr))
24591 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24592
24593 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24594
24595 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24596 insn = emit_call_insn (insn);
24597 if (abi_reg)
24598 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24599 }
24600
24601 /* Expand code to perform a sibling call under the SysV4 ABI. */
24602
24603 void
24604 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24605 {
24606 rtx func = func_desc;
24607 rtx func_addr;
24608 rtx call[3];
24609 rtx insn;
24610 rtx abi_reg = NULL_RTX;
24611
24612 if (global_tlsarg)
24613 tlsarg = global_tlsarg;
24614
24615 /* Handle longcall attributes. */
24616 if ((INTVAL (cookie) & CALL_LONG) != 0
24617 && GET_CODE (func_desc) == SYMBOL_REF)
24618 {
24619 func = rs6000_longcall_ref (func_desc, tlsarg);
24620 /* If the longcall was implemented as an inline PLT call using
24621 PLT unspecs then func will be REG:r11. If not, func will be
24622 a pseudo reg. The inline PLT call sequence supports lazy
24623 linking (and longcalls to functions in dlopen'd libraries).
24624 The other style of longcall doesn't. The lazy linking entry
24625 to the dynamic symbol resolver requires r11 be the function
24626 address (as it is for linker generated PLT stubs). Ensure
24627 r11 stays valid to the bctr by marking r11 used by the call. */
24628 if (TARGET_PLTSEQ)
24629 abi_reg = func;
24630 }
24631
24632 /* Handle indirect calls. */
24633 if (GET_CODE (func) != SYMBOL_REF)
24634 {
24635 func = force_reg (Pmode, func);
24636
24637 /* Indirect sibcalls must go via CTR. That can't be left to
24638 reload because we want to mark every instruction in an inline
24639 PLT call sequence with a reloc, enabling the linker to edit
24640 the sequence back to a direct call when that makes sense. */
24641 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24642 if (abi_reg)
24643 {
24644 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24645 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24646 emit_insn (gen_rtx_SET (func_addr, mark_func));
24647 v = gen_rtvec (2, func_addr, func_desc);
24648 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24649 }
24650 else
24651 emit_move_insn (func_addr, func);
24652 }
24653 else
24654 func_addr = func;
24655
24656 /* Create the call. */
24657 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24658 if (value != NULL_RTX)
24659 call[0] = gen_rtx_SET (value, call[0]);
24660
24661 call[1] = gen_rtx_USE (VOIDmode, cookie);
24662 call[2] = simple_return_rtx;
24663
24664 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24665 insn = emit_call_insn (insn);
24666 if (abi_reg)
24667 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24668 }
24669
24670 #if TARGET_MACHO
24671
24672 /* Expand code to perform a call under the Darwin ABI.
24673 Modulo handling of mlongcall, this is much the same as sysv.
24674 If/when the longcall optimisation is removed, we could drop this
24675 code and use the sysv case (taking care to avoid the tls stuff).
24676
24677 We can use this for sibcalls too, if needed. */
24678
24679 void
24680 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24681 rtx cookie, bool sibcall)
24682 {
24683 rtx func = func_desc;
24684 rtx func_addr;
24685 rtx call[3];
24686 rtx insn;
24687 int cookie_val = INTVAL (cookie);
24688 bool make_island = false;
24689
24690 /* Handle longcall attributes; there are two cases for Darwin:
24691 1) Newer linkers are capable of synthesising any branch islands needed.
24692 2) We need a helper branch island synthesised by the compiler.
24693 The second case has mostly been retired and we don't use it for m64.
24694 In fact, it is only an optimisation; we could just indirect as sysv
24695 does, but we keep it for backwards compatibility for now.
24696 If we're going to use this, then we need to keep the CALL_LONG bit set,
24697 so that we can pick up the special insn form later. */
24698 if ((cookie_val & CALL_LONG) != 0
24699 && GET_CODE (func_desc) == SYMBOL_REF)
24700 {
24701 /* FIXME: the longcall opt should not hang off this flag; it is most
24702 likely incorrect for kernel-mode code-generation. */
24703 if (darwin_symbol_stubs && TARGET_32BIT)
24704 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24705 else
24706 {
24707 /* The linker is capable of doing this, but the user explicitly
24708 asked for -mlongcall, so we'll do the 'normal' version. */
24709 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24710 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24711 }
24712 }
24713
24714 /* Handle indirect calls. */
24715 if (GET_CODE (func) != SYMBOL_REF)
24716 {
24717 func = force_reg (Pmode, func);
24718
24719 /* Indirect calls via CTR are strongly preferred over indirect
24720 calls via LR, and are required for indirect sibcalls, so move
24721 the address there. */
24722 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24723 emit_move_insn (func_addr, func);
24724 }
24725 else
24726 func_addr = func;
24727
24728 /* Create the call. */
24729 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24730 if (value != NULL_RTX)
24731 call[0] = gen_rtx_SET (value, call[0]);
24732
24733 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24734
24735 if (sibcall)
24736 call[2] = simple_return_rtx;
24737 else
24738 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24739
24740 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24741 insn = emit_call_insn (insn);
24742 /* Now that we have the debug info in the insn, we can set up the branch
24743 island if we're using one. */
24744 if (make_island)
24745 {
24746 tree funname = get_identifier (XSTR (func_desc, 0));
24747
24748 if (no_previous_def (funname))
24749 {
24750 rtx label_rtx = gen_label_rtx ();
24751 char *label_buf, temp_buf[256];
24752 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24753 CODE_LABEL_NUMBER (label_rtx));
24754 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24755 tree labelname = get_identifier (label_buf);
24756 add_compiler_branch_island (labelname, funname,
24757 insn_line ((const rtx_insn*)insn));
24758 }
24759 }
24760 }
24761 #endif
24762
24763 void
24764 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24765 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24766 {
24767 #if TARGET_MACHO
24768 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24769 #else
24770 gcc_unreachable ();
24771 #endif
24772 }
24773
24774
24775 void
24776 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24777 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24778 {
24779 #if TARGET_MACHO
24780 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24781 #else
24782 gcc_unreachable ();
24783 #endif
24784 }
24785
24786 /* Return whether we should generate PC-relative code for FNDECL. */
24787 bool
24788 rs6000_fndecl_pcrel_p (const_tree fndecl)
24789 {
24790 if (DEFAULT_ABI != ABI_ELFv2)
24791 return false;
24792
24793 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24794
24795 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24796 && TARGET_CMODEL == CMODEL_MEDIUM);
24797 }
24798
24799 /* Return whether we should generate PC-relative code for *FN. */
24800 bool
24801 rs6000_pcrel_p (struct function *fn)
24802 {
24803 if (DEFAULT_ABI != ABI_ELFv2)
24804 return false;
24805
24806 /* Optimize the usual case. */
24807 if (fn == cfun)
24808 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24809 && TARGET_CMODEL == CMODEL_MEDIUM);
24810
24811 return rs6000_fndecl_pcrel_p (fn->decl);
24812 }
24813
24814 \f
24815 /* Given an address (ADDR), a mode (MODE), and what the format of the
24816 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24817 for the address. */
24818
24819 enum insn_form
24820 address_to_insn_form (rtx addr,
24821 machine_mode mode,
24822 enum non_prefixed_form non_prefixed_format)
24823 {
24824 /* Single register is easy. */
24825 if (REG_P (addr) || SUBREG_P (addr))
24826 return INSN_FORM_BASE_REG;
24827
24828 /* If the non-prefixed instruction format doesn't support offset addressing,
24829 make sure only indexed addressing is allowed.
24830
24831 We special case SDmode so that the register allocator does not try to move
24832 SDmode through GPR registers, but instead uses the 32-bit integer load and
24833 store instructions for the floating point registers. */
24834 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24835 {
24836 if (GET_CODE (addr) != PLUS)
24837 return INSN_FORM_BAD;
24838
24839 rtx op0 = XEXP (addr, 0);
24840 rtx op1 = XEXP (addr, 1);
24841 if (!REG_P (op0) && !SUBREG_P (op0))
24842 return INSN_FORM_BAD;
24843
24844 if (!REG_P (op1) && !SUBREG_P (op1))
24845 return INSN_FORM_BAD;
24846
24847 return INSN_FORM_X;
24848 }
24849
24850 /* Deal with update forms. */
24851 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24852 return INSN_FORM_UPDATE;
24853
24854 /* Handle PC-relative symbols and labels. Check for both local and
24855 external symbols. Assume labels are always local. TLS symbols
24856 are not PC-relative for rs6000. */
24857 if (TARGET_PCREL)
24858 {
24859 if (LABEL_REF_P (addr))
24860 return INSN_FORM_PCREL_LOCAL;
24861
24862 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
24863 {
24864 if (!SYMBOL_REF_LOCAL_P (addr))
24865 return INSN_FORM_PCREL_EXTERNAL;
24866 else
24867 return INSN_FORM_PCREL_LOCAL;
24868 }
24869 }
24870
24871 if (GET_CODE (addr) == CONST)
24872 addr = XEXP (addr, 0);
24873
24874 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24875 if (GET_CODE (addr) == LO_SUM)
24876 return INSN_FORM_LO_SUM;
24877
24878 /* Everything below must be an offset address of some form. */
24879 if (GET_CODE (addr) != PLUS)
24880 return INSN_FORM_BAD;
24881
24882 rtx op0 = XEXP (addr, 0);
24883 rtx op1 = XEXP (addr, 1);
24884
24885 /* Check for indexed addresses. */
24886 if (REG_P (op1) || SUBREG_P (op1))
24887 {
24888 if (REG_P (op0) || SUBREG_P (op0))
24889 return INSN_FORM_X;
24890
24891 return INSN_FORM_BAD;
24892 }
24893
24894 if (!CONST_INT_P (op1))
24895 return INSN_FORM_BAD;
24896
24897 HOST_WIDE_INT offset = INTVAL (op1);
24898 if (!SIGNED_INTEGER_34BIT_P (offset))
24899 return INSN_FORM_BAD;
24900
24901 /* Check for local and external PC-relative addresses. Labels are always
24902 local. TLS symbols are not PC-relative for rs6000. */
24903 if (TARGET_PCREL)
24904 {
24905 if (LABEL_REF_P (op0))
24906 return INSN_FORM_PCREL_LOCAL;
24907
24908 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
24909 {
24910 if (!SYMBOL_REF_LOCAL_P (op0))
24911 return INSN_FORM_PCREL_EXTERNAL;
24912 else
24913 return INSN_FORM_PCREL_LOCAL;
24914 }
24915 }
24916
24917 /* If it isn't PC-relative, the address must use a base register. */
24918 if (!REG_P (op0) && !SUBREG_P (op0))
24919 return INSN_FORM_BAD;
24920
24921 /* Large offsets must be prefixed. */
24922 if (!SIGNED_INTEGER_16BIT_P (offset))
24923 {
24924 if (TARGET_PREFIXED)
24925 return INSN_FORM_PREFIXED_NUMERIC;
24926
24927 return INSN_FORM_BAD;
24928 }
24929
24930 /* We have a 16-bit offset; see what default instruction format to use. */
24931 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24932 {
24933 unsigned size = GET_MODE_SIZE (mode);
24934
24935 /* On 64-bit systems, assume 64-bit integers need to use DS form
24936 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24937 (for LXV and STXV). TImode is problematic in that its normal usage
24938 is expected to be in GPRs, where it wants a DS instruction format, but if
24939 it goes into the vector registers, it wants a DQ instruction
24940 format. */
24941 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24942 non_prefixed_format = NON_PREFIXED_DS;
24943
24944 else if (TARGET_VSX && size >= 16
24945 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24946 non_prefixed_format = NON_PREFIXED_DQ;
24947
24948 else
24949 non_prefixed_format = NON_PREFIXED_D;
24950 }
24951
24952 /* Classify the D/DS/DQ-form addresses. */
24953 switch (non_prefixed_format)
24954 {
24955 /* Instruction format D, all 16 bits are valid. */
24956 case NON_PREFIXED_D:
24957 return INSN_FORM_D;
24958
24959 /* Instruction format DS, bottom 2 bits must be 0. */
24960 case NON_PREFIXED_DS:
24961 if ((offset & 3) == 0)
24962 return INSN_FORM_DS;
24963
24964 else if (TARGET_PREFIXED)
24965 return INSN_FORM_PREFIXED_NUMERIC;
24966
24967 else
24968 return INSN_FORM_BAD;
24969
24970 /* Instruction format DQ, bottom 4 bits must be 0. */
24971 case NON_PREFIXED_DQ:
24972 if ((offset & 15) == 0)
24973 return INSN_FORM_DQ;
24974
24975 else if (TARGET_PREFIXED)
24976 return INSN_FORM_PREFIXED_NUMERIC;
24977
24978 else
24979 return INSN_FORM_BAD;
24980
24981 default:
24982 break;
24983 }
24984
24985 return INSN_FORM_BAD;
24986 }
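
/* A few concrete classifications for illustration, assuming a 64-bit target
   with prefixed instructions enabled and the default non-prefixed format:

     (reg r3)				-> INSN_FORM_BASE_REG
     (plus (reg r3) (reg r4))		-> INSN_FORM_X
     (plus (reg r3) (const_int 8))	-> INSN_FORM_DS for DImode (LD/STD)
     (plus (reg r3) (const_int 6))	-> INSN_FORM_PREFIXED_NUMERIC for
					   DImode, since DS needs the bottom
					   2 bits of the offset to be 0.  */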
24987
24988 /* Helper function to see if we're potentially looking at lfs/stfs.
24989 - PARALLEL containing a SET and a CLOBBER
24990 - stfs:
24991 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
24992 - CLOBBER is a V4SF
24993 - lfs:
24994 - SET is from UNSPEC_SF_FROM_SI to REG:SF
24995 - CLOBBER is a DI
24996 */
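
/* For illustration, the stfs pattern described above looks roughly like this
   in RTL (the register numbers are hypothetical):

     (parallel [(set (mem:SI (reg:DI 3))
		     (unspec:SI [(reg:SF 33)] UNSPEC_SI_FROM_SF))
		(clobber (scratch:V4SF))])  */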
24997
24998 static bool
24999 is_lfs_stfs_insn (rtx_insn *insn)
25000 {
25001 rtx pattern = PATTERN (insn);
25002 if (GET_CODE (pattern) != PARALLEL)
25003 return false;
25004
25005 /* This should be a parallel with exactly one set and one clobber. */
25006 if (XVECLEN (pattern, 0) != 2)
25007 return false;
25008
25009 rtx set = XVECEXP (pattern, 0, 0);
25010 if (GET_CODE (set) != SET)
25011 return false;
25012
25013 rtx clobber = XVECEXP (pattern, 0, 1);
25014 if (GET_CODE (clobber) != CLOBBER)
25015 return false;
25016
25017 /* All we care about is that the destination of the SET is a mem:SI,
25018 the source should be an UNSPEC_SI_FROM_SF, and the clobber
25019 should be a scratch:V4SF. */
25020
25021 rtx dest = SET_DEST (set);
25022 rtx src = SET_SRC (set);
25023 rtx scratch = SET_DEST (clobber);
25024
25025 if (GET_CODE (src) != UNSPEC)
25026 return false;
25027
25028 /* stfs case. */
25029 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
25030 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
25031 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
25032 return true;
25033
25034 /* lfs case. */
25035 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
25036 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
25037 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
25038 return true;
25039
25040 return false;
25041 }
25042
25043 /* Helper function to take a REG and a MODE and determine the non-prefixed
25044 instruction format (D/DS/DQ) used for offset memory. */
25045
25046 static enum non_prefixed_form
25047 reg_to_non_prefixed (rtx reg, machine_mode mode)
25048 {
25049 /* If it isn't a register, use the defaults. */
25050 if (!REG_P (reg) && !SUBREG_P (reg))
25051 return NON_PREFIXED_DEFAULT;
25052
25053 unsigned int r = reg_or_subregno (reg);
25054
25055 /* If we have a pseudo, use the default instruction format. */
25056 if (!HARD_REGISTER_NUM_P (r))
25057 return NON_PREFIXED_DEFAULT;
25058
25059 unsigned size = GET_MODE_SIZE (mode);
25060
25061 /* FPR registers use the D instruction format for scalars, and the DQ format
25062 for vectors, IEEE 128-bit floating point, and 128-bit integers. Before
25063 power9, only indexed addressing was available for vectors. */
25064 if (FP_REGNO_P (r))
25065 {
25066 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25067 return NON_PREFIXED_D;
25068
25069 else if (size < 8)
25070 return NON_PREFIXED_X;
25071
25072 else if (TARGET_VSX && size >= 16
25073 && (VECTOR_MODE_P (mode)
25074 || FLOAT128_VECTOR_P (mode)
25075 || mode == TImode || mode == CTImode))
25076 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25077
25078 else
25079 return NON_PREFIXED_DEFAULT;
25080 }
25081
25082 /* Altivec registers use the DS instruction format for scalars, and the DQ
25083 format for vectors, IEEE 128-bit floating point, and 128-bit integers.
25084 Before power9, only indexed addressing was available. */
25085 else if (ALTIVEC_REGNO_P (r))
25086 {
25087 if (!TARGET_P9_VECTOR)
25088 return NON_PREFIXED_X;
25089
25090 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25091 return NON_PREFIXED_DS;
25092
25093 else if (size < 8)
25094 return NON_PREFIXED_X;
25095
25096 else if (TARGET_VSX && size >= 16
25097 && (VECTOR_MODE_P (mode)
25098 || FLOAT128_VECTOR_P (mode)
25099 || mode == TImode || mode == CTImode))
25100 return NON_PREFIXED_DQ;
25101
25102 else
25103 return NON_PREFIXED_DEFAULT;
25104 }
25105
25106 /* GPR registers use the DS instruction format for 64-bit items on 64-bit
25107 systems, and the D format otherwise. Assume that any other register, such
25108 as LR or the CRs, will go through the GPR registers for memory operations. */
25109 else if (TARGET_POWERPC64 && size >= 8)
25110 return NON_PREFIXED_DS;
25111
25112 return NON_PREFIXED_D;
25113 }
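
/* Some illustrative mappings implied by the rules above: a DFmode value in
   an FPR yields NON_PREFIXED_D (lfd/stfd offsets), a DImode value in a GPR
   on a 64-bit system yields NON_PREFIXED_DS (ld/std), and a V2DImode value
   in an Altivec register yields NON_PREFIXED_DQ (lxv/stxv) on power9 and
   later but NON_PREFIXED_X before that, since only indexed forms were
   available.  */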
25114
25115 \f
25116 /* Whether a load instruction is a prefixed instruction. This is called from
25117 the prefixed attribute processing. */
25118
25119 bool
25120 prefixed_load_p (rtx_insn *insn)
25121 {
25122 /* Validate the insn to make sure it is a normal load insn. */
25123 extract_insn_cached (insn);
25124 if (recog_data.n_operands < 2)
25125 return false;
25126
25127 rtx reg = recog_data.operand[0];
25128 rtx mem = recog_data.operand[1];
25129
25130 if (!REG_P (reg) && !SUBREG_P (reg))
25131 return false;
25132
25133 if (!MEM_P (mem))
25134 return false;
25135
25136 /* Prefixed load instructions do not support update or indexed forms. */
25137 if (get_attr_indexed (insn) == INDEXED_YES
25138 || get_attr_update (insn) == UPDATE_YES)
25139 return false;
25140
25141 /* LWA uses the DS format instead of the D format that LWZ uses. */
25142 enum non_prefixed_form non_prefixed;
25143 machine_mode reg_mode = GET_MODE (reg);
25144 machine_mode mem_mode = GET_MODE (mem);
25145
25146 if (mem_mode == SImode && reg_mode == DImode
25147 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25148 non_prefixed = NON_PREFIXED_DS;
25149
25150 else
25151 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25152
25153 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25154 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
25155 else
25156 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25157 }
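
/* Illustrative example, with made-up register numbers: a DImode load whose
   displacement does not fit in the 16-bit DS field, such as

       pld 9,100000(3)

   is reported as prefixed here, while the same load with a small offset
   uses a plain ld and returns false.  */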
25158
25159 /* Whether a store instruction is a prefixed instruction. This is called from
25160 the prefixed attribute processing. */
25161
25162 bool
25163 prefixed_store_p (rtx_insn *insn)
25164 {
25165 /* Validate the insn to make sure it is a normal store insn. */
25166 extract_insn_cached (insn);
25167 if (recog_data.n_operands < 2)
25168 return false;
25169
25170 rtx mem = recog_data.operand[0];
25171 rtx reg = recog_data.operand[1];
25172
25173 if (!REG_P (reg) && !SUBREG_P (reg))
25174 return false;
25175
25176 if (!MEM_P (mem))
25177 return false;
25178
25179 /* Prefixed store instructions do not support update or indexed forms. */
25180 if (get_attr_indexed (insn) == INDEXED_YES
25181 || get_attr_update (insn) == UPDATE_YES)
25182 return false;
25183
25184 machine_mode mem_mode = GET_MODE (mem);
25185 rtx addr = XEXP (mem, 0);
25186 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25187
25188   /* Need to make sure we aren't looking at a stfs, which doesn't look
25189      like the other cases that reg_to_non_prefixed/address_is_prefixed
25190      handle.  */
25191 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25192 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
25193 else
25194 return address_is_prefixed (addr, mem_mode, non_prefixed);
25195 }
25196
25197 /* Whether a load immediate or add instruction is a prefixed instruction. This
25198 is called from the prefixed attribute processing. */
25199
25200 bool
25201 prefixed_paddi_p (rtx_insn *insn)
25202 {
25203 rtx set = single_set (insn);
25204 if (!set)
25205 return false;
25206
25207 rtx dest = SET_DEST (set);
25208 rtx src = SET_SRC (set);
25209
25210 if (!REG_P (dest) && !SUBREG_P (dest))
25211 return false;
25212
25213 /* Is this a load immediate that can't be done with a simple ADDI or
25214 ADDIS? */
25215 if (CONST_INT_P (src))
25216 return (satisfies_constraint_eI (src)
25217 && !satisfies_constraint_I (src)
25218 && !satisfies_constraint_L (src));
25219
25220 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25221 ADDIS? */
25222 if (GET_CODE (src) == PLUS)
25223 {
25224 rtx op1 = XEXP (src, 1);
25225
25226 return (CONST_INT_P (op1)
25227 && satisfies_constraint_eI (op1)
25228 && !satisfies_constraint_I (op1)
25229 && !satisfies_constraint_L (op1));
25230 }
25231
25232 /* If not, is it a load of a PC-relative address? */
25233 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25234 return false;
25235
25236 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25237 return false;
25238
25239 enum insn_form iform = address_to_insn_form (src, Pmode,
25240 NON_PREFIXED_DEFAULT);
25241
25242 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25243 }
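
/* Illustrative examples, with made-up register numbers: 100000 does not fit
   in the 16-bit I field and is not a shifted 16-bit L value, so loading it
   becomes the prefixed "pli 3,100000", and adding it becomes
   "paddi 3,4,100000"; a constant like 100 stays an ordinary addi and
   returns false.  */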
25244
25245 /* Whether the next instruction needs a 'p' prefix issued before the
25246 instruction is printed out. */
25247 static bool next_insn_prefixed_p;
25248
25249 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25250 outputting the assembler code. On the PowerPC, we remember if the current
25251 insn is a prefixed insn where we need to emit a 'p' before the insn.
25252
25253 In addition, if the insn is part of a PC-relative reference to an external
25254 label optimization, this is recorded also. */
25255 void
25256 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25257 {
25258 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25259 return;
25260 }
25261
25262 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25263 We use it to emit a 'p' for prefixed insns that is set in
25264 FINAL_PRESCAN_INSN. */
25265 void
25266 rs6000_asm_output_opcode (FILE *stream)
25267 {
25268 if (next_insn_prefixed_p)
25269 fprintf (stream, "p");
25270
25271 return;
25272 }
25273
25274 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25275 should be adjusted to reflect any required changes. This macro is used when
25276 there is some systematic length adjustment required that would be difficult
25277 to express in the length attribute.
25278
25279 In the PowerPC, we use this to adjust the length of an instruction if one or
25280 more prefixed instructions are generated, using the attribute
25281 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25282    hardware requires that a prefixed instruction does not cross a 64-byte
25283 boundary. This means the compiler has to assume the length of the first
25284 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25285    already set for the non-prefixed instruction, we just need to update for the
25286 difference. */
25287
25288 int
25289 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25290 {
25291 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
25292 {
25293 rtx pattern = PATTERN (insn);
25294 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25295 && get_attr_prefixed (insn) == PREFIXED_YES)
25296 {
25297 int num_prefixed = get_attr_max_prefixed_insns (insn);
25298 length += 4 * (num_prefixed + 1);
25299 }
25300 }
25301
25302 return length;
25303 }
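
/* A short worked example, assuming the length attribute supplied the 4-byte
   non-prefixed size: for a single prefixed load, max_prefixed_insns is 1,
   so the adjustment adds 4 * (1 + 1) = 8, giving the 12 bytes that cover
   the 8-byte prefixed instruction plus a possible 4-byte pad that keeps it
   from crossing a 64-byte boundary.  */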
25304
25305 \f
25306 #ifdef HAVE_GAS_HIDDEN
25307 # define USE_HIDDEN_LINKONCE 1
25308 #else
25309 # define USE_HIDDEN_LINKONCE 0
25310 #endif
25311
25312 /* Fills in the label name that should be used for a 476 link stack thunk. */
25313
25314 void
25315 get_ppc476_thunk_name (char name[32])
25316 {
25317 gcc_assert (TARGET_LINK_STACK);
25318
25319 if (USE_HIDDEN_LINKONCE)
25320 sprintf (name, "__ppc476.get_thunk");
25321 else
25322 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25323 }
25324
25325 /* This function emits the simple thunk routine that is used to preserve
25326 the link stack on the 476 cpu. */
25327
25328 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25329 static void
25330 rs6000_code_end (void)
25331 {
25332 char name[32];
25333 tree decl;
25334
25335 if (!TARGET_LINK_STACK)
25336 return;
25337
25338 get_ppc476_thunk_name (name);
25339
25340 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25341 build_function_type_list (void_type_node, NULL_TREE));
25342 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25343 NULL_TREE, void_type_node);
25344 TREE_PUBLIC (decl) = 1;
25345 TREE_STATIC (decl) = 1;
25346
25347 #if RS6000_WEAK
25348 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25349 {
25350 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25351 targetm.asm_out.unique_section (decl, 0);
25352 switch_to_section (get_named_section (decl, NULL, 0));
25353 DECL_WEAK (decl) = 1;
25354 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25355 targetm.asm_out.globalize_label (asm_out_file, name);
25356 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25357 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25358 }
25359 else
25360 #endif
25361 {
25362 switch_to_section (text_section);
25363 ASM_OUTPUT_LABEL (asm_out_file, name);
25364 }
25365
25366 DECL_INITIAL (decl) = make_node (BLOCK);
25367 current_function_decl = decl;
25368 allocate_struct_function (decl, false);
25369 init_function_start (decl);
25370 first_function_block_is_cold = false;
25371 /* Make sure unwind info is emitted for the thunk if needed. */
25372 final_start_function (emit_barrier (), asm_out_file, 1);
25373
25374 fputs ("\tblr\n", asm_out_file);
25375
25376 final_end_function ();
25377 init_insn_lengths ();
25378 free_after_compilation (cfun);
25379 set_cfun (NULL);
25380 current_function_decl = NULL;
25381 }
25382
25383 /* Add r30 to hard reg set if the prologue sets it up and it is not
25384 pic_offset_table_rtx. */
25385
25386 static void
25387 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25388 {
25389 if (!TARGET_SINGLE_PIC_BASE
25390 && TARGET_TOC
25391 && TARGET_MINIMAL_TOC
25392 && !constant_pool_empty_p ())
25393 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25394 if (cfun->machine->split_stack_argp_used)
25395 add_to_hard_reg_set (&set->set, Pmode, 12);
25396
25397 /* Make sure the hard reg set doesn't include r2, which was possibly added
25398 via PIC_OFFSET_TABLE_REGNUM. */
25399 if (TARGET_TOC)
25400 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25401 }
25402
25403 \f
25404 /* Helper function for rs6000_split_logical to emit a logical instruction after
25405    splitting the operation into single GPR registers.
25406
25407 DEST is the destination register.
25408 OP1 and OP2 are the input source registers.
25409 CODE is the base operation (AND, IOR, XOR, NOT).
25410 MODE is the machine mode.
25411 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25412 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25413 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25414
25415 static void
25416 rs6000_split_logical_inner (rtx dest,
25417 rtx op1,
25418 rtx op2,
25419 enum rtx_code code,
25420 machine_mode mode,
25421 bool complement_final_p,
25422 bool complement_op1_p,
25423 bool complement_op2_p)
25424 {
25425 rtx bool_rtx;
25426
25427 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25428 if (op2 && CONST_INT_P (op2)
25429 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25430 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25431 {
25432 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25433 HOST_WIDE_INT value = INTVAL (op2) & mask;
25434
25435 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25436 if (code == AND)
25437 {
25438 if (value == 0)
25439 {
25440 emit_insn (gen_rtx_SET (dest, const0_rtx));
25441 return;
25442 }
25443
25444 else if (value == mask)
25445 {
25446 if (!rtx_equal_p (dest, op1))
25447 emit_insn (gen_rtx_SET (dest, op1));
25448 return;
25449 }
25450 }
25451
25452 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25453      into separate ORI/ORIS or XORI/XORIS instructions.  */
25454 else if (code == IOR || code == XOR)
25455 {
25456 if (value == 0)
25457 {
25458 if (!rtx_equal_p (dest, op1))
25459 emit_insn (gen_rtx_SET (dest, op1));
25460 return;
25461 }
25462 }
25463 }
25464
25465 if (code == AND && mode == SImode
25466 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25467 {
25468 emit_insn (gen_andsi3 (dest, op1, op2));
25469 return;
25470 }
25471
25472 if (complement_op1_p)
25473 op1 = gen_rtx_NOT (mode, op1);
25474
25475 if (complement_op2_p)
25476 op2 = gen_rtx_NOT (mode, op2);
25477
25478 /* For canonical RTL, if only one arm is inverted it is the first. */
25479 if (!complement_op1_p && complement_op2_p)
25480 std::swap (op1, op2);
25481
25482 bool_rtx = ((code == NOT)
25483 ? gen_rtx_NOT (mode, op1)
25484 : gen_rtx_fmt_ee (code, mode, op1, op2));
25485
25486 if (complement_final_p)
25487 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25488
25489 emit_insn (gen_rtx_SET (dest, bool_rtx));
25490 }
25491
25492 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25493 operations are split immediately during RTL generation to allow for more
25494 optimizations of the AND/IOR/XOR.
25495
25496 OPERANDS is an array containing the destination and two input operands.
25497 CODE is the base operation (AND, IOR, XOR, NOT).
25498 MODE is the machine mode.
25499 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25500 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25501 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25502 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25503 formation of the AND instructions. */
25504
25505 static void
25506 rs6000_split_logical_di (rtx operands[3],
25507 enum rtx_code code,
25508 bool complement_final_p,
25509 bool complement_op1_p,
25510 bool complement_op2_p)
25511 {
25512 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25513 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25514 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25515 enum hi_lo { hi = 0, lo = 1 };
25516 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25517 size_t i;
25518
25519 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25520 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25521 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25522 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25523
25524 if (code == NOT)
25525 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25526 else
25527 {
25528 if (!CONST_INT_P (operands[2]))
25529 {
25530 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25531 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25532 }
25533 else
25534 {
25535 HOST_WIDE_INT value = INTVAL (operands[2]);
25536 HOST_WIDE_INT value_hi_lo[2];
25537
25538 gcc_assert (!complement_final_p);
25539 gcc_assert (!complement_op1_p);
25540 gcc_assert (!complement_op2_p);
25541
25542 value_hi_lo[hi] = value >> 32;
25543 value_hi_lo[lo] = value & lower_32bits;
25544
25545 for (i = 0; i < 2; i++)
25546 {
25547 HOST_WIDE_INT sub_value = value_hi_lo[i];
25548
25549 if (sub_value & sign_bit)
25550 sub_value |= upper_32bits;
25551
25552 op2_hi_lo[i] = GEN_INT (sub_value);
25553
25554 /* If this is an AND instruction, check to see if we need to load
25555 the value in a register. */
25556 if (code == AND && sub_value != -1 && sub_value != 0
25557 && !and_operand (op2_hi_lo[i], SImode))
25558 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25559 }
25560 }
25561 }
25562
25563 for (i = 0; i < 2; i++)
25564 {
25565 /* Split large IOR/XOR operations. */
25566 if ((code == IOR || code == XOR)
25567 && CONST_INT_P (op2_hi_lo[i])
25568 && !complement_final_p
25569 && !complement_op1_p
25570 && !complement_op2_p
25571 && !logical_const_operand (op2_hi_lo[i], SImode))
25572 {
25573 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25574 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25575 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25576 rtx tmp = gen_reg_rtx (SImode);
25577
25578 /* Make sure the constant is sign extended. */
25579 if ((hi_16bits & sign_bit) != 0)
25580 hi_16bits |= upper_32bits;
25581
25582 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25583 code, SImode, false, false, false);
25584
25585 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25586 code, SImode, false, false, false);
25587 }
25588 else
25589 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25590 code, SImode, complement_final_p,
25591 complement_op1_p, complement_op2_p);
25592 }
25593
25594 return;
25595 }
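
/* Sketch of the large IOR/XOR split above, with made-up registers: XORing
   one 32-bit half with 0x12345678 cannot be a single XORI or XORIS, so it
   is emitted as the pair

       xoris 9,9,0x1234
       xori  9,9,0x5678

   while a half that is zero or a valid 16-bit logical constant stays a
   single instruction (or a plain move).  */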
25596
25597 /* Split the insns that make up boolean operations operating on multiple GPR
25598 registers. The boolean MD patterns ensure that the inputs either are
25599 exactly the same as the output registers, or there is no overlap.
25600
25601 OPERANDS is an array containing the destination and two input operands.
25602 CODE is the base operation (AND, IOR, XOR, NOT).
25603 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25604 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25605 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25606
25607 void
25608 rs6000_split_logical (rtx operands[3],
25609 enum rtx_code code,
25610 bool complement_final_p,
25611 bool complement_op1_p,
25612 bool complement_op2_p)
25613 {
25614 machine_mode mode = GET_MODE (operands[0]);
25615 machine_mode sub_mode;
25616 rtx op0, op1, op2;
25617 int sub_size, regno0, regno1, nregs, i;
25618
25619 /* If this is DImode, use the specialized version that can run before
25620 register allocation. */
25621 if (mode == DImode && !TARGET_POWERPC64)
25622 {
25623 rs6000_split_logical_di (operands, code, complement_final_p,
25624 complement_op1_p, complement_op2_p);
25625 return;
25626 }
25627
25628 op0 = operands[0];
25629 op1 = operands[1];
25630 op2 = (code == NOT) ? NULL_RTX : operands[2];
25631 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25632 sub_size = GET_MODE_SIZE (sub_mode);
25633 regno0 = REGNO (op0);
25634 regno1 = REGNO (op1);
25635
25636 gcc_assert (reload_completed);
25637 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25638 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25639
25640 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25641 gcc_assert (nregs > 1);
25642
25643 if (op2 && REG_P (op2))
25644 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25645
25646 for (i = 0; i < nregs; i++)
25647 {
25648 int offset = i * sub_size;
25649 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25650 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25651 rtx sub_op2 = ((code == NOT)
25652 ? NULL_RTX
25653 : simplify_subreg (sub_mode, op2, mode, offset));
25654
25655 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25656 complement_final_p, complement_op1_p,
25657 complement_op2_p);
25658 }
25659
25660 return;
25661 }
25662
25663 \f
25664 /* Return true if the peephole2 can combine a load involving a combination of
25665 an addis instruction and a load with an offset that can be fused together on
25666 a power8. */
25667
25668 bool
25669 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25670 rtx addis_value, /* addis value. */
25671 rtx target, /* target register that is loaded. */
25672 rtx mem) /* bottom part of the memory addr. */
25673 {
25674 rtx addr;
25675 rtx base_reg;
25676
25677 /* Validate arguments. */
25678 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25679 return false;
25680
25681 if (!base_reg_operand (target, GET_MODE (target)))
25682 return false;
25683
25684 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25685 return false;
25686
25687 /* Allow sign/zero extension. */
25688 if (GET_CODE (mem) == ZERO_EXTEND
25689 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25690 mem = XEXP (mem, 0);
25691
25692 if (!MEM_P (mem))
25693 return false;
25694
25695 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25696 return false;
25697
25698 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25699 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25700 return false;
25701
25702 /* Validate that the register used to load the high value is either the
25703 register being loaded, or we can safely replace its use.
25704
25705 This function is only called from the peephole2 pass and we assume that
25706 there are 2 instructions in the peephole (addis and load), so we want to
25707 check if the target register was not used in the memory address and the
25708 register to hold the addis result is dead after the peephole. */
25709 if (REGNO (addis_reg) != REGNO (target))
25710 {
25711 if (reg_mentioned_p (target, mem))
25712 return false;
25713
25714 if (!peep2_reg_dead_p (2, addis_reg))
25715 return false;
25716
25717 /* If the target register being loaded is the stack pointer, we must
25718 avoid loading any other value into it, even temporarily. */
25719 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25720 return false;
25721 }
25722
25723 base_reg = XEXP (addr, 0);
25724 return REGNO (addis_reg) == REGNO (base_reg);
25725 }
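
/* The peephole2 that this guards matches a two-instruction sequence of
   roughly the following shape (illustrative registers and symbol):

       addis 9,2,sym@toc@ha
       lwz   9,sym@toc@l(9)

   where the addis result is used only by the load, so the pair can be
   fused on a power8.  */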
25726
25727 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25728 sequence. We adjust the addis register to use the target register. If the
25729    load sign extends, we adjust it to do a zero-extending load followed by an
25730    explicit sign extension, since the fusion only covers zero-extending
25731    loads.
25732
25733 The operands are:
25734 operands[0] register set with addis (to be replaced with target)
25735 operands[1] value set via addis
25736 operands[2] target register being loaded
25737 operands[3] D-form memory reference using operands[0]. */
25738
25739 void
25740 expand_fusion_gpr_load (rtx *operands)
25741 {
25742 rtx addis_value = operands[1];
25743 rtx target = operands[2];
25744 rtx orig_mem = operands[3];
25745 rtx new_addr, new_mem, orig_addr, offset;
25746 enum rtx_code plus_or_lo_sum;
25747 machine_mode target_mode = GET_MODE (target);
25748 machine_mode extend_mode = target_mode;
25749 machine_mode ptr_mode = Pmode;
25750 enum rtx_code extend = UNKNOWN;
25751
25752 if (GET_CODE (orig_mem) == ZERO_EXTEND
25753 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25754 {
25755 extend = GET_CODE (orig_mem);
25756 orig_mem = XEXP (orig_mem, 0);
25757 target_mode = GET_MODE (orig_mem);
25758 }
25759
25760 gcc_assert (MEM_P (orig_mem));
25761
25762 orig_addr = XEXP (orig_mem, 0);
25763 plus_or_lo_sum = GET_CODE (orig_addr);
25764 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25765
25766 offset = XEXP (orig_addr, 1);
25767 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25768 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25769
25770 if (extend != UNKNOWN)
25771 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25772
25773 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25774 UNSPEC_FUSION_GPR);
25775 emit_insn (gen_rtx_SET (target, new_mem));
25776
25777 if (extend == SIGN_EXTEND)
25778 {
25779 int sub_off = ((BYTES_BIG_ENDIAN)
25780 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25781 : 0);
25782 rtx sign_reg
25783 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25784
25785 emit_insn (gen_rtx_SET (target,
25786 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25787 }
25788
25789 return;
25790 }
25791
25792 /* Emit the addis instruction that will be part of a fused instruction
25793 sequence. */
25794
25795 void
25796 emit_fusion_addis (rtx target, rtx addis_value)
25797 {
25798 rtx fuse_ops[10];
25799 const char *addis_str = NULL;
25800
25801 /* Emit the addis instruction. */
25802 fuse_ops[0] = target;
25803 if (satisfies_constraint_L (addis_value))
25804 {
25805 fuse_ops[1] = addis_value;
25806 addis_str = "lis %0,%v1";
25807 }
25808
25809 else if (GET_CODE (addis_value) == PLUS)
25810 {
25811 rtx op0 = XEXP (addis_value, 0);
25812 rtx op1 = XEXP (addis_value, 1);
25813
25814 if (REG_P (op0) && CONST_INT_P (op1)
25815 && satisfies_constraint_L (op1))
25816 {
25817 fuse_ops[1] = op0;
25818 fuse_ops[2] = op1;
25819 addis_str = "addis %0,%1,%v2";
25820 }
25821 }
25822
25823 else if (GET_CODE (addis_value) == HIGH)
25824 {
25825 rtx value = XEXP (addis_value, 0);
25826 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25827 {
25828 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25829 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25830 if (TARGET_ELF)
25831 addis_str = "addis %0,%2,%1@toc@ha";
25832
25833 else if (TARGET_XCOFF)
25834 addis_str = "addis %0,%1@u(%2)";
25835
25836 else
25837 gcc_unreachable ();
25838 }
25839
25840 else if (GET_CODE (value) == PLUS)
25841 {
25842 rtx op0 = XEXP (value, 0);
25843 rtx op1 = XEXP (value, 1);
25844
25845 if (GET_CODE (op0) == UNSPEC
25846 && XINT (op0, 1) == UNSPEC_TOCREL
25847 && CONST_INT_P (op1))
25848 {
25849 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25850 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25851 fuse_ops[3] = op1;
25852 if (TARGET_ELF)
25853 addis_str = "addis %0,%2,%1+%3@toc@ha";
25854
25855 else if (TARGET_XCOFF)
25856 addis_str = "addis %0,%1+%3@u(%2)";
25857
25858 else
25859 gcc_unreachable ();
25860 }
25861 }
25862
25863 else if (satisfies_constraint_L (value))
25864 {
25865 fuse_ops[1] = value;
25866 addis_str = "lis %0,%v1";
25867 }
25868
25869 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25870 {
25871 fuse_ops[1] = value;
25872 addis_str = "lis %0,%1@ha";
25873 }
25874 }
25875
25876 if (!addis_str)
25877 fatal_insn ("Could not generate addis value for fusion", addis_value);
25878
25879 output_asm_insn (addis_str, fuse_ops);
25880 }
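
/* Illustrative outputs of the cases above, with made-up operands: a
   constant satisfying the L constraint emits "lis 9,0x1234"; a PLUS of a
   register and such a constant emits "addis 9,3,0x1234"; and a HIGH of a
   TOC-relative symbol emits "addis 9,2,sym@toc@ha" on ELF or
   "addis 9,sym@u(2)" on XCOFF.  */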
25881
25882 /* Emit a D-form load or store instruction that is the second instruction
25883 of a fusion sequence. */
25884
25885 static void
25886 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25887 {
25888 rtx fuse_ops[10];
25889 char insn_template[80];
25890
25891 fuse_ops[0] = load_reg;
25892 fuse_ops[1] = addis_reg;
25893
25894 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25895 {
25896 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25897 fuse_ops[2] = offset;
25898 output_asm_insn (insn_template, fuse_ops);
25899 }
25900
25901 else if (GET_CODE (offset) == UNSPEC
25902 && XINT (offset, 1) == UNSPEC_TOCREL)
25903 {
25904 if (TARGET_ELF)
25905 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25906
25907 else if (TARGET_XCOFF)
25908 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25909
25910 else
25911 gcc_unreachable ();
25912
25913 fuse_ops[2] = XVECEXP (offset, 0, 0);
25914 output_asm_insn (insn_template, fuse_ops);
25915 }
25916
25917 else if (GET_CODE (offset) == PLUS
25918 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25919 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25920 && CONST_INT_P (XEXP (offset, 1)))
25921 {
25922 rtx tocrel_unspec = XEXP (offset, 0);
25923 if (TARGET_ELF)
25924 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25925
25926 else if (TARGET_XCOFF)
25927 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25928
25929 else
25930 gcc_unreachable ();
25931
25932 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25933 fuse_ops[3] = XEXP (offset, 1);
25934 output_asm_insn (insn_template, fuse_ops);
25935 }
25936
25937 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25938 {
25939 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25940
25941 fuse_ops[2] = offset;
25942 output_asm_insn (insn_template, fuse_ops);
25943 }
25944
25945 else
25946 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25947
25948 return;
25949 }
25950
25951 /* Given an address, convert it into the addis and load offset parts. Addresses
25952 created during the peephole2 process look like:
25953 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25954 (unspec [(...)] UNSPEC_TOCREL)) */
25955
25956 static void
25957 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25958 {
25959 rtx hi, lo;
25960
25961 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25962 {
25963 hi = XEXP (addr, 0);
25964 lo = XEXP (addr, 1);
25965 }
25966 else
25967 gcc_unreachable ();
25968
25969 *p_hi = hi;
25970 *p_lo = lo;
25971 }
25972
25973 /* Return a string to fuse an addis instruction with a gpr load into the same
25974    register that the addis instruction set up.  The address that is used
25975    is the logical address that was formed during peephole2:
25976 (lo_sum (high) (low-part))
25977
25978 The code is complicated, so we call output_asm_insn directly, and just
25979 return "". */
25980
25981 const char *
25982 emit_fusion_gpr_load (rtx target, rtx mem)
25983 {
25984 rtx addis_value;
25985 rtx addr;
25986 rtx load_offset;
25987 const char *load_str = NULL;
25988 machine_mode mode;
25989
25990 if (GET_CODE (mem) == ZERO_EXTEND)
25991 mem = XEXP (mem, 0);
25992
25993 gcc_assert (REG_P (target) && MEM_P (mem));
25994
25995 addr = XEXP (mem, 0);
25996 fusion_split_address (addr, &addis_value, &load_offset);
25997
25998 /* Now emit the load instruction to the same register. */
25999 mode = GET_MODE (mem);
26000 switch (mode)
26001 {
26002 case E_QImode:
26003 load_str = "lbz";
26004 break;
26005
26006 case E_HImode:
26007 load_str = "lhz";
26008 break;
26009
26010 case E_SImode:
26011 case E_SFmode:
26012 load_str = "lwz";
26013 break;
26014
26015 case E_DImode:
26016 case E_DFmode:
26017 gcc_assert (TARGET_POWERPC64);
26018 load_str = "ld";
26019 break;
26020
26021 default:
26022 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
26023 }
26024
26025 /* Emit the addis instruction. */
26026 emit_fusion_addis (target, addis_value);
26027
26028 /* Emit the D-form load instruction. */
26029 emit_fusion_load (target, target, load_offset, load_str);
26030
26031 return "";
26032 }
26033 \f
26034
26035 #ifdef RS6000_GLIBC_ATOMIC_FENV
26036 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
26037 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
26038 #endif
26039
26040 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
26041
26042 static void
26043 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
26044 {
26045 if (!TARGET_HARD_FLOAT)
26046 {
26047 #ifdef RS6000_GLIBC_ATOMIC_FENV
26048 if (atomic_hold_decl == NULL_TREE)
26049 {
26050 atomic_hold_decl
26051 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26052 get_identifier ("__atomic_feholdexcept"),
26053 build_function_type_list (void_type_node,
26054 double_ptr_type_node,
26055 NULL_TREE));
26056 TREE_PUBLIC (atomic_hold_decl) = 1;
26057 DECL_EXTERNAL (atomic_hold_decl) = 1;
26058 }
26059
26060 if (atomic_clear_decl == NULL_TREE)
26061 {
26062 atomic_clear_decl
26063 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26064 get_identifier ("__atomic_feclearexcept"),
26065 build_function_type_list (void_type_node,
26066 NULL_TREE));
26067 TREE_PUBLIC (atomic_clear_decl) = 1;
26068 DECL_EXTERNAL (atomic_clear_decl) = 1;
26069 }
26070
26071 tree const_double = build_qualified_type (double_type_node,
26072 TYPE_QUAL_CONST);
26073 tree const_double_ptr = build_pointer_type (const_double);
26074 if (atomic_update_decl == NULL_TREE)
26075 {
26076 atomic_update_decl
26077 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26078 get_identifier ("__atomic_feupdateenv"),
26079 build_function_type_list (void_type_node,
26080 const_double_ptr,
26081 NULL_TREE));
26082 TREE_PUBLIC (atomic_update_decl) = 1;
26083 DECL_EXTERNAL (atomic_update_decl) = 1;
26084 }
26085
26086 tree fenv_var = create_tmp_var_raw (double_type_node);
26087 TREE_ADDRESSABLE (fenv_var) = 1;
26088 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26089 build4 (TARGET_EXPR, double_type_node, fenv_var,
26090 void_node, NULL_TREE, NULL_TREE));
26091
26092 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26093 *clear = build_call_expr (atomic_clear_decl, 0);
26094 *update = build_call_expr (atomic_update_decl, 1,
26095 fold_convert (const_double_ptr, fenv_addr));
26096 #endif
26097 return;
26098 }
26099
26100 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26101 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26102 tree call_mffs = build_call_expr (mffs, 0);
26103
26104 /* Generates the equivalent of feholdexcept (&fenv_var)
26105
26106 *fenv_var = __builtin_mffs ();
26107 double fenv_hold;
26108 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26109 __builtin_mtfsf (0xff, fenv_hold); */
26110
26111 /* Mask to clear everything except for the rounding modes and non-IEEE
26112 arithmetic flag. */
26113 const unsigned HOST_WIDE_INT hold_exception_mask
26114 = HOST_WIDE_INT_C (0xffffffff00000007);
26115
26116 tree fenv_var = create_tmp_var_raw (double_type_node);
26117
26118 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
26119 NULL_TREE, NULL_TREE);
26120
26121 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26122 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26123 build_int_cst (uint64_type_node,
26124 hold_exception_mask));
26125
26126 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26127 fenv_llu_and);
26128
26129 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26130 build_int_cst (unsigned_type_node, 0xff),
26131 fenv_hold_mtfsf);
26132
26133 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26134
26135 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26136
26137 double fenv_clear = __builtin_mffs ();
26138      *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26139 __builtin_mtfsf (0xff, fenv_clear); */
26140
26141   /* Mask to clear the entire lower word of the FPSCR, including all of the
26142      exception bits, enable bits, and rounding modes.  */
26143 const unsigned HOST_WIDE_INT clear_exception_mask
26144 = HOST_WIDE_INT_C (0xffffffff00000000);
26145
26146 tree fenv_clear = create_tmp_var_raw (double_type_node);
26147
26148 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
26149 call_mffs, NULL_TREE, NULL_TREE);
26150
26151 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26152 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26153 fenv_clean_llu,
26154 build_int_cst (uint64_type_node,
26155 clear_exception_mask));
26156
26157 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26158 fenv_clear_llu_and);
26159
26160 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26161 build_int_cst (unsigned_type_node, 0xff),
26162 fenv_clear_mtfsf);
26163
26164 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26165
26166 /* Generates the equivalent of feupdateenv (&fenv_var)
26167
26168 double old_fenv = __builtin_mffs ();
26169 double fenv_update;
26170      *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
26171                                 (*(uint64_t*)fenv_var & 0x1ff80fff);
26172 __builtin_mtfsf (0xff, fenv_update); */
26173
26174 const unsigned HOST_WIDE_INT update_exception_mask
26175 = HOST_WIDE_INT_C (0xffffffff1fffff00);
26176 const unsigned HOST_WIDE_INT new_exception_mask
26177 = HOST_WIDE_INT_C (0x1ff80fff);
26178
26179 tree old_fenv = create_tmp_var_raw (double_type_node);
26180 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
26181 call_mffs, NULL_TREE, NULL_TREE);
26182
26183 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26184 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26185 build_int_cst (uint64_type_node,
26186 update_exception_mask));
26187
26188 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26189 build_int_cst (uint64_type_node,
26190 new_exception_mask));
26191
26192 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26193 old_llu_and, new_llu_and);
26194
26195 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26196 new_llu_mask);
26197
26198 tree update_mtfsf = build_call_expr (mtfsf, 2,
26199 build_int_cst (unsigned_type_node, 0xff),
26200 fenv_update_mtfsf);
26201
26202 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
26203 }
26204
26205 void
26206 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26207 {
26208 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26209
26210 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26211 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26212
26213 /* The destination of the vmrgew instruction layout is:
26214      rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26215      Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26216 vmrgew instruction will be correct. */
26217 if (BYTES_BIG_ENDIAN)
26218 {
26219 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26220 GEN_INT (0)));
26221 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26222 GEN_INT (3)));
26223 }
26224 else
26225 {
26226 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26227 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26228 }
26229
26230 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26231 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26232
26233 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26234 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26235
26236 if (BYTES_BIG_ENDIAN)
26237 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26238 else
26239 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26240 }
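
/* In element terms, the sequence above computes
     dst = { (float) src1[0], (float) src1[1],
             (float) src2[0], (float) src2[1] }
   for both endiannesses; the xxpermdi/xvcvdpsp/vmrgew steps only arrange
   the converted single-precision words into that order.  */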
26241
26242 void
26243 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26244 {
26245 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26246
26247 rtx_tmp0 = gen_reg_rtx (V2DImode);
26248 rtx_tmp1 = gen_reg_rtx (V2DImode);
26249
26250 /* The destination of the vmrgew instruction layout is:
26251      rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26252      Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26253 vmrgew instruction will be correct. */
26254 if (BYTES_BIG_ENDIAN)
26255 {
26256 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26257 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26258 }
26259 else
26260 {
26261 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26262 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26263 }
26264
26265 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26266 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26267
26268 if (signed_convert)
26269 {
26270 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26271 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26272 }
26273 else
26274 {
26275 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26276 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26277 }
26278
26279 if (BYTES_BIG_ENDIAN)
26280 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26281 else
26282 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26283 }
26284
26285 void
26286 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26287 rtx src2)
26288 {
26289 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26290
26291 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26292 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26293
26294 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26295 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26296
26297 rtx_tmp2 = gen_reg_rtx (V4SImode);
26298 rtx_tmp3 = gen_reg_rtx (V4SImode);
26299
26300 if (signed_convert)
26301 {
26302 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26303 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26304 }
26305 else
26306 {
26307 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26308 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26309 }
26310
26311 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26312 }
26313
26314 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26315
26316 static bool
26317 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26318 optimization_type opt_type)
26319 {
26320 switch (op)
26321 {
26322 case rsqrt_optab:
26323 return (opt_type == OPTIMIZE_FOR_SPEED
26324 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26325
26326 default:
26327 return true;
26328 }
26329 }
26330
26331 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26332
26333 static HOST_WIDE_INT
26334 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26335 {
26336 if (TREE_CODE (exp) == STRING_CST
26337 && (STRICT_ALIGNMENT || !optimize_size))
26338 return MAX (align, BITS_PER_WORD);
26339 return align;
26340 }
26341
26342 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26343
26344 static HOST_WIDE_INT
26345 rs6000_starting_frame_offset (void)
26346 {
26347 if (FRAME_GROWS_DOWNWARD)
26348 return 0;
26349 return RS6000_STARTING_FRAME_OFFSET;
26350 }
26351 \f
26352
26353 /* Create an alias for a mangled name where we have changed the mangling (in
26354 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26355 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26356
26357 #if TARGET_ELF && RS6000_WEAK
26358 static void
26359 rs6000_globalize_decl_name (FILE * stream, tree decl)
26360 {
26361 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26362
26363 targetm.asm_out.globalize_label (stream, name);
26364
26365 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26366 {
26367 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26368 const char *old_name;
26369
26370 ieee128_mangling_gcc_8_1 = true;
26371 lang_hooks.set_decl_assembler_name (decl);
26372 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26373 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26374 ieee128_mangling_gcc_8_1 = false;
26375
26376 if (strcmp (name, old_name) != 0)
26377 {
26378 fprintf (stream, "\t.weak %s\n", old_name);
26379 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26380 }
26381 }
26382 }
26383 #endif
26384
26385 \f
26386 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
26387 function names from <foo>l to <foo>f128 if the default long double type is
26388 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26389 include file switches the names on systems that support long double as IEEE
26390 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26391 In the future, glibc will export names like __ieee128_sinf128 and we can
26392 switch to using those instead of using sinf128, which pollutes the user's
26393 namespace.
26394
26395 This will switch the names for Fortran math functions as well (which doesn't
26396 use math.h). However, Fortran needs other changes to the compiler and
26397 library before you can switch the real*16 type at compile time.
26398
26399 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26400 only do this if the default is that long double is IBM extended double, and
26401 the user asked for IEEE 128-bit. */
26402
26403 static tree
26404 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26405 {
26406 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26407       && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26408 {
26409 size_t len = IDENTIFIER_LENGTH (id);
26410 const char *name = IDENTIFIER_POINTER (id);
26411
26412 if (name[len - 1] == 'l')
26413 {
26414 bool uses_ieee128_p = false;
26415 tree type = TREE_TYPE (decl);
26416 machine_mode ret_mode = TYPE_MODE (type);
26417
26418 	  /* See if the function returns an IEEE 128-bit floating point type or
26419 complex type. */
26420 if (ret_mode == TFmode || ret_mode == TCmode)
26421 uses_ieee128_p = true;
26422 else
26423 {
26424 function_args_iterator args_iter;
26425 tree arg;
26426
26427 	      /* See if the function passes an IEEE 128-bit floating point type
26428 or complex type. */
26429 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26430 {
26431 machine_mode arg_mode = TYPE_MODE (arg);
26432 if (arg_mode == TFmode || arg_mode == TCmode)
26433 {
26434 uses_ieee128_p = true;
26435 break;
26436 }
26437 }
26438 }
26439
26440 /* If we passed or returned an IEEE 128-bit floating point type,
26441 change the name. */
26442 if (uses_ieee128_p)
26443 {
26444 char *name2 = (char *) alloca (len + 4);
26445 memcpy (name2, name, len - 1);
26446 strcpy (name2 + len - 1, "f128");
26447 id = get_identifier (name2);
26448 }
26449 }
26450 }
26451
26452 return id;
26453 }
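
/* For example, with long double defaulted to IBM extended double but the
   user selecting IEEE 128-bit, a call to __builtin_sinl takes and returns a
   TFmode value, so its assembler name is rewritten from "sinl" to
   "sinf128".  */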
26454
26455 /* Predict whether the given loop in gimple will be transformed in the RTL
26456 doloop_optimize pass. */
26457
26458 static bool
26459 rs6000_predict_doloop_p (struct loop *loop)
26460 {
26461 gcc_assert (loop);
26462
26463 /* On rs6000, targetm.can_use_doloop_p is actually
26464 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26465 if (loop->inner != NULL)
26466 {
26467 if (dump_file && (dump_flags & TDF_DETAILS))
26468 fprintf (dump_file, "Predict doloop failure due to"
26469 " loop nesting.\n");
26470 return false;
26471 }
26472
26473 return true;
26474 }
26475
26476 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
26477
26478 static bool
26479 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
26480 {
26481 gcc_assert (MEM_P (mem));
26482
26483   /* curr_insn_transform()'s handling of subregs cannot handle Altivec
26484      AND-type addresses, so don't allow MEMs with those address types to be
26485      substituted as an equivalent expression.  See PR93974 for details.  */
26486 if (GET_CODE (XEXP (mem, 0)) == AND)
26487 return true;
26488
26489 return false;
26490 }
26491
26492 struct gcc_target targetm = TARGET_INITIALIZER;
26493
26494 #include "gt-rs6000.h"